Merge pull request #141 from KeygraphHQ/refactor/architecture

refactor: decompose activities into services layer with structured error handling
2026-02-17 12:22:23 -08:00
parent 7fb0c30769 a960ad1182
commit afa0e9b701
56 changed files with 2899 additions and 2913 deletions
@@ -8,13 +8,14 @@ You are debugging an issue. Follow this structured approach to avoid spinning in
 - Read the full error message and stack trace
 - Identify the layer where the error originated:
  - **CLI/Args** - Input validation, path resolution
-  - **Config Parsing** - YAML parsing, JSON Schema validation
+  - **Config Parsing** - YAML parsing, JSON Schema validation (`src/config-parser.ts`)
-  - **Session Management** - Mutex, session.json, lock files
+  - **Session Management** - Agent definitions (`src/session-manager.ts`), mutex (`src/utils/concurrency.ts`)
-  - **Audit System** - Logging, metrics tracking, atomic writes
+  - **DI Container** - Container initialization/lookup (`src/services/container.ts`)
-  - **Claude SDK** - Agent execution, MCP servers, turn handling
+  - **Services** - AgentExecutionService, ConfigLoaderService, ExploitationCheckerService, error-handling (`src/services/`)
-  - **Git Operations** - Checkpoints, rollback, commit
+  - **Audit System** - Logging, metrics tracking, atomic writes (`src/audit/`)
-  - **Tool Execution** - nmap, subfinder, whatweb
+  - **Claude SDK** - Agent execution, MCP servers, turn handling (`src/ai/claude-executor.ts`)
-  - **Validation** - Deliverable checks, queue validation
+  - **Git Operations** - Checkpoints, rollback, commit (`src/services/git-manager.ts`)
  - **Validation** - Deliverable checks, queue validation (`src/services/queue-validation.ts`)
 ## Step 2: Check Relevant Logs
@@ -37,12 +38,14 @@ For Shannon, trace through these layers:
 1. **Temporal Client** → `src/temporal/client.ts` - Workflow initiation
 2. **Workflow** → `src/temporal/workflows.ts` - Pipeline orchestration
-3. **Activities** → `src/temporal/activities.ts` - Agent execution with heartbeats
+3. **Activities** → `src/temporal/activities.ts` - Thin wrappers: heartbeat, error classification
-4. **Config** → `src/config-parser.ts` - YAML loading, schema validation
+4. **Container** → `src/services/container.ts` - Per-workflow DI
-5. **Session** → `src/session-manager.ts` - Agent definitions, execution order
+5. **Services** → `src/services/agent-execution.ts` - Agent lifecycle
-6. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking
+6. **Config** → `src/config-parser.ts` via `src/services/config-loader.ts` - YAML loading, schema validation
-7. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic
+7. **Prompts** → `src/services/prompt-manager.ts` - Prompt templates, variable interpolation
-8. **Validation** → `src/queue-validation.ts` - Deliverable checks
+8. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking
 9. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic
 10. **Validation** → `src/services/queue-validation.ts` - Deliverable checks
 ## Step 4: Identify Root Cause
@@ -58,7 +61,10 @@ For Shannon, trace through these layers:
 | Cost/timing not tracked | Metrics not reloaded before update | Add `metricsTracker.reload()` before updates |
 | session.json corrupted | Partial write during crash | Delete and restart, or restore from backup |
 | YAML config rejected | Invalid schema or unsafe content | Run through AJV validator manually |
-| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `prompt-manager.ts` interpolation |
+| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `src/services/prompt-manager.ts` interpolation |
 | Service returns Err result | Check `ErrorCode` in Result | Trace through `classifyErrorForTemporal()` in `src/services/error-handling.ts` |
 | Container not found | `getOrCreateContainer()` not called | Check activity setup code in `src/temporal/activities.ts` |
 | ActivityLogger undefined | `createActivityLogger()` not called | Must be called at top of each activity function |
 **MCP Server Issues:**
 ```bash
@@ -123,6 +129,8 @@ shannon <URL> <REPO> --pipeline-testing
 ## Quick Reference: Error Types
 `ErrorCode` enum in `src/types/errors.ts` provides finer-grained classification used by `classifyErrorForTemporal()` in `src/services/error-handling.ts`.
 | PentestError Type | Meaning | Retryable? |
 |-------------------|---------|------------|
 | `config` | Configuration file issues | No |
@@ -19,6 +19,8 @@ git diff HEAD
 - [ ] **Retryable flag matches behavior** - If an error will be retried, set `retryable: true`
 - [ ] **Context includes debugging info** - Add relevant paths, tool names, error codes to context object
 - [ ] **Never swallow errors silently** - Always log or propagate errors
 - [ ] **Use ErrorCode enum** - Prefer `ErrorCode.CONFIG_INVALID` over string matching for classification
 - [ ] **Result<T,E> for service returns** - Services return `Result`, not throw
 ### Audit System & Concurrency (CRITICAL)
 - [ ] **Mutex protection for parallel operations** - Use `sessionMutex.lock()` when updating `session.json` during parallel agent execution
@@ -41,6 +43,13 @@ git diff HEAD
 - [ ] **Duplicate rule detection** - Same `type:url_path` cannot appear twice
 - [ ] **JSON Schema validation before use** - Config must pass AJV validation
 ### Services Layer & DI Container (CRITICAL)
 - [ ] **Business logic in services, not activities** — Activities: heartbeat loop, error classification, container calls only. Domain logic → `src/services/`
 - [ ] **Services accept ActivityLogger** — Never import `@temporalio/*` in services. Use `ActivityLogger` interface from `src/types/`
 - [ ] **Result type for fallible operations** — Service methods return `Result<T, PentestError>`, unwrap with `isOk()`/`isErr()`. Activities call `executeOrThrow()` at the boundary
 - [ ] **Container lifecycle** — `getOrCreateContainer()` at activity start, `removeContainer()` only in workflow cleanup
 - [ ] **AuditSession not in container** — Must be passed per-agent call (parallel safety)
 ### Session & Agent Management (CRITICAL)
 - [ ] **Deliverable dependencies respected** - Exploitation agents only run if vulnerability queue exists AND has items
 - [ ] **Queue validation before exploitation** - Use `safeValidateQueueAndDeliverable()` to check eligibility
@@ -91,6 +100,8 @@ git diff HEAD
 - [ ] **Duplicate retry logic** - Don't implement retry at both caller and callee level
 - [ ] **Hardcoded error message matching** - Prefer error codes over regex on error.message
 - [ ] **Missing timeout on long operations** - Git operations and API calls should have timeouts
 - [ ] **Console.log in services** — Use `ActivityLogger`. Only CLI display code (`client.ts`, `worker.ts`, `output-formatters.ts`) uses console.log
 - [ ] **Temporal imports in services** — Services must stay Temporal-agnostic. If you need Temporal APIs, it belongs in activities
 ### Code Quality
 - [ ] **No dead code added** - Remove unused imports, functions, variables
@@ -41,18 +41,20 @@ npm run build
 ## Architecture
 ### Core Modules
- `src/session-manager.ts` — Agent definitions, execution order, parallel groups
+- `src/session-manager.ts` — Agent definitions (`AGENTS` record). Agent types in `src/types/agents.ts`
 - `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic and git checkpoints
 - `src/config-parser.ts` — YAML config parsing with JSON Schema validation
- `src/error-handling.ts` — Categorized error types (PentestError, ConfigError, NetworkError) with retry logic
+- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic
- `src/tool-checker.ts` — Validates external security tool availability before execution
+- `src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts`
- `src/queue-validation.ts` — Deliverable validation and agent prerequisites
+- `src/types/` — Consolidated types: `Result<T,E>`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc.
 - `src/utils/` — Shared utilities (file I/O, formatting, concurrency)
 ### Temporal Orchestration
 Durable workflow orchestration with crash recovery, queryable progress, intelligent retry, and parallel execution (5 concurrent agents in vuln/exploit phases).
 - `src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`)
- `src/temporal/activities.ts` — Activity implementations with heartbeats
+- `src/temporal/activities.ts` — Thin wrappers: heartbeat loop, error classification, container lifecycle. Business logic is delegated to `src/services/`
 - `src/temporal/activity-logger.ts` — `TemporalActivityLogger` implementation of `ActivityLogger` interface
 - `src/temporal/summary-mapper.ts` — Maps `PipelineSummary` to `WorkflowSummary`
 - `src/temporal/worker.ts` — Worker entry point
 - `src/temporal/client.ts` — CLI client for starting workflows
 - `src/temporal/shared.ts` — Types, interfaces, query definitions
@@ -66,30 +68,32 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
 ### Supporting Systems
 - **Configuration** — YAML configs in `configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters
- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `prompt-manager.ts`
+- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `src/services/prompt-manager.ts`
 - **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables
+- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. `WorkflowLogger` (`src/audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by the shared `LogStream` stream primitive (`src/audit/log-stream.ts`)
 - **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool
 - **Workspaces & Resume** — Named workspaces via `WORKSPACE=<name>` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts`
 ## Development Notes
 ### Adding a New Agent
-1. Define agent in `src/session-manager.ts` (add to `AGENT_QUEUE` and parallel group)
+1. Define agent in `src/session-manager.ts` (add to `AGENTS` record). `ALL_AGENTS`/`AgentName` types live in `src/types/agents.ts`
 2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt`)
-3. Add activity function in `src/temporal/activities.ts`
+3. Two-layer pattern: add a thin activity wrapper in `src/temporal/activities.ts` (heartbeat + error classification). `AgentExecutionService` in `src/services/agent-execution.ts` handles the agent lifecycle automatically via the `AGENTS` registry
 4. Register activity in `src/temporal/workflows.ts` within the appropriate phase
 ### Modifying Prompts
 - Variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}`
- Shared partials in `prompts/shared/` included via `prompt-manager.ts`
+- Shared partials in `prompts/shared/` included via `src/services/prompt-manager.ts`
 - Test with `PIPELINE_TESTING=true` for fast iteration
 ### Key Design Patterns
 - **Configuration-Driven** — YAML configs with JSON Schema validation
 - **Progressive Analysis** — Each phase builds on previous results
 - **SDK-First** — Claude Agent SDK handles autonomous analysis
- **Modular Error Handling** — Categorized errors with automatic retry (3 attempts per agent)
+- **Modular Error Handling** — `ErrorCode` enum, `Result<T,E>` for explicit error propagation, automatic retry (3 attempts per agent)
 - **Services Boundary** — Activities are thin Temporal wrappers; `src/services/` owns business logic, accepts `ActivityLogger`, returns `Result<T,E>`. No Temporal imports in services
 - **DI Container** — Per-workflow in `src/services/container.ts`. `AuditSession` excluded (parallel safety)
 ### Security
 Defensive security tool only. Use only on systems you own or have explicit permission to test.
@@ -111,18 +115,36 @@ Defensive security tool only. Use only on systems you own or have explicit permi
 - Use `function` keyword for top-level functions (not arrow functions)
 - Explicit return type annotations on exported/top-level functions
 - Prefer `readonly` for data that shouldn't be mutated
 - `exactOptionalPropertyTypes` is enabled — use spread for optional props, not direct `undefined` assignment
 ### Avoid
 - Combining multiple concerns into a single function to "save lines"
 - Dense callback chains when sequential logic is clearer
 - Sacrificing readability for DRY — some repetition is fine if clearer
 - Abstractions for one-time operations
 - Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it
 ### Comments
 Comments must be **timeless** — no references to this conversation, refactoring history, or the AI.
 **Patterns used in this codebase:**
 - `/** JSDoc */` — file headers (after license) and exported functions/interfaces
 - `// N. Description` — numbered sequential steps inside function bodies. Use when a
  function has 3+ distinct phases where at least one isn't immediately obvious from the
  code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute`
  (steps 1-9) and `injectModelIntoReport` (steps 1-5)
 - `// === Section ===` — high-level dividers between groups of functions in long files,
  or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`).
  Not for sequential steps inside function bodies — use numbered steps for that
 - `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints
 **Never:** obvious comments, conversation references ("as discussed"), history ("moved from X")
 ## Key Files
 **Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts`
-**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/audit/`
+**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/services/`, `src/audit/`
 **Config:** `shannon` (CLI), `docker-compose.yml`, `configs/`, `prompts/`
@@ -21,7 +21,6 @@
        "figlet": "^1.9.3",
        "gradient-string": "^3.0.0",
        "js-yaml": "^4.1.0",
        "zod": "^4.3.6",
        "zx": "^8.0.0"
      },
      "devDependencies": {
@@ -23,7 +23,6 @@
    "figlet": "^1.9.3",
    "gradient-string": "^3.0.0",
    "js-yaml": "^4.1.0",
    "zod": "^4.3.6",
    "zx": "^8.0.0"
  },
  "devDependencies": {
@@ -7,18 +7,16 @@
 // Production Claude agent execution with retry, git checkpoints, and audit logging
 import { fs, path } from 'zx';
 import chalk, { type ChalkInstance } from 'chalk';
 import { query } from '@anthropic-ai/claude-agent-sdk';
-import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
+import { isRetryableError, PentestError } from '../services/error-handling.js';
-import { timingResults, Timer } from '../utils/metrics.js';
+import { isSpendingCapBehavior } from '../utils/billing-detection.js';
 import { Timer } from '../utils/metrics.js';
 import { formatTimestamp } from '../utils/formatting.js';
-import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace, getGitCommitHash } from '../utils/git-manager.js';
+import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js';
 import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
 import { AuditSession } from '../audit/index.js';
 import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
-import type { SessionMetadata } from '../audit/utils.js';
+import { AGENTS } from '../session-manager.js';
 import { getPromptNameForAgent } from '../types/agents.js';
 import type { AgentName } from '../types/index.js';
 import { dispatchMessage } from './message-handlers.js';
@@ -26,6 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr
 import { createProgressManager } from './progress-manager.js';
 import { createAuditLogger } from './audit-logger.js';
 import { getActualModelName } from './router-utils.js';
 import type { ActivityLogger } from '../types/activity-logger.js';
 declare global {
  var SHANNON_DISABLE_LOADER: boolean | undefined;
@@ -58,24 +57,27 @@ type McpServer = ReturnType<typeof createShannonHelperServer> | StdioMcpServer;
 // Configures MCP servers for agent execution, with Docker-specific Chromium handling
 function buildMcpServers(
  sourceDir: string,
-  agentName: string | null
+  agentName: string | null,
  logger: ActivityLogger
 ): Record<string, McpServer> {
  // 1. Create the shannon-helper server (always present)
  const shannonHelperServer = createShannonHelperServer(sourceDir);
  const mcpServers: Record<string, McpServer> = {
    'shannon-helper': shannonHelperServer,
  };
  // 2. Look up the agent's Playwright MCP mapping
  if (agentName) {
-    const promptName = getPromptNameForAgent(agentName as AgentName);
+    const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
-    const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null;
+    const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
    if (playwrightMcpName) {
-      console.log(chalk.gray(`    Assigned ${agentName} -> ${playwrightMcpName}`));
+      logger.info(`Assigned ${agentName} -> ${playwrightMcpName}`);
      const userDataDir = `/tmp/${playwrightMcpName}`;
-      // Docker uses system Chromium; local dev uses Playwright's bundled browsers
+      // 3. Configure Playwright MCP args with Docker/local browser handling
      const isDocker = process.env.SHANNON_DOCKER === 'true';
      const mcpArgs: string[] = [
@@ -84,7 +86,6 @@ function buildMcpServers(
        '--user-data-dir', userDataDir,
      ];
      // Docker: Use system Chromium; Local: Use Playwright's bundled browsers
      if (isDocker) {
        mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
        mcpArgs.push('--browser', 'chromium');
@@ -107,6 +108,7 @@ function buildMcpServers(
    }
  }
  // 4. Return configured servers
  return mcpServers;
 }
@@ -142,23 +144,23 @@ async function writeErrorLog(
    };
    const logPath = path.join(sourceDir, 'error.log');
    await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
-  } catch (logError) {
+  } catch {
-    const logErrMsg = logError instanceof Error ? logError.message : String(logError);
+    // Best-effort error log writing - don't propagate failures
    console.log(chalk.gray(`    (Failed to write error log: ${logErrMsg})`));
  }
 }
 export async function validateAgentOutput(
  result: ClaudePromptResult,
  agentName: string | null,
-  sourceDir: string
+  sourceDir: string,
  logger: ActivityLogger
 ): Promise<boolean> {
-  console.log(chalk.blue(`    Validating ${agentName} agent output`));
+  logger.info(`Validating ${agentName} agent output`);
  try {
    // Check if agent completed successfully
    if (!result.success || !result.result) {
-      console.log(chalk.red(`    Validation failed: Agent execution was unsuccessful`));
+      logger.error('Validation failed: Agent execution was unsuccessful');
      return false;
    }
@@ -166,28 +168,27 @@ export async function validateAgentOutput(
    const validator = agentName ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS] : undefined;
    if (!validator) {
-      console.log(chalk.yellow(`    No validator found for agent "${agentName}" - assuming success`));
+      logger.warn(`No validator found for agent "${agentName}" - assuming success`);
-      console.log(chalk.green(`    Validation passed: Unknown agent with successful result`));
+      logger.info('Validation passed: Unknown agent with successful result');
      return true;
    }
-    console.log(chalk.blue(`    Using validator for agent: ${agentName}`));
+    logger.info(`Using validator for agent: ${agentName}`, { sourceDir });
    console.log(chalk.blue(`    Source directory: ${sourceDir}`));
    // Apply validation function
-    const validationResult = await validator(sourceDir);
+    const validationResult = await validator(sourceDir, logger);
    if (validationResult) {
-      console.log(chalk.green(`    Validation passed: Required files/structure present`));
+      logger.info('Validation passed: Required files/structure present');
    } else {
-      console.log(chalk.red(`    Validation failed: Missing required deliverable files`));
+      logger.error('Validation failed: Missing required deliverable files');
    }
    return validationResult;
  } catch (error) {
    const errMsg = error instanceof Error ? error.message : String(error);
-    console.log(chalk.red(`    Validation failed with error: ${errMsg}`));
+    logger.error(`Validation failed with error: ${errMsg}`);
    return false;
  }
 }
@@ -200,14 +201,14 @@ export async function runClaudePrompt(
  context: string = '',
  description: string = 'Claude analysis',
  agentName: string | null = null,
  colorFn: ChalkInstance = chalk.cyan,
  sessionMetadata: SessionMetadata | null = null,
  auditSession: AuditSession | null = null,
-  attemptNumber: number = 1
+  logger: ActivityLogger
 ): Promise<ClaudePromptResult> {
  // 1. Initialize timing and prompt
  const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
  const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
  // 2. Set up progress and audit infrastructure
  const execContext = detectExecutionContext(description);
  const progress = createProgressManager(
    { description, useCleanOutput: execContext.useCleanOutput },
@@ -215,11 +216,12 @@ export async function runClaudePrompt(
  );
  const auditLogger = createAuditLogger(auditSession);
-  console.log(chalk.blue(`  Running Claude Code: ${description}...`));
+  logger.info(`Running Claude Code: ${description}...`);
-  const mcpServers = buildMcpServers(sourceDir, agentName);
+  // 3. Configure MCP servers
  const mcpServers = buildMcpServers(sourceDir, agentName, logger);
-  // Build env vars to pass to SDK subprocesses
+  // 4. Build env vars to pass to SDK subprocesses
  const sdkEnv: Record<string, string> = {
    CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
  };
@@ -230,6 +232,7 @@ export async function runClaudePrompt(
    sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
  }
  // 5. Configure SDK options
  const options = {
    model: 'claude-sonnet-4-5-20250929',
    maxTurns: 10_000,
@@ -241,7 +244,7 @@ export async function runClaudePrompt(
  };
  if (!execContext.useCleanOutput) {
-    console.log(chalk.gray(`    SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
+    logger.info(`SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`);
  }
  let turnCount = 0;
@@ -252,10 +255,11 @@ export async function runClaudePrompt(
  progress.start();
  try {
    // 6. Process the message stream
    const messageLoopResult = await processMessageStream(
      fullPrompt,
      options,
-      { execContext, description, colorFn, progress, auditLogger },
+      { execContext, description, progress, auditLogger, logger },
      timer
    );
@@ -266,30 +270,21 @@ export async function runClaudePrompt(
    const model = messageLoopResult.model;
    // === SPENDING CAP SAFEGUARD ===
-    // Defense-in-depth: Detect spending cap that slipped through detectApiError().
+    // 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
-    // When spending cap is hit, Claude returns a short message with $0 cost.
+    // Uses consolidated billing detection from utils/billing-detection.ts
-    // Legitimate agent work NEVER costs $0 with only 1-2 turns.
+    if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
-    if (turnCount <= 2 && totalCost === 0) {
+      throw new PentestError(
-      const resultLower = (result || '').toLowerCase();
+        `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
-      const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets'];
+        'billing',
-      const looksLikeBillingError = BILLING_KEYWORDS.some((kw) =>
+        true // Retryable - Temporal will use 5-30 min backoff
        resultLower.includes(kw)
      );
      if (looksLikeBillingError) {
        throw new PentestError(
          `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
          'billing',
          true // Retryable - Temporal will use 5-30 min backoff
        );
      }
    }
    // 8. Finalize successful result
    const duration = timer.stop();
    timingResults.agents[execContext.agentKey] = duration;
    if (apiErrorDetected) {
-      console.log(chalk.yellow(`  API Error detected in ${description} - will validate deliverables before failing`));
+      logger.warn(`API Error detected in ${description} - will validate deliverables before failing`);
    }
    progress.finish(formatCompletionMessage(execContext, description, turnCount, duration));
@@ -306,8 +301,8 @@ export async function runClaudePrompt(
    };
  } catch (error) {
    // 9. Handle errors — log, write error file, return failure
    const duration = timer.stop();
    timingResults.agents[execContext.agentKey] = duration;
    const err = error as Error & { code?: string; status?: number };
@@ -340,9 +335,9 @@ interface MessageLoopResult {
 /**
  * Collaborators handed to `processMessageStream` as its `deps` argument.
  * The same set of values is forwarded to `dispatchMessage` for each message.
  */
 interface MessageLoopDeps {
  /** Execution context as returned by `detectExecutionContext`. */
  execContext: ReturnType<typeof detectExecutionContext>;
  /** Human-readable label for the run; included in the heartbeat log line. */
  description: string;
  // NOTE(review): `processMessageStream` no longer destructures `colorFn` in this view —
  // confirm whether this field is still required.
  colorFn: ChalkInstance;
  /** Progress manager as returned by `createProgressManager`. */
  progress: ReturnType<typeof createProgressManager>;
  /** Audit logger as returned by `createAuditLogger`. */
  auditLogger: ReturnType<typeof createAuditLogger>;
  /** Logger used for the periodic heartbeat message while the stream is processed. */
  logger: ActivityLogger;
 }
 async function processMessageStream(
@@ -351,7 +346,7 @@ async function processMessageStream(
  deps: MessageLoopDeps,
  timer: Timer
 ): Promise<MessageLoopResult> {
-  const { execContext, description, colorFn, progress, auditLogger } = deps;
+  const { execContext, description, progress, auditLogger, logger } = deps;
  const HEARTBEAT_INTERVAL = 30000;
  let turnCount = 0;
@@ -365,7 +360,7 @@ async function processMessageStream(
    // Heartbeat logging when loader is disabled
    const now = Date.now();
    if (global.SHANNON_DISABLE_LOADER && now - lastHeartbeat > HEARTBEAT_INTERVAL) {
-      console.log(chalk.blue(`    [${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`));
+      logger.info(`[${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`);
      lastHeartbeat = now;
    }
@@ -377,7 +372,7 @@ async function processMessageStream(
    const dispatchResult = await dispatchMessage(
      message as { type: string; subtype?: string },
      turnCount,
-      { execContext, description, colorFn, progress, auditLogger }
+      { execContext, description, progress, auditLogger, logger }
    );
    if (dispatchResult.type === 'throw') {
@@ -403,153 +398,3 @@ async function processMessageStream(
  return { turnCount, result, apiErrorDetected, cost, model };
 }
 /**
  * Main entry point for agent execution. Handles retries, git checkpoints, and validation.
  *
  * Runs the prompt up to `maxRetries` (3) times. Each attempt creates a git checkpoint,
  * calls `runClaudePrompt`, and validates the output with `validateAgentOutput`. On a
  * validated success the workspace is committed and the result returned; on failure the
  * workspace is rolled back before the next attempt or before the error is propagated.
  *
  * @param prompt - Prompt text passed to `runClaudePrompt`.
  * @param sourceDir - Directory used for git checkpoints, rollback, and output validation.
  * @param _allowedTools - Not referenced in the function body.
  * @param context - Initial context; later attempts may append previous partial results.
  * @param description - Label used in log messages, checkpoints, and error messages.
  * @param agentName - Agent identifier; audit tracking only runs when this is non-null.
  * @param colorFn - Forwarded to `runClaudePrompt`.
  * @param sessionMetadata - When provided together with `agentName`, an `AuditSession`
  *   is created and initialized for the run.
  * @returns The result of the first attempt that succeeds and passes validation.
  * @throws PentestError with type `'validation'` when validation fails on the last attempt.
  * @throws The caught error when `isRetryableError` reports it as non-retryable.
  * @throws The last recorded error once all attempts are exhausted.
  */
 export async function runClaudePromptWithRetry(
  prompt: string,
  sourceDir: string,
  _allowedTools: string = 'Read',
  context: string = '',
  description: string = 'Claude analysis',
  agentName: string | null = null,
  colorFn: ChalkInstance = chalk.cyan,
  sessionMetadata: SessionMetadata | null = null
 ): Promise<ClaudePromptResult> {
  const maxRetries = 3;
  let lastError: Error | undefined;
  // Context for the next attempt; starts as the caller's context and may gain partial results.
  let retryContext = context;
  console.log(chalk.cyan(`Starting ${description} with ${maxRetries} max attempts`));
  // Audit tracking is optional: it needs both session metadata and an agent name.
  let auditSession: AuditSession | null = null;
  if (sessionMetadata && agentName) {
    auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();
  }
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Checkpoint before every attempt so a failed attempt can be rolled back.
    await createGitCheckpoint(sourceDir, description, attempt);
    if (auditSession && agentName) {
      // Record the same context+prompt combination that is built from retryContext for this attempt.
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await auditSession.startAgent(agentName, fullPrompt, attempt);
    }
    try {
      const result = await runClaudePrompt(
        prompt, sourceDir, retryContext,
        description, agentName, colorFn, sessionMetadata, auditSession, attempt
      );
      // NOTE(review): when `result.success` is false and nothing throws, this iteration does no
      // rollback or audit bookkeeping and leaves `lastError` untouched — confirm whether
      // `runClaudePrompt` can return an unsuccessful result instead of throwing.
      if (result.success) {
        const validationPassed = await validateAgentOutput(result, agentName, sourceDir);
        if (validationPassed) {
          if (result.apiErrorDetected) {
            console.log(chalk.yellow(`Validation: Ready for exploitation despite API error warnings`));
          }
          if (auditSession && agentName) {
            const commitHash = await getGitCommitHash(sourceDir);
            // `checkpoint` is added only when a commit hash exists, rather than assigned as undefined.
            const endResult: {
              attemptNumber: number;
              duration_ms: number;
              cost_usd: number;
              success: true;
              checkpoint?: string;
            } = {
              attemptNumber: attempt,
              duration_ms: result.duration,
              cost_usd: result.cost || 0,
              success: true,
            };
            if (commitHash) {
              endResult.checkpoint = commitHash;
            }
            await auditSession.endAgent(agentName, endResult);
          }
          await commitGitSuccess(sourceDir, description);
          console.log(chalk.green.bold(`${description} completed successfully on attempt ${attempt}/${maxRetries}`));
          return result;
        // Validation failure is retryable - agent might succeed on retry with cleaner workspace
        } else {
          console.log(chalk.yellow(`${description} completed but output validation failed`));
          if (auditSession && agentName) {
            await auditSession.endAgent(agentName, {
              attemptNumber: attempt,
              duration_ms: result.duration,
              cost_usd: result.partialCost || result.cost || 0,
              success: false,
              error: 'Output validation failed',
              isFinalAttempt: attempt === maxRetries
            });
          }
          if (result.apiErrorDetected) {
            console.log(chalk.yellow(`API Error detected with validation failure - treating as retryable`));
            lastError = new Error('API Error: terminated with validation failure');
          } else {
            lastError = new Error('Output validation failed');
          }
          if (attempt < maxRetries) {
            await rollbackGitWorkspace(sourceDir, 'validation failure');
            continue;
          } else {
            // NOTE: this throw is inside the `try`, so it is handled by the `catch` below, which
            // overwrites `lastError` and calls `endAgent` a second time for this attempt.
            throw new PentestError(
              `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
              'validation',
              false,
              { description, sourceDir, attemptsExhausted: maxRetries }
            );
          }
        }
      }
    } catch (error) {
      // The error may carry optional duration/cost/partialResults fields; missing ones default to 0 below.
      const err = error as Error & { duration?: number; cost?: number; partialResults?: unknown };
      lastError = err;
      if (auditSession && agentName) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: err.duration || 0,
          cost_usd: err.cost || 0,
          success: false,
          error: err.message,
          isFinalAttempt: attempt === maxRetries
        });
      }
      // Non-retryable errors: clean up the workspace and propagate immediately.
      if (!isRetryableError(err)) {
        console.log(chalk.red(`${description} failed with non-retryable error: ${err.message}`));
        await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
        throw err;
      }
      if (attempt < maxRetries) {
        // Retryable error with attempts left: roll back, wait, then loop again.
        await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');
        const delay = getRetryDelay(err, attempt);
        const delaySeconds = (delay / 1000).toFixed(1);
        console.log(chalk.yellow(`${description} failed (attempt ${attempt}/${maxRetries})`));
        console.log(chalk.gray(`    Error: ${err.message}`));
        console.log(chalk.gray(`    Workspace rolled back, retrying in ${delaySeconds}s...`));
        // Partial results are appended to the original context, not to the previous retry context.
        if (err.partialResults) {
          retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(err.partialResults)}`;
        }
        await new Promise(resolve => setTimeout(resolve, delay));
      } else {
        // Final attempt failed: clean up and fall out of the loop to the throw below.
        await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
        console.log(chalk.red(`${description} failed after ${maxRetries} attempts`));
        console.log(chalk.red(`    Final error: ${err.message}`));
      }
    }
  }
  // NOTE(review): `lastError` is typed `Error | undefined`; if no attempt ever recorded an error,
  // this throws `undefined` — confirm that path is unreachable.
  throw lastError;
 }
@@ -4,20 +4,19 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
-// Pure functions for processing SDK message types
+import { PentestError } from '../services/error-handling.js';
-
+import { ErrorCode } from '../types/errors.js';
-import { PentestError } from '../error-handling.js';
+import { matchesBillingTextPattern } from '../utils/billing-detection.js';
-import { filterJsonToolCalls } from '../utils/output-formatter.js';
+import { filterJsonToolCalls } from './output-formatters.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import chalk from 'chalk';
 import { getActualModelName } from './router-utils.js';
 import type { ActivityLogger } from '../types/activity-logger.js';
 import {
  formatAssistantOutput,
  formatResultOutput,
  formatToolUseOutput,
  formatToolResultOutput,
 } from './output-formatters.js';
 import { costResults } from '../utils/metrics.js';
 import type { AuditLogger } from './audit-logger.js';
 import type { ProgressManager } from './progress-manager.js';
 import type {
@@ -35,10 +34,9 @@ import type {
  SystemInitMessage,
  ExecutionContext,
 } from './types.js';
 import type { ChalkInstance } from 'chalk';
 // Handles both array and string content formats from SDK
-export function extractMessageContent(message: AssistantMessage): string {
+function extractMessageContent(message: AssistantMessage): string {
  const messageContent = message.message;
  if (Array.isArray(messageContent.content)) {
@@ -51,7 +49,7 @@ export function extractMessageContent(message: AssistantMessage): string {
 }
 // Extracts only text content (no tool_use JSON) to avoid false positives in error detection
-export function extractTextOnlyContent(message: AssistantMessage): string {
+function extractTextOnlyContent(message: AssistantMessage): string {
  const messageContent = message.message;
  if (Array.isArray(messageContent.content)) {
@@ -64,7 +62,7 @@ export function extractTextOnlyContent(message: AssistantMessage): string {
  return String(messageContent.content);
 }
-export function detectApiError(content: string): ApiErrorDetection {
+function detectApiError(content: string): ApiErrorDetection {
  if (!content || typeof content !== 'string') {
    return { detected: false };
  }
@@ -75,25 +73,15 @@ export function detectApiError(content: string): ApiErrorDetection {
  // When Claude Code hits its spending cap, it returns a short message like
  // "Spending cap reached resets 8am" instead of throwing an error.
  // These should retry with 5-30 min backoff so workflows can recover when cap resets.
-  const BILLING_PATTERNS = [
+  if (matchesBillingTextPattern(content)) {
    'spending cap',
    'spending limit',
    'cap reached',
    'budget exceeded',
    'usage limit',
  ];
  const isBillingError = BILLING_PATTERNS.some((pattern) =>
    lowerContent.includes(pattern)
  );
  if (isBillingError) {
    return {
      detected: true,
      shouldThrow: new PentestError(
        `Billing limit reached: ${content.slice(0, 100)}`,
        'billing',
-        true // RETRYABLE - Temporal will use 5-30 min backoff
+        true, // RETRYABLE - Temporal will use 5-30 min backoff
        {},
        ErrorCode.SPENDING_CAP_REACHED
      ),
    };
  }
@@ -127,7 +115,9 @@ function handleStructuredError(
        shouldThrow: new PentestError(
          `Billing error (structured): ${content.slice(0, 100)}`,
          'billing',
-          true // Retryable with backoff
+          true, // Retryable with backoff
          {},
          ErrorCode.INSUFFICIENT_CREDITS
        ),
      };
    case 'rate_limit':
@@ -136,7 +126,9 @@ function handleStructuredError(
        shouldThrow: new PentestError(
          `Rate limit hit (structured): ${content.slice(0, 100)}`,
          'network',
-          true // Retryable with backoff
+          true, // Retryable with backoff
          {},
          ErrorCode.API_RATE_LIMITED
        ),
      };
    case 'authentication_failed':
@@ -181,7 +173,7 @@ function handleStructuredError(
  }
 }
-export function handleAssistantMessage(
+function handleAssistantMessage(
  message: AssistantMessage,
  turnCount: number
 ): AssistantResult {
@@ -219,7 +211,7 @@ export function handleAssistantMessage(
 }
 // Final message of a query with cost/duration info
-export function handleResultMessage(message: ResultMessage): ResultData {
+function handleResultMessage(message: ResultMessage): ResultData {
  const result: ResultData = {
    result: message.result || null,
    cost: message.total_cost_usd || 0,
@@ -236,14 +228,14 @@ export function handleResultMessage(message: ResultMessage): ResultData {
  if (message.stop_reason !== undefined) {
    result.stop_reason = message.stop_reason;
    if (message.stop_reason && message.stop_reason !== 'end_turn') {
-      console.log(chalk.yellow(`    Stop reason: ${message.stop_reason}`));
+      console.log(`    Stop reason: ${message.stop_reason}`);
    }
  }
  return result;
 }
-export function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
+function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
  return {
    toolName: message.name,
    parameters: message.input || {},
@@ -252,7 +244,7 @@ export function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
 }
 // Truncates long results for display (500 char limit), preserves full content for logging
-export function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
+function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
  const content = message.content;
  const contentStr =
    typeof content === 'string' ? content : JSON.stringify(content, null, 2);
@@ -269,14 +261,12 @@ export function handleToolResultMessage(message: ToolResultMessage): ToolResultD
  };
 }
 // Console sink: prints each pre-formatted line with its own console.log call, in order
 function outputLines(lines: string[]): void {
  lines.forEach((entry) => {
    console.log(entry);
  });
 }
 // Message dispatch result types
 export type MessageDispatchAction =
  | { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
  | { type: 'complete'; result: string | null; cost: number }
@@ -285,9 +275,9 @@ export type MessageDispatchAction =
 /**
 * Collaborators handed to `dispatchMessage` through its `deps` argument.
 */
 export interface MessageDispatchDeps {
  // Execution flags; dispatchMessage reads `useCleanOutput` and `agentKey` from it
  execContext: ExecutionContext;
  // Agent description forwarded to the assistant-output formatter
  description: string;
  // Chalk color function for this agent
  // NOTE(review): the visible dispatchMessage destructuring no longer pulls this field — confirm it is still needed
  colorFn: ChalkInstance;
  // Progress indicator; `start()` is called after assistant output is printed
  progress: ProgressManager;
  // Receives each LLM response via `logLlmResponse(turnCount, content)`
  auditLogger: AuditLogger;
  // Structured logger used for info/warn messages in place of direct console output
  logger: ActivityLogger;
 }
 // Dispatches SDK messages to appropriate handlers and formatters
@@ -296,7 +286,7 @@ export async function dispatchMessage(
  turnCount: number,
  deps: MessageDispatchDeps
 ): Promise<MessageDispatchAction> {
-  const { execContext, description, colorFn, progress, auditLogger } = deps;
+  const { execContext, description, progress, auditLogger, logger } = deps;
  switch (message.type) {
    case 'assistant': {
@@ -312,8 +302,7 @@ export async function dispatchMessage(
          assistantResult.cleanedContent,
          execContext,
          turnCount,
-          description,
+          description
          colorFn
        ));
        progress.start();
      }
@@ -321,7 +310,7 @@ export async function dispatchMessage(
      await auditLogger.logLlmResponse(turnCount, assistantResult.content);
      if (assistantResult.apiErrorDetected) {
-        console.log(chalk.red(`    API Error detected in assistant response`));
+        logger.warn('API Error detected in assistant response');
        return { type: 'continue', apiErrorDetected: true };
      }
@@ -333,10 +322,10 @@ export async function dispatchMessage(
        const initMsg = message as SystemInitMessage;
        const actualModel = getActualModelName(initMsg.model);
        if (!execContext.useCleanOutput) {
-          console.log(chalk.blue(`    Model: ${actualModel}, Permission: ${initMsg.permissionMode}`));
+          logger.info(`Model: ${actualModel}, Permission: ${initMsg.permissionMode}`);
          if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
            const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
-            console.log(chalk.blue(`    MCP: ${mcpStatus}`));
+            logger.info(`MCP: ${mcpStatus}`);
          }
        }
        // Return actual model for tracking in audit logs
@@ -368,13 +357,11 @@ export async function dispatchMessage(
    case 'result': {
      const resultData = handleResultMessage(message as ResultMessage);
      outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
      costResults.agents[execContext.agentKey] = resultData.cost;
      costResults.total += resultData.cost;
      return { type: 'complete', result: resultData.result, cost: resultData.cost };
    }
    default:
-      console.log(chalk.gray(`    ${message.type}: ${JSON.stringify(message, null, 2)}`));
+      logger.info(`Unhandled message type: ${message.type}`);
      return { type: 'continue' };
  }
 }
@@ -4,13 +4,267 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 // Pure functions for formatting console output
 import chalk from 'chalk';
 import { extractAgentType, formatDuration } from '../utils/formatting.js';
-import { getAgentPrefix } from '../utils/output-formatter.js';
+import { AGENTS } from '../session-manager.js';
 import type { ExecutionContext, ResultData } from './types.js';
 /**
 * Loosely-typed `input` payload of a parsed tool call.
 *
 * Only the fields that the formatters in this file read are named; any other
 * key is accepted through the index signature.
 */
 interface ToolCallInput {
  // Target URL of a browser navigation (shortened to its hostname for display)
  url?: string;
  // Element label used by click / hover / type messages
  element?: string;
  // Key name used by the press-key message
  key?: string;
  // Form fields; only the array length is displayed
  fields?: unknown[];
  // Text being waited for by the wait-for message
  text?: string;
  // Tab-management verb shown in the browser-tab message
  action?: string;
  // Task description shown when a Task tool call is launched
  description?: string;
  // TodoWrite entries; `status` is compared against 'completed' and 'in_progress'
  todos?: Array<{
    status: string;
    content: string;
  }>;
  [key: string]: unknown;
 }
 /**
 * Shape assumed for a `{"type":"tool_use",...}` JSON line after `JSON.parse`.
 * The cast in filterJsonToolCalls is unchecked, so the fields are not validated at runtime.
 */
 interface ToolCall {
  // Tool identifier, e.g. 'Task', 'TodoWrite' or an 'mcp__playwright__browser_*' name
  name: string;
  // Tool arguments; may be absent
  input?: ToolCallInput;
 }
 /**
 * Resolve the bracketed console tag (e.g. `[XSS]`) for an agent description.
 *
 * Lookup order:
 *  1. the description contains the `displayName` of a known agent in AGENTS;
 *  2. the description contains a lower-case keyword (case-sensitive match);
 *  3. otherwise the generic `[Agent]` tag.
 */
 export function getAgentPrefix(description: string): string {
  // Agent key -> tag; the vuln and exploit variants of a category share one tag
  const tagByAgentKey: Record<string, string> = {
    'injection-vuln': '[Injection]',
    'xss-vuln': '[XSS]',
    'auth-vuln': '[Auth]',
    'authz-vuln': '[Authz]',
    'ssrf-vuln': '[SSRF]',
    'injection-exploit': '[Injection]',
    'xss-exploit': '[XSS]',
    'auth-exploit': '[Auth]',
    'authz-exploit': '[Authz]',
    'ssrf-exploit': '[SSRF]',
  };
  // Pass 1: match on the registered display name of each agent
  const byDisplayName = Object.entries(tagByAgentKey).find(([agentKey]) => {
    const definition = AGENTS[agentKey as keyof typeof AGENTS];
    return Boolean(definition && description.includes(definition.displayName));
  });
  if (byDisplayName) {
    return byDisplayName[1];
  }
  // Pass 2: keyword fallback kept for backwards compatibility.
  // 'authz' must be tested before 'auth' because the latter is a substring of it.
  const keywordTags: Array<[string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, tag] of keywordTags) {
    if (description.includes(keyword)) {
      return tag;
    }
  }
  return '[Agent]';
 }
 /**
 * Reduce a URL to its hostname for compact display.
 *
 * Falls back to the first 30 characters of the raw input when the string
 * cannot be parsed as a URL or the parsed URL has an empty hostname.
 */
 function extractDomain(url: string): string {
  let hostname = '';
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Unparseable input: keep the empty hostname so the fallback below applies
  }
  return hostname !== '' ? hostname : url.slice(0, 30);
 }
 /**
 * Condense a TodoWrite payload into a single progress line.
 *
 * Preference order: the last todo marked 'completed', then the first todo
 * marked 'in_progress'. Returns null when there is no todo array or neither
 * status is present.
 */
 function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todoList = input?.todos;
  if (!todoList || !Array.isArray(todoList)) {
    return null;
  }
  let lastCompleted: { status: string; content: string } | undefined;
  let firstInProgress: { status: string; content: string } | undefined;
  for (const todo of todoList) {
    if (todo.status === 'completed') {
      // Keep overwriting so the final value is the most recently listed completion
      lastCompleted = todo;
    } else if (firstInProgress === undefined && todo.status === 'in_progress') {
      firstInProgress = todo;
    }
  }
  if (lastCompleted !== undefined) {
    return `✅ ${lastCompleted.content}`;
  }
  if (firstInProgress !== undefined) {
    return `🔄 ${firstInProgress.content}`;
  }
  return null;
 }
 /**
 * Translate a Playwright MCP browser tool call into a short progress line.
 *
 * Every known `mcp__playwright__browser_*` tool has a fixed message; element
 * labels and waited-for text are truncated for display. Unknown tools fall
 * back to the last underscore-separated segment of the tool name.
 */
 function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args = toolCall.input || {};
  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;
    // Page interaction
    case 'mcp__playwright__browser_click': {
      const target = args.element || 'element';
      return `🖱️ Clicking ${target.slice(0, 25)}`;
    }
    case 'mcp__playwright__browser_hover': {
      const target = args.element || 'element';
      return `👆 Hovering over ${target.slice(0, 20)}`;
    }
    case 'mcp__playwright__browser_type': {
      const target = args.element || 'field';
      return `⌨️ Typing in ${target.slice(0, 20)}`;
    }
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;
    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;
    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;
    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;
    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;
    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;
    // Anything not listed above: show the trailing segment of the tool name
    default: {
      const trailingSegment = name.split('_').pop();
      return `🌐 Browser: ${trailingSegment}`;
    }
  }
 }
 /**
 * Strip raw `{"type":"tool_use",...}` JSON lines out of assistant content.
 *
 * - Blank lines are dropped; ordinary text lines are kept untouched.
 * - Task calls become a "Launching ..." line.
 * - TodoWrite and Playwright browser calls become a one-line summary (or
 *   nothing, when the summary is empty).
 * - Every other tool call is hidden.
 * - A line that looks like a tool call but fails to parse, or whose handling
 *   throws, is kept verbatim as text.
 *
 * Falsy or non-string input is returned as `content || ''`.
 */
 export function filterJsonToolCalls(content: string | null | undefined): string {
  if (typeof content !== 'string' || !content) {
    return content || '';
  }
  const kept: string[] = [];
  content.split('\n').forEach((rawLine) => {
    const stripped = rawLine.trim();
    if (stripped === '') {
      return;
    }
    if (!stripped.startsWith('{"type":"tool_use"')) {
      // Plain assistant text
      kept.push(rawLine);
      return;
    }
    // Replacement text for the tool call; a falsy value means "emit nothing"
    let rendered: string | null;
    try {
      const call = JSON.parse(stripped) as ToolCall;
      if (call.name === 'Task') {
        rendered = `🚀 Launching ${call.input?.description || 'analysis agent'}`;
      } else if (call.name === 'TodoWrite') {
        rendered = summarizeTodoUpdate(call.input);
      } else if (call.name.startsWith('mcp__playwright__browser_')) {
        rendered = formatBrowserAction(call);
      } else {
        // Read, Write, Grep, etc. stay hidden
        rendered = null;
      }
    } catch {
      // Not valid tool-call JSON after all: treat the line as regular text
      rendered = rawLine;
    }
    if (rendered) {
      kept.push(rendered);
    }
  });
  return kept.join('\n');
 }
 export function detectExecutionContext(description: string): ExecutionContext {
  const isParallelExecution =
    description.includes('vuln agent') || description.includes('exploit agent');
@@ -33,8 +287,7 @@ export function formatAssistantOutput(
  cleanedContent: string,
  context: ExecutionContext,
  turnCount: number,
-  description: string,
+  description: string
  colorFn: typeof chalk.cyan = chalk.cyan
 ): string[] {
  if (!cleanedContent.trim()) {
    return [];
@@ -45,11 +298,11 @@ export function formatAssistantOutput(
  if (context.isParallelExecution) {
    // Compact output for parallel agents with prefixes
    const prefix = getAgentPrefix(description);
-    lines.push(colorFn(`${prefix} ${cleanedContent}`));
+    lines.push(`${prefix} ${cleanedContent}`);
  } else {
    // Full turn output for sequential agents
-    lines.push(colorFn(`\n    Turn ${turnCount} (${description}):`));
+    lines.push(`\n    Turn ${turnCount} (${description}):`);
-    lines.push(colorFn(`    ${cleanedContent}`));
+    lines.push(`    ${cleanedContent}`);
  }
  return lines;
@@ -58,28 +311,24 @@ export function formatAssistantOutput(
 export function formatResultOutput(data: ResultData, showFullResult: boolean): string[] {
  const lines: string[] = [];
-  lines.push(chalk.magenta(`\n    COMPLETED:`));
+  lines.push(`\n    COMPLETED:`);
-  lines.push(
+  lines.push(`    Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`);
    chalk.gray(
      `    Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`
    )
  );
  if (data.subtype === 'error_max_turns') {
-    lines.push(chalk.red(`    Stopped: Hit maximum turns limit`));
+    lines.push(`    Stopped: Hit maximum turns limit`);
  } else if (data.subtype === 'error_during_execution') {
-    lines.push(chalk.red(`    Stopped: Execution error`));
+    lines.push(`    Stopped: Execution error`);
  }
  if (data.permissionDenials > 0) {
-    lines.push(chalk.yellow(`    ${data.permissionDenials} permission denials`));
+    lines.push(`    ${data.permissionDenials} permission denials`);
  }
  if (showFullResult && data.result && typeof data.result === 'string') {
    if (data.result.length > 1000) {
-      lines.push(chalk.magenta(`    ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`));
+      lines.push(`    ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`);
    } else {
-      lines.push(chalk.magenta(`    ${data.result}`));
+      lines.push(`    ${data.result}`);
    }
  }
@@ -98,24 +347,24 @@ export function formatErrorOutput(
  if (context.isParallelExecution) {
    const prefix = getAgentPrefix(description);
-    lines.push(chalk.red(`${prefix} Failed (${formatDuration(duration)})`));
+    lines.push(`${prefix} Failed (${formatDuration(duration)})`);
  } else if (context.useCleanOutput) {
-    lines.push(chalk.red(`${context.agentType} failed (${formatDuration(duration)})`));
+    lines.push(`${context.agentType} failed (${formatDuration(duration)})`);
  } else {
-    lines.push(chalk.red(`  Claude Code failed: ${description} (${formatDuration(duration)})`));
+    lines.push(`  Claude Code failed: ${description} (${formatDuration(duration)})`);
  }
-  lines.push(chalk.red(`    Error Type: ${error.constructor.name}`));
+  lines.push(`    Error Type: ${error.constructor.name}`);
-  lines.push(chalk.red(`    Message: ${error.message}`));
+  lines.push(`    Message: ${error.message}`);
-  lines.push(chalk.gray(`    Agent: ${description}`));
+  lines.push(`    Agent: ${description}`);
-  lines.push(chalk.gray(`    Working Directory: ${sourceDir}`));
+  lines.push(`    Working Directory: ${sourceDir}`);
-  lines.push(chalk.gray(`    Retryable: ${isRetryable ? 'Yes' : 'No'}`));
+  lines.push(`    Retryable: ${isRetryable ? 'Yes' : 'No'}`);
  if (error.code) {
-    lines.push(chalk.gray(`    Error Code: ${error.code}`));
+    lines.push(`    Error Code: ${error.code}`);
  }
  if (error.status) {
-    lines.push(chalk.gray(`    HTTP Status: ${error.status}`));
+    lines.push(`    HTTP Status: ${error.status}`);
  }
  return lines;
@@ -129,18 +378,14 @@ export function formatCompletionMessage(
 ): string {
  if (context.isParallelExecution) {
    const prefix = getAgentPrefix(description);
-    return chalk.green(`${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`);
+    return `${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`;
  }
  if (context.useCleanOutput) {
-    return chalk.green(
+    return `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`;
      `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`
    );
  }
-  return chalk.green(
+  return `  Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
    `  Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`
  );
 }
 export function formatToolUseOutput(
@@ -149,9 +394,9 @@ export function formatToolUseOutput(
 ): string[] {
  const lines: string[] = [];
-  lines.push(chalk.yellow(`\n    Using Tool: ${toolName}`));
+  lines.push(`\n    Using Tool: ${toolName}`);
  if (input && Object.keys(input).length > 0) {
-    lines.push(chalk.gray(`    Input: ${JSON.stringify(input, null, 2)}`));
+    lines.push(`    Input: ${JSON.stringify(input, null, 2)}`);
  }
  return lines;
@@ -160,9 +405,9 @@ export function formatToolUseOutput(
 export function formatToolResultOutput(displayContent: string): string[] {
  const lines: string[] = [];
-  lines.push(chalk.green(`    Tool Result:`));
+  lines.push(`    Tool Result:`);
  if (displayContent) {
-    lines.push(chalk.gray(`    ${displayContent}`));
+    lines.push(`    ${displayContent}`);
  }
  return lines;
@@ -26,9 +26,3 @@ export function getActualModelName(sdkReportedModel?: string): string | undefine
  return sdkReportedModel;
 }
 /**
 * Report whether router mode is active.
 *
 * Router mode requires both the ANTHROPIC_BASE_URL and ROUTER_DEFAULT
 * environment variables to be set to non-empty values.
 */
 export function isRouterMode(): boolean {
  const { ANTHROPIC_BASE_URL: baseUrl, ROUTER_DEFAULT: routerDefault } = process.env;
  return Boolean(baseUrl) && Boolean(routerDefault);
 }
@@ -13,22 +13,6 @@ export interface ExecutionContext {
  agentKey: string;
 }
 /**
 * Accumulated state for one message-processing run.
 *
 * NOTE(review): no reader or writer of this type is visible in this part of
 * the file; the field notes below are inferred from names only — confirm
 * against the consumer before relying on them.
 */
 export interface ProcessingState {
  // presumably the number of assistant turns seen so far
  turnCount: number;
  // final result text, or null when none is available
  result: string | null;
  // presumably set once an API error is detected in assistant output
  apiErrorDetected: boolean;
  // presumably USD, matching the cost values reported elsewhere in this file — TODO confirm
  totalCost: number;
  // NOTE(review): relationship to totalCost is not shown here
  partialCost: number;
  // presumably a timestamp of the last heartbeat — unit not shown here
  lastHeartbeat: number;
 }
 /**
 * Summary returned once message processing finishes.
 *
 * NOTE(review): the producer of this type is not visible in this part of the
 * file; field meanings are inferred from names — confirm before relying on them.
 */
 export interface ProcessingResult {
  // final result text, or null when none is available
  result: string | null;
  // presumably the number of assistant turns processed
  turnCount: number;
  // presumably true when an API error was detected in assistant output
  apiErrorDetected: boolean;
  // presumably USD — TODO confirm
  totalCost: number;
 }
 export interface AssistantResult {
  content: string;
  cleanedContent: string;
@@ -110,15 +94,6 @@ export interface ApiErrorDetection {
  shouldThrow?: Error;
 }
 /**
 * Message types from SDK stream.
 *
 * Union of the six message shapes named below. The two members whose
 * definitions are visible nearby (SystemInitMessage, UserMessage) carry a
 * literal `type` tag; presumably the other members do as well — verify
 * before switching exhaustively on `type`.
 */
 export type SdkMessage =
  | AssistantMessage
  | ResultMessage
  | ToolUseMessage
  | ToolResultMessage
  | SystemInitMessage
  | UserMessage;
 export interface SystemInitMessage {
  type: 'system';
  subtype: 'init';
@@ -131,16 +106,3 @@ export interface UserMessage {
  type: 'user';
 }
 /**
 * Dispatch result types for message processing.
 *
 * Discriminated on `action`:
 *  - 'continue': carries no payload
 *  - 'break': carries the final `result` (nullable) and its `cost`
 *  - 'throw': carries the `error` to raise
 *
 * NOTE(review): a separate `MessageDispatchAction` union (tagged on `type`)
 * also exists in this codebase — confirm which of the two callers should use.
 */
 export type MessageDispatchResult =
  | { action: 'continue' }
  | { action: 'break'; result: string | null; cost: number }
  | { action: 'throw'; error: Error };
 /**
 * Per-message context bundle for dispatching.
 *
 * NOTE(review): no consumer of this type is visible in this part of the file;
 * field notes are inferred from names — confirm before relying on them.
 */
 export interface MessageDispatchContext {
  // presumably the turn number of the message being dispatched
  turnCount: number;
  execContext: ExecutionContext;
  // agent description used in console output
  description: string;
  // maps plain text to (presumably color-decorated) text
  colorFn: (text: string) => string;
  // NOTE(review): ExecutionContext appears to expose a flag of the same name — confirm whether both are needed
  useCleanOutput: boolean;
 }
@@ -17,21 +17,13 @@ import { MetricsTracker } from './metrics-tracker.js';
 import { initializeAuditStructure, type SessionMetadata } from './utils.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import { SessionMutex } from '../utils/concurrency.js';
 import type { AgentEndResult } from '../types/index.js';
 import { PentestError } from '../services/error-handling.js';
 import { ErrorCode } from '../types/errors.js';
 // Global mutex instance
 const sessionMutex = new SessionMutex();
 /**
 * Outcome of a single agent attempt, passed to `AuditSession.endAgent`.
 *
 * NOTE(review): a type with the same name is also imported from
 * '../types/index.js' in this file — confirm which declaration is authoritative.
 */
 interface AgentEndResult {
  // Attempt counter for this agent run
  attemptNumber: number;
  // Attempt duration in milliseconds
  duration_ms: number;
  // Attempt cost in US dollars
  cost_usd: number;
  // Whether the attempt succeeded
  success: boolean;
  // presumably the model name reported for the attempt — TODO confirm
  model?: string | undefined;
  // Error message when the attempt failed
  error?: string | undefined;
  // presumably a git checkpoint identifier — TODO confirm
  checkpoint?: string | undefined;
  // True when no further retries will follow this attempt
  isFinalAttempt?: boolean | undefined;
 }
 /**
 * AuditSession - Main audit system facade
 */
@@ -50,10 +42,22 @@ export class AuditSession {
    // Validate required fields
    if (!this.sessionId) {
-      throw new Error('sessionMetadata.id is required');
+      throw new PentestError(
        'sessionMetadata.id is required',
        'config',
        false,
        { field: 'sessionMetadata.id' },
        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
    if (!this.sessionMetadata.webUrl) {
-      throw new Error('sessionMetadata.webUrl is required');
+      throw new PentestError(
        'sessionMetadata.webUrl is required',
        'config',
        false,
        { field: 'sessionMetadata.webUrl' },
        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
    // Components
@@ -103,29 +107,26 @@ export class AuditSession {
  ): Promise<void> {
    await this.ensureInitialized();
-    // Save prompt snapshot (only on first attempt)
+    // 1. Save prompt snapshot (only on first attempt)
    if (attemptNumber === 1) {
      await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
    }
-    // Track current agent name for workflow logging
+    // 2. Create and initialize the per-agent logger
    this.currentAgentName = agentName;
    // Create and initialize logger for this attempt
    this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
    await this.currentLogger.initialize();
-    // Start metrics tracking
+    // 3. Start metrics timer
    this.metricsTracker.startAgent(agentName, attemptNumber);
-    // Log start event
+    // 4. Log start event to both agent log and workflow log
    await this.currentLogger.logEvent('agent_start', {
      agentName,
      attemptNumber,
      timestamp: formatTimestamp(),
    });
    // Log to unified workflow log
    await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
  }
@@ -134,7 +135,13 @@ export class AuditSession {
   */
  async logEvent(eventType: string, eventData: unknown): Promise<void> {
    if (!this.currentLogger) {
-      throw new Error('No active logger. Call startAgent() first.');
+      throw new PentestError(
        'No active logger. Call startAgent() first.',
        'validation',
        false,
        {},
        ErrorCode.AGENT_EXECUTION_FAILED
      );
    }
    // Log to agent-specific log file (JSON format)
@@ -167,7 +174,7 @@ export class AuditSession {
   * End agent execution (mutex-protected)
   */
  async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
-    // Log end event
+    // 1. Finalize agent log and close the stream
    if (this.currentLogger) {
      await this.currentLogger.logEvent('agent_end', {
        agentName,
@@ -177,15 +184,13 @@ export class AuditSession {
        timestamp: formatTimestamp(),
      });
      // Close logger
      await this.currentLogger.close();
      this.currentLogger = null;
    }
-    // Reset current agent name
+    // 2. Log completion to the unified workflow log
    this.currentAgentName = null;
    // Log to unified workflow log
    const agentLogDetails: AgentLogDetails = {
      attemptNumber: result.attemptNumber,
      duration_ms: result.duration_ms,
@@ -195,13 +200,11 @@ export class AuditSession {
    };
    await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
-    // Mutex-protected update to session.json
+    // 3. Acquire mutex before touching session.json
    const unlock = await sessionMutex.lock(this.sessionId);
    try {
-      // Reload inside mutex to prevent lost updates during parallel exploitation phase
+      // 4. Reload-then-write inside mutex to prevent lost updates during parallel phases
      await this.metricsTracker.reload();
      // Update metrics
      await this.metricsTracker.endAgent(agentName, result);
    } finally {
      unlock();
@@ -278,4 +281,18 @@ export class AuditSession {
      unlock();
    }
  }
   /**
    * Log resume header to workflow.log
    * Call this when a workflow is resuming to add a visual separator
    *
    * Awaits `ensureInitialized()` first, then forwards `resumeInfo` unchanged
    * to `workflowLogger.logResumeHeader`.
    *
    * @param resumeInfo - Identifiers of the previous and new workflow, the
    *   checkpoint hash being resumed from, and the names of agents already
    *   completed.
    */
   async logResumeHeader(resumeInfo: {
    previousWorkflowId: string;
    newWorkflowId: string;
    checkpointHash: string;
    completedAgents: string[];
  }): Promise<void> {
    await this.ensureInitialized();
    await this.workflowLogger.logResumeHeader(resumeInfo);
  }
 }
@@ -17,7 +17,3 @@
 */
 export { AuditSession } from './audit-session.js';
 export { AgentLogger } from './logger.js';
 export { WorkflowLogger } from './workflow-logger.js';
 export { MetricsTracker } from './metrics-tracker.js';
 export * as AuditUtils from './utils.js';
@@ -0,0 +1,127 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * LogStream - Stream composition utility for append-only logging
 *
 * Encapsulates the common stream management pattern used by AgentLogger
 * and WorkflowLogger: opening streams in append mode, handling backpressure,
 * and proper cleanup.
 */
 import fs from 'fs';
 import path from 'path';
 import { ensureDirectory } from '../utils/file-io.js';
 /**
 * LogStream - Manages a single append-only log file stream.
 *
 * Wraps an `fs.WriteStream` opened in append mode and exposes promise-based
 * `open` / `write` / `close` operations. Stream errors are logged to stderr
 * and mark the stream as closed instead of crashing the process.
 */
 export class LogStream {
  private readonly filePath: string;
  private stream: fs.WriteStream | null = null;
  private _isOpen: boolean = false;
  /**
   * @param filePath - Path of the log file; nothing is touched on disk until `open()` is called
   */
  constructor(filePath: string) {
    this.filePath = filePath;
  }
  /**
   * Open the stream for writing (creates parent directories, opens in append mode).
   * Calling it again while the stream is already open is a no-op.
   */
  async open(): Promise<void> {
    if (this._isOpen) {
      return;
    }
    // Ensure parent directory exists
    await ensureDirectory(path.dirname(this.filePath));
    // Create write stream in append mode
    this.stream = fs.createWriteStream(this.filePath, {
      flags: 'a',
      encoding: 'utf8',
      autoClose: true,
    });
    // Handle stream errors to prevent crashes (log and mark closed)
    this.stream.on('error', (err) => {
      console.error(`LogStream error for ${this.filePath}:`, err.message);
      this._isOpen = false;
    });
    this._isOpen = true;
  }
  /**
   * Write text to the stream with backpressure handling.
   *
   * Resolves when the chunk has been flushed, or - if the stream signalled
   * backpressure - when 'drain' fires, whichever happens first. The promise
   * settles exactly once, so a write callback that runs before 'drain'
   * (several writes in flight) can no longer leave the caller waiting forever.
   *
   * @param text - Text to append
   * @throws Error('LogStream not open') (as a rejection) when the stream is not open;
   *   also rejects with the underlying error when the write itself fails
   */
  async write(text: string): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      throw new Error('LogStream not open');
    }
    return new Promise<void>((resolve, reject) => {
      let settled = false;
      const onDrain = (): void => {
        settle();
      };
      // Single exit point: detaches the drain listener and settles at most once
      const settle = (error?: Error | null): void => {
        if (settled) {
          return;
        }
        settled = true;
        stream.removeListener('drain', onDrain);
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      };
      const accepted = stream.write(text, 'utf8', (error) => settle(error));
      if (!accepted && !settled) {
        // Buffer is above the high-water mark: also wake up when it drains
        stream.once('drain', onDrain);
      }
    });
  }
  /**
   * Close the stream (flush and close). No-op when the stream is not open.
   */
  async close(): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      return;
    }
    return new Promise<void>((resolve) => {
      stream.end(() => {
        this._isOpen = false;
        this.stream = null;
        resolve();
      });
    });
  }
  /**
   * Check if the stream is currently open
   */
  get isOpen(): boolean {
    return this._isOpen;
  }
  /**
   * Get the file path this stream writes to
   */
  get path(): string {
    return this.filePath;
  }
 }
@@ -8,10 +8,9 @@
 * Append-Only Agent Logger
 *
 * Provides crash-safe, append-only logging for agent execution.
- * Uses file streams with immediate flush to prevent data loss.
+ * Uses LogStream for stream management with backpressure handling.
 */
 import fs from 'fs';
 import {
  generateLogPath,
  generatePromptPath,
@@ -19,6 +18,7 @@ import {
 } from './utils.js';
 import { atomicWrite } from '../utils/file-io.js';
 import { formatTimestamp } from '../utils/formatting.js';
 import { LogStream } from './log-stream.js';
 interface LogEvent {
  type: string;
@@ -30,13 +30,11 @@ interface LogEvent {
 * AgentLogger - Manages append-only logging for a single agent execution
 */
 export class AgentLogger {
-  private sessionMetadata: SessionMetadata;
+  private readonly sessionMetadata: SessionMetadata;
-  private agentName: string;
+  private readonly agentName: string;
-  private attemptNumber: number;
+  private readonly attemptNumber: number;
-  private timestamp: number;
+  private readonly timestamp: number;
-  private logPath: string;
+  private readonly logStream: LogStream;
  private stream: fs.WriteStream | null = null;
  private isOpen: boolean = false;
  constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
    this.sessionMetadata = sessionMetadata;
@@ -44,26 +42,19 @@ export class AgentLogger {
    this.attemptNumber = attemptNumber;
    this.timestamp = Date.now();
-    // Generate log file path
+    const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
-    this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
+    this.logStream = new LogStream(logPath);
  }
  /**
   * Initialize the log stream (creates file and opens stream)
   */
  async initialize(): Promise<void> {
-    if (this.isOpen) {
+    if (this.logStream.isOpen) {
      return; // Already initialized
    }
-    // Create write stream with append mode and auto-flush
+    await this.logStream.open();
    this.stream = fs.createWriteStream(this.logPath, {
      flags: 'a', // Append mode
      encoding: 'utf8',
      autoClose: true,
    });
    this.isOpen = true;
    // Write header
    await this.writeHeader();
@@ -83,29 +74,7 @@ export class AgentLogger {
      `========================================\n`,
    ].join('\n');
-    return this.writeRaw(header);
+    return this.logStream.write(header);
  }
  /**
   * Write raw text to log file with immediate flush
   */
  private writeRaw(text: string): Promise<void> {
    return new Promise((resolve, reject) => {
      if (!this.isOpen || !this.stream) {
        reject(new Error('Logger not initialized'));
        return;
      }
      const needsDrain = !this.stream.write(text, 'utf8', (error) => {
        if (error) reject(error);
      });
      if (needsDrain) {
        this.stream.once('drain', resolve);
      } else {
        resolve();
      }
    });
  }
  /**
@@ -120,23 +89,14 @@ export class AgentLogger {
    };
    const eventLine = `${JSON.stringify(event)}\n`;
-    return this.writeRaw(eventLine);
+    return this.logStream.write(eventLine);
  }
  /**
   * Close the log stream
   */
  async close(): Promise<void> {
-    if (!this.isOpen || !this.stream) {
+    return this.logStream.close();
      return;
    }
    return new Promise((resolve) => {
      this.stream!.end(() => {
        this.isOpen = false;
        resolve();
      });
    });
  }
  /**
@@ -18,7 +18,9 @@ import {
 import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
 import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
 import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
-import type { AgentName } from '../types/index.js';
+import { PentestError } from '../services/error-handling.js';
 import { ErrorCode } from '../types/errors.js';
 import type { AgentName, AgentEndResult } from '../types/index.js';
 interface AttemptData {
  attempt_number: number;
@@ -30,7 +32,7 @@ interface AttemptData {
  error?: string | undefined;
 }
-interface AgentMetrics {
+interface AgentAuditMetrics {
  status: 'in-progress' | 'success' | 'failed';
  attempts: AttemptData[];
  final_duration_ms: number;
@@ -68,21 +70,10 @@ interface SessionData {
    total_duration_ms: number;
    total_cost_usd: number;
    phases: Record<string, PhaseMetrics>;
-    agents: Record<string, AgentMetrics>;
+    agents: Record<string, AgentAuditMetrics>;
  };
 }
 // Outcome of a single agent attempt, as passed to MetricsTracker.endAgent().
 interface AgentEndResult {
  // Stored on the attempt record as `attempt_number`.
  attemptNumber: number;
  // Stored on the attempt record; on success it also becomes the agent's `final_duration_ms`.
  duration_ms: number;
  // Cost of this attempt in USD (presumably what endAgent sums into `total_cost_usd` — confirm).
  cost_usd: number;
  // When true the agent is marked 'success'.
  success: boolean;
  // Copied onto the agent metrics on success when set.
  model?: string | undefined;
  // Copied onto the attempt record when present.
  error?: string | undefined;
  // Copied onto the agent metrics on success when present.
  checkpoint?: string | undefined;
  // On an unsuccessful attempt, a truthy value marks the agent 'failed'.
  isFinalAttempt?: boolean | undefined;
 }
 interface ActiveTimer {
  startTime: number;
  attemptNumber: number;
@@ -170,10 +161,16 @@ export class MetricsTracker {
   */
  async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
    if (!this.data) {
-      throw new Error('MetricsTracker not initialized');
+      throw new PentestError(
        'MetricsTracker not initialized',
        'validation',
        false,
        {},
        ErrorCode.AGENT_EXECUTION_FAILED
      );
    }
-    // Initialize agent metrics if not exists
+    // 1. Initialize agent metrics if first time seeing this agent
    const existingAgent = this.data.metrics.agents[agentName];
    const agent = existingAgent ?? {
      status: 'in-progress' as const,
@@ -183,7 +180,7 @@ export class MetricsTracker {
    };
    this.data.metrics.agents[agentName] = agent;
-    // Add attempt to array
+    // 2. Build attempt record with optional model/error fields
    const attempt: AttemptData = {
      attempt_number: result.attemptNumber,
      duration_ms: result.duration_ms,
@@ -200,16 +197,18 @@ export class MetricsTracker {
      attempt.error = result.error;
    }
    // 3. Append attempt to history
    agent.attempts.push(attempt);
-    // Update total cost (includes failed attempts)
+    // 4. Recalculate total cost across all attempts (includes failures)
    agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
-    // If successful, update final metrics and status
+    // 5. Update agent status based on outcome
    if (result.success) {
      agent.status = 'success';
      agent.final_duration_ms = result.duration_ms;
      // 6. Attach model and checkpoint metadata on success
      if (result.model) {
        agent.model = result.model;
      }
@@ -218,19 +217,18 @@ export class MetricsTracker {
        agent.checkpoint = result.checkpoint;
      }
    } else {
      // If this was the last attempt, mark as failed
      if (result.isFinalAttempt) {
        agent.status = 'failed';
      }
    }
-    // Clear active timer
+    // 7. Clear active timer
    this.activeTimers.delete(agentName);
-    // Recalculate aggregations
+    // 8. Recalculate phase and session-level aggregations
    this.recalculateAggregations();
-    // Save to disk
+    // 9. Persist to session.json
    await this.save();
  }
@@ -262,7 +260,13 @@ export class MetricsTracker {
    checkpointHash?: string
  ): Promise<void> {
    if (!this.data) {
-      throw new Error('MetricsTracker not initialized');
+      throw new PentestError(
        'MetricsTracker not initialized',
        'validation',
        false,
        {},
        ErrorCode.AGENT_EXECUTION_FAILED
      );
    }
    // Ensure originalWorkflowId is set (backfill if missing from old sessions)
@@ -326,9 +330,9 @@ export class MetricsTracker {
   * Calculate phase-level metrics
   */
  private calculatePhaseMetrics(
-    successfulAgents: Array<[string, AgentMetrics]>
+    successfulAgents: Array<[string, AgentAuditMetrics]>
  ): Record<string, PhaseMetrics> {
-    const phases: Record<PhaseName, AgentMetrics[]> = {
+    const phases: Record<PhaseName, AgentAuditMetrics[]> = {
      'pre-recon': [],
      'recon': [],
      'vulnerability-analysis': [],
@@ -15,20 +15,17 @@ import fs from 'fs/promises';
 import path from 'path';
 import { fileURLToPath } from 'url';
 import { ensureDirectory } from '../utils/file-io.js';
 export type { SessionMetadata } from '../types/audit.js';
 import type { SessionMetadata } from '../types/audit.js';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 // Get Shannon repository root
-export const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
+const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
-export const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
+const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
 // Identifying information for one session, passed to the audit path helpers and loggers.
 export interface SessionMetadata {
  // Session identifier; the workflow log prints it as "Workflow ID".
  id: string;
  // NOTE(review): presumably the target application's URL — confirm against callers.
  webUrl: string;
  // NOTE(review): presumably the local repository under test — confirm against callers.
  repoPath?: string;
  // NOTE(review): presumably a custom output directory — confirm against callers.
  outputPath?: string;
  // Any additional keys are accepted with unknown value types.
  [key: string]: unknown;
 }
 /**
 * Extract and sanitize hostname from URL for use in identifiers
@@ -93,98 +90,6 @@ export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): strin
  return path.join(auditPath, 'workflow.log');
 }
 /**
 * Ensure directory exists (idempotent, race-safe)
 */
 export async function ensureDirectory(dirPath: string): Promise<void> {
  // Creates `dirPath` (and any missing parents). Resolves when the directory
  // exists afterwards; rejects with the original mkdir error otherwise.
  try {
    await fs.mkdir(dirPath, { recursive: true });
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code !== 'EEXIST') {
      throw error;
    }
    // EEXIST is only safe to ignore when the existing entry really is a
    // directory. With `recursive: true`, mkdir reports EEXIST when the path is
    // occupied by a regular file, and swallowing that would claim success
    // although no directory exists.
    const stats = await fs.stat(dirPath).catch(() => null);
    if (!stats || !stats.isDirectory()) {
      throw error;
    }
  }
 }
 /**
 * Atomic write using temp file + rename pattern
 * Guarantees no partial writes or corruption on crash
 */
 export async function atomicWrite(filePath: string, data: object | string): Promise<void> {
  // Stage the content in a sibling "<file>.tmp", then rename it over the
  // target. Strings are written verbatim; anything else is serialized as
  // 2-space-indented JSON.
  const stagingPath = filePath + '.tmp';
  let payload: string;
  if (typeof data === 'string') {
    payload = data;
  } else {
    payload = JSON.stringify(data, null, 2);
  }
  try {
    await fs.writeFile(stagingPath, payload, 'utf8');
    await fs.rename(stagingPath, filePath);
  } catch (failure) {
    // Best-effort removal of the staging file; a cleanup failure is ignored
    // so the original error is the one that propagates.
    await fs.unlink(stagingPath).catch(() => undefined);
    throw failure;
  }
 }
 /**
 * Format duration in milliseconds to human-readable string
 */
 export function formatDuration(ms: number): string {
  // Sub-second values are shown as raw milliseconds.
  if (ms < 1000) return `${ms}ms`;
  const totalSeconds = ms / 1000;
  // Under a minute: seconds with one decimal; otherwise whole minutes and seconds.
  return totalSeconds < 60
    ? `${totalSeconds.toFixed(1)}s`
    : `${Math.floor(totalSeconds / 60)}m ${Math.floor(totalSeconds % 60)}s`;
 }
 /**
 * Format timestamp to ISO 8601 string
 */
 export function formatTimestamp(timestamp: number = Date.now()): string {
  // Epoch milliseconds (defaulting to the current time) rendered as an ISO 8601 UTC string.
  const instant = new Date(timestamp);
  return instant.toISOString();
 }
 /**
 * Calculate percentage
 */
 export function calculatePercentage(part: number, total: number): number {
  // A zero total yields 0 instead of dividing by zero.
  return total === 0 ? 0 : (part / total) * 100;
 }
 /**
 * Read and parse JSON file
 */
 export async function readJson<T = unknown>(filePath: string): Promise<T> {
  // Reads the file as UTF-8 and parses it as JSON. The result is cast to T
  // without validation; read and parse errors propagate to the caller.
  const parsed: unknown = JSON.parse(await fs.readFile(filePath, 'utf8'));
  return parsed as T;
 }
 /**
 * Check if file exists
 */
 export async function fileExists(filePath: string): Promise<boolean> {
  // Maps the outcome of fs.access to a boolean: any rejection counts as "does not exist".
  return fs.access(filePath).then(
    () => true,
    () => false
  );
 }
 /**
 * Initialize audit directory structure for a session
 * Creates: audit-logs/{sessionId}/, agents/, prompts/, deliverables/
@@ -11,10 +11,10 @@
 * Optimized for `tail -f` viewing during concurrent workflow execution.
 */
-import fs from 'fs';
+import fs from 'fs/promises';
-import path from 'path';
+import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
 import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
 import { formatDuration, formatTimestamp } from '../utils/formatting.js';
 import { LogStream } from './log-stream.js';
 export interface AgentLogDetails {
  attemptNumber?: number;
@@ -42,38 +42,27 @@ export interface WorkflowSummary {
 * WorkflowLogger - Manages the unified workflow log file
 */
 export class WorkflowLogger {
-  private sessionMetadata: SessionMetadata;
+  private readonly sessionMetadata: SessionMetadata;
-  private logPath: string;
+  private readonly logStream: LogStream;
  private stream: fs.WriteStream | null = null;
  private initialized: boolean = false;
  constructor(sessionMetadata: SessionMetadata) {
    this.sessionMetadata = sessionMetadata;
-    this.logPath = generateWorkflowLogPath(sessionMetadata);
+    const logPath = generateWorkflowLogPath(sessionMetadata);
    this.logStream = new LogStream(logPath);
  }
  /**
   * Initialize the log stream (creates file and writes header)
   */
  async initialize(): Promise<void> {
-    if (this.initialized) {
+    if (this.logStream.isOpen) {
      return;
    }
-    // Ensure directory exists
+    await this.logStream.open();
    await ensureDirectory(path.dirname(this.logPath));
    // Create write stream with append mode
    this.stream = fs.createWriteStream(this.logPath, {
      flags: 'a',
      encoding: 'utf8',
      autoClose: true,
    });
    this.initialized = true;
    // Write header only if file is new (empty)
-    const stats = await fs.promises.stat(this.logPath).catch(() => null);
+    const stats = await fs.stat(this.logStream.path).catch(() => null);
    if (!stats || stats.size === 0) {
      await this.writeHeader();
    }
@@ -94,29 +83,35 @@ export class WorkflowLogger {
      ``,
    ].join('\n');
-    return this.writeRaw(header);
+    return this.logStream.write(header);
  }
  /**
-   * Write raw text to log file with immediate flush
+   * Write resume header to log file when workflow is resumed
   */
-  private writeRaw(text: string): Promise<void> {
+  async logResumeHeader(resumeInfo: {
-    return new Promise((resolve, reject) => {
+    previousWorkflowId: string;
-      if (!this.initialized || !this.stream) {
+    newWorkflowId: string;
-        reject(new Error('WorkflowLogger not initialized'));
+    checkpointHash: string;
-        return;
+    completedAgents: string[];
-      }
+  }): Promise<void> {
    await this.ensureInitialized();
-      const needsDrain = !this.stream.write(text, 'utf8', (error) => {
+    const header = [
-        if (error) reject(error);
+      ``,
-      });
+      `================================================================================`,
      `RESUMED`,
      `================================================================================`,
      `Previous Workflow ID: ${resumeInfo.previousWorkflowId}`,
      `New Workflow ID:      ${resumeInfo.newWorkflowId}`,
      `Resumed At:           ${formatTimestamp()}`,
      `Checkpoint:           ${resumeInfo.checkpointHash}`,
      `Completed:            ${resumeInfo.completedAgents.length} agents (${resumeInfo.completedAgents.join(', ')})`,
      `================================================================================`,
      ``,
    ].join('\n');
-      if (needsDrain) {
+    return this.logStream.write(header);
        this.stream.once('drain', resolve);
      } else {
        resolve();
      }
    });
  }
  /**
@@ -138,10 +133,10 @@ export class WorkflowLogger {
    // Add blank line before phase start for readability
    if (event === 'start') {
-      await this.writeRaw('\n');
+      await this.logStream.write('\n');
    }
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }
  /**
@@ -184,7 +179,7 @@ export class WorkflowLogger {
    }
    const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }
  /**
@@ -194,7 +189,7 @@ export class WorkflowLogger {
    await this.ensureInitialized();
    const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }
  /**
@@ -205,7 +200,7 @@ export class WorkflowLogger {
    const contextStr = context ? ` (${context})` : '';
    const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }
  /**
@@ -301,7 +296,7 @@ export class WorkflowLogger {
    const params = this.formatToolParams(toolName, parameters);
    const paramStr = params ? `: ${params}` : '';
    const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }
  /**
@@ -313,7 +308,7 @@ export class WorkflowLogger {
    // Show full content, replacing newlines with escaped version for single-line output
    const escaped = content.replace(/\n/g, '\\n');
    const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
-    await this.writeRaw(line);
+    await this.logStream.write(line);
  }
  /**
@@ -324,42 +319,42 @@ export class WorkflowLogger {
    const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
-    await this.writeRaw('\n');
+    await this.logStream.write('\n');
-    await this.writeRaw(`================================================================================\n`);
+    await this.logStream.write(`================================================================================\n`);
-    await this.writeRaw(`Workflow ${status}\n`);
+    await this.logStream.write(`Workflow ${status}\n`);
-    await this.writeRaw(`────────────────────────────────────────\n`);
+    await this.logStream.write(`────────────────────────────────────────\n`);
-    await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`);
+    await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`);
-    await this.writeRaw(`Status:      ${summary.status}\n`);
+    await this.logStream.write(`Status:      ${summary.status}\n`);
-    await this.writeRaw(`Duration:    ${formatDuration(summary.totalDurationMs)}\n`);
+    await this.logStream.write(`Duration:    ${formatDuration(summary.totalDurationMs)}\n`);
-    await this.writeRaw(`Total Cost:  $${summary.totalCostUsd.toFixed(4)}\n`);
+    await this.logStream.write(`Total Cost:  $${summary.totalCostUsd.toFixed(4)}\n`);
-    await this.writeRaw(`Agents:      ${summary.completedAgents.length} completed\n`);
+    await this.logStream.write(`Agents:      ${summary.completedAgents.length} completed\n`);
    if (summary.error) {
-      await this.writeRaw(`Error:       ${summary.error}\n`);
+      await this.logStream.write(`Error:       ${summary.error}\n`);
    }
-    await this.writeRaw(`\n`);
+    await this.logStream.write(`\n`);
-    await this.writeRaw(`Agent Breakdown:\n`);
+    await this.logStream.write(`Agent Breakdown:\n`);
    for (const agentName of summary.completedAgents) {
      const metrics = summary.agentMetrics[agentName];
      if (metrics) {
        const duration = formatDuration(metrics.durationMs);
        const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
-        await this.writeRaw(`  - ${agentName} (${duration}, ${cost})\n`);
+        await this.logStream.write(`  - ${agentName} (${duration}, ${cost})\n`);
      } else {
-        await this.writeRaw(`  - ${agentName}\n`);
+        await this.logStream.write(`  - ${agentName}\n`);
      }
    }
-    await this.writeRaw(`================================================================================\n`);
+    await this.logStream.write(`================================================================================\n`);
  }
  /**
   * Ensure initialized (helper for lazy initialization)
   */
  private async ensureInitialized(): Promise<void> {
-    if (!this.initialized) {
+    if (!this.logStream.isOpen) {
      await this.initialize();
    }
  }
@@ -368,15 +363,6 @@ export class WorkflowLogger {
   * Close the log stream
   */
  async close(): Promise<void> {
-    if (!this.initialized || !this.stream) {
+    return this.logStream.close();
      return;
    }
    return new Promise((resolve) => {
      this.stream!.end(() => {
        this.initialized = false;
        resolve();
      });
    });
  }
 }
@@ -1,59 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { fs, path } from 'zx';
 // Result shape shared by the validators in this file.
 interface ValidationResult {
  // True when the input passed every check.
  valid: boolean;
  // Human-readable reason; set by the validators whenever `valid` is false.
  error?: string;
  // Absolute repository path; set by validateRepoPath on success.
  path?: string;
 }
 // Helper function: Validate web URL
 export function validateWebUrl(url: string): ValidationResult {
  // Accepts only parseable URLs that use http/https and carry a hostname.
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    // The URL constructor throws on anything it cannot parse.
    return { valid: false, error: 'Invalid web URL format' };
  }
  const usesHttp = target.protocol === 'http:' || target.protocol === 'https:';
  if (!usesHttp) {
    return { valid: false, error: 'Web URL must use HTTP or HTTPS protocol' };
  }
  if (!target.hostname) {
    return { valid: false, error: 'Web URL must have a valid hostname' };
  }
  return { valid: true };
 }
 // Helper function: Validate local repository path
 export async function validateRepoPath(repoPath: string): Promise<ValidationResult> {
  // Checks, in order: the path exists, it is a directory, it is readable.
  // On success the result carries the resolved absolute path; any unexpected
  // error is reported as an invalid result rather than thrown.
  try {
    const present = await fs.pathExists(repoPath);
    if (!present) {
      return { valid: false, error: 'Repository path does not exist' };
    }

    const info = await fs.stat(repoPath);
    if (!info.isDirectory()) {
      return { valid: false, error: 'Repository path must be a directory' };
    }

    // Probe read permission separately so a failure gets its own message.
    let readable = true;
    try {
      await fs.access(repoPath, fs.constants.R_OK);
    } catch {
      readable = false;
    }
    if (!readable) {
      return { valid: false, error: 'Repository path is not readable' };
    }

    return { valid: true, path: path.resolve(repoPath) };
  } catch (error) {
    const errMsg = error instanceof Error ? error.message : String(error);
    return { valid: false, error: `Invalid repository path: ${errMsg}` };
  }
 }
@@ -1,49 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import chalk from 'chalk';
 import { displaySplashScreen } from '../splash-screen.js';
 // Helper function: Display help information
 export function showHelp(): void {
  // Section headings share one style; every entry below is printed with its
  // own console.log call, in order.
  const heading = (title: string): string => chalk.yellow.bold(title);
  const lines: string[] = [
    chalk.cyan.bold('AI Penetration Testing Agent'),
    chalk.gray('Automated security assessment tool\n'),
    heading('USAGE:'),
    '  shannon <WEB_URL> <REPO_PATH> [--config config.yaml] [--output /path/to/reports]\n',
    heading('OPTIONS:'),
    '  --config <file>      YAML configuration file for authentication and testing parameters',
    '  --output <path>      Custom output directory for session folder (default: ./audit-logs/)',
    '  --pipeline-testing   Use minimal prompts for fast pipeline testing (creates minimal deliverables)',
    '  --disable-loader     Disable the animated progress loader (useful when logs interfere with spinner)',
    '  --help               Show this help message\n',
    heading('EXAMPLES:'),
    '  shannon "https://example.com" "/path/to/local/repo"',
    '  shannon "https://example.com" "/path/to/local/repo" --config auth.yaml',
    '  shannon "https://example.com" "/path/to/local/repo" --output /path/to/reports',
    '  shannon "https://example.com" "/path/to/local/repo" --pipeline-testing\n',
    heading('REQUIREMENTS:'),
    '  • WEB_URL must start with http:// or https://',
    '  • REPO_PATH must be an accessible local directory',
    '  • Only test systems you own or have permission to test\n',
    heading('ENVIRONMENT VARIABLES:'),
    '  PENTEST_MAX_RETRIES    Number of retries for AI agents (default: 3)',
  ];
  for (const line of lines) {
    console.log(line);
  }
 }
 // Export the splash screen function for use in main
 export { displaySplashScreen };
@@ -7,13 +7,13 @@
 import { createRequire } from 'module';
 import { fs } from 'zx';
 import yaml from 'js-yaml';
-import { Ajv, type ValidateFunction } from 'ajv';
+import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
 import type { FormatsPlugin } from 'ajv-formats';
-import { PentestError } from './error-handling.js';
+import { PentestError } from './services/error-handling.js';
 import { ErrorCode } from './types/errors.js';
 import type {
  Config,
  Rule,
  Rules,
  Authentication,
  DistributedConfig,
 } from './types/config.js';
@@ -22,11 +22,9 @@ import type {
 const require = createRequire(import.meta.url);
 const addFormats: FormatsPlugin = require('ajv-formats');
 // Initialize AJV with formats
 const ajv = new Ajv({ allErrors: true, verbose: true });
 addFormats(ajv);
 // Load JSON Schema
 let configSchema: object;
 let validateSchema: ValidateFunction;
@@ -45,7 +43,6 @@ try {
  );
 }
 // Security patterns to block
 const DANGEROUS_PATTERNS: RegExp[] = [
  /\.\.\//, // Path traversal
  /[<>]/, // HTML/XML injection
@@ -54,32 +51,171 @@ const DANGEROUS_PATTERNS: RegExp[] = [
  /file:/i, // File URLs
 ];
-// Parse and load YAML configuration file with enhanced safety
+/**
-export const parseConfig = async (configPath: string): Promise<Config> => {
+ * Format a single AJV error into a human-readable message.
-  try {
+ * Translates AJV error keywords into plain English descriptions.
-    // File existence check
+ */
-    if (!(await fs.pathExists(configPath))) {
+function formatAjvError(error: ErrorObject): string {
-      throw new Error(`Configuration file not found: ${configPath}`);
+  const path = error.instancePath || 'root';
  const params = error.params as Record<string, unknown>;
  switch (error.keyword) {
    case 'required': {
      const missingProperty = params.missingProperty as string;
      return `Missing required field: "${missingProperty}" at ${path || 'root'}`;
    }
-    // File size check (prevent extremely large files)
+    case 'type': {
-    const stats = await fs.stat(configPath);
+      const expectedType = params.type as string;
-    const maxFileSize = 1024 * 1024; // 1MB
+      return `Invalid type at ${path}: expected ${expectedType}`;
-    if (stats.size > maxFileSize) {
+    }
-      throw new Error(
+
-        `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`
+    case 'enum': {
      const allowedValues = params.allowedValues as unknown[];
      const formattedValues = allowedValues.map((v) => `"${v}"`).join(', ');
      return `Invalid value at ${path}: must be one of [${formattedValues}]`;
    }
    case 'additionalProperties': {
      const additionalProperty = params.additionalProperty as string;
      return `Unknown field at ${path}: "${additionalProperty}" is not allowed`;
    }
    case 'minLength': {
      const limit = params.limit as number;
      return `Value at ${path} is too short: must have at least ${limit} character(s)`;
    }
    case 'maxLength': {
      const limit = params.limit as number;
      return `Value at ${path} is too long: must have at most ${limit} character(s)`;
    }
    case 'minimum': {
      const limit = params.limit as number;
      return `Value at ${path} is too small: must be >= ${limit}`;
    }
    case 'maximum': {
      const limit = params.limit as number;
      return `Value at ${path} is too large: must be <= ${limit}`;
    }
    case 'minItems': {
      const limit = params.limit as number;
      return `Array at ${path} has too few items: must have at least ${limit} item(s)`;
    }
    case 'maxItems': {
      const limit = params.limit as number;
      return `Array at ${path} has too many items: must have at most ${limit} item(s)`;
    }
    case 'pattern': {
      const pattern = params.pattern as string;
      return `Value at ${path} does not match required pattern: ${pattern}`;
    }
    case 'format': {
      const format = params.format as string;
      return `Value at ${path} must be a valid ${format}`;
    }
    case 'const': {
      const allowedValue = params.allowedValue as unknown;
      return `Value at ${path} must be exactly "${allowedValue}"`;
    }
    case 'oneOf': {
      return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`;
    }
    case 'anyOf': {
      return `Value at ${path} must match at least one of the allowed schemas`;
    }
    case 'not': {
      return `Value at ${path} matches a schema it should not match`;
    }
    case 'if': {
      return `Value at ${path} does not satisfy conditional schema requirements`;
    }
    case 'uniqueItems': {
      const i = params.i as number;
      const j = params.j as number;
      return `Array at ${path} contains duplicate items at positions ${j} and ${i}`;
    }
    case 'propertyNames': {
      const propertyName = params.propertyName as string;
      return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`;
    }
    case 'dependencies':
    case 'dependentRequired': {
      const property = params.property as string;
      const missingProperty = params.missingProperty as string;
      return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`;
    }
    default: {
      // Fallback for any unhandled keywords - use AJV's message if available
      const message = error.message || `validation failed for keyword "${error.keyword}"`;
      return `${path}: ${message}`;
    }
  }
 }
 /**
 * Format all AJV errors into a list of human-readable messages.
 * Returns an array of formatted error strings.
 */
 function formatAjvErrors(errors: ErrorObject[]): string[] {
  // One formatted message per AJV error, in the same order as the input.
  const messages = errors.map((issue) => formatAjvError(issue));
  return messages;
 }
 export const parseConfig = async (configPath: string): Promise<Config> => {
  try {
    // 1. Verify file exists
    if (!(await fs.pathExists(configPath))) {
      throw new PentestError(
        `Configuration file not found: ${configPath}`,
        'config',
        false,
        { configPath },
        ErrorCode.CONFIG_NOT_FOUND
      );
    }
-    // Read file content
+    // 2. Check file size
-    const configContent = await fs.readFile(configPath, 'utf8');
+    const stats = await fs.stat(configPath);
-
+    const maxFileSize = 1024 * 1024; // 1MB
-    // Basic content validation
+    if (stats.size > maxFileSize) {
-    if (!configContent.trim()) {
+      throw new PentestError(
-      throw new Error('Configuration file is empty');
+        `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`,
        'config',
        false,
        { configPath, fileSize: stats.size, maxFileSize },
        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
-    // Parse YAML with safety options
+    // 3. Read and check for empty content
    const configContent = await fs.readFile(configPath, 'utf8');
    if (!configContent.trim()) {
      throw new PentestError(
        'Configuration file is empty',
        'config',
        false,
        { configPath },
        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
    // 4. Parse YAML with safe schema
    let config: unknown;
    try {
      config = yaml.load(configContent, {
@@ -89,67 +225,82 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
      });
    } catch (yamlError) {
      const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
-      throw new Error(`YAML parsing failed: ${errMsg}`);
+      throw new PentestError(
        `YAML parsing failed: ${errMsg}`,
        'config',
        false,
        { configPath, originalError: errMsg },
        ErrorCode.CONFIG_PARSE_ERROR
      );
    }
-    // Additional safety check
+    // 5. Guard against null/undefined parse result
    if (config === null || config === undefined) {
-      throw new Error('Configuration file resulted in null/undefined after parsing');
+      throw new PentestError(
        'Configuration file resulted in null/undefined after parsing',
        'config',
        false,
        { configPath },
        ErrorCode.CONFIG_PARSE_ERROR
      );
    }
-    // Validate the configuration structure and content
+    // 6. Validate schema, security rules, and return
    validateConfig(config as Config);
    return config as Config;
  } catch (error) {
-    const errMsg = error instanceof Error ? error.message : String(error);
+    // PentestError instances are already well-formatted, re-throw as-is
-    // Enhance error message with context
+    if (error instanceof PentestError) {
    if (
      errMsg.startsWith('Configuration file not found') ||
      errMsg.startsWith('YAML parsing failed') ||
      errMsg.includes('must be') ||
      errMsg.includes('exceeds maximum')
    ) {
      // These are already well-formatted errors, re-throw as-is
      throw error;
    } else {
      // Wrap other errors with context
      throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
    }
    const errMsg = error instanceof Error ? error.message : String(error);
    throw new PentestError(
      `Failed to parse configuration file '${configPath}': ${errMsg}`,
      'config',
      false,
      { configPath, originalError: errMsg },
      ErrorCode.CONFIG_PARSE_ERROR
    );
  }
 };
 // Validate overall configuration structure using JSON Schema
 const validateConfig = (config: Config): void => {
  // Basic structure validation
  if (!config || typeof config !== 'object') {
-    throw new Error('Configuration must be a valid object');
+    throw new PentestError(
      'Configuration must be a valid object',
      'config',
      false,
      {},
      ErrorCode.CONFIG_VALIDATION_FAILED
    );
  }
  if (Array.isArray(config)) {
-    throw new Error('Configuration must be an object, not an array');
+    throw new PentestError(
      'Configuration must be an object, not an array',
      'config',
      false,
      {},
      ErrorCode.CONFIG_VALIDATION_FAILED
    );
  }
  // JSON Schema validation
  const isValid = validateSchema(config);
  if (!isValid) {
    const errors = validateSchema.errors || [];
-    const errorMessages = errors.map((err) => {
+    const errorMessages = formatAjvErrors(errors);
-      const path = err.instancePath || 'root';
+    throw new PentestError(
-      return `${path}: ${err.message}`;
+      `Configuration validation failed:\n  - ${errorMessages.join('\n  - ')}`,
-    });
+      'config',
-    throw new Error(`Configuration validation failed:\n  - ${errorMessages.join('\n  - ')}`);
+      false,
      { validationErrors: errorMessages },
      ErrorCode.CONFIG_VALIDATION_FAILED
    );
  }
  // Additional security validation
  performSecurityValidation(config);
  // Warn if deprecated fields are used
  if (config.login) {
    console.warn('⚠️  The "login" section is deprecated. Please use "authentication" instead.');
  }
  // Ensure at least some configuration is provided
  if (!config.rules && !config.authentication) {
    console.warn(
      '⚠️  Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.'
@@ -161,35 +312,58 @@ const validateConfig = (config: Config): void => {
  }
 };
 // Perform additional security validation beyond JSON Schema
 const performSecurityValidation = (config: Config): void => {
  // Validate authentication section for security issues
  if (config.authentication) {
    const auth = config.authentication;
-    // Check for dangerous patterns in credentials
+    // Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986)
-    if (auth.credentials) {
+    if (auth.login_url) {
      for (const pattern of DANGEROUS_PATTERNS) {
-        if (pattern.test(auth.credentials.username)) {
+        if (pattern.test(auth.login_url)) {
-          throw new Error(
+          throw new PentestError(
-            'authentication.credentials.username contains potentially dangerous pattern'
+            `authentication.login_url contains potentially dangerous pattern: ${pattern.source}`,
-          );
+            'config',
-        }
+            false,
-        if (pattern.test(auth.credentials.password)) {
+            { field: 'login_url', pattern: pattern.source },
-          throw new Error(
+            ErrorCode.CONFIG_VALIDATION_FAILED
-            'authentication.credentials.password contains potentially dangerous pattern'
+          );
        }
      }
    }
    if (auth.credentials) {
      for (const pattern of DANGEROUS_PATTERNS) {
        if (pattern.test(auth.credentials.username)) {
          throw new PentestError(
            `authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`,
            'config',
            false,
            { field: 'credentials.username', pattern: pattern.source },
            ErrorCode.CONFIG_VALIDATION_FAILED
          );
        }
        if (pattern.test(auth.credentials.password)) {
          throw new PentestError(
            `authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
            'config',
            false,
            { field: 'credentials.password', pattern: pattern.source },
            ErrorCode.CONFIG_VALIDATION_FAILED
          );
        }
      }
    }
    // Check login flow for dangerous patterns
    if (auth.login_flow) {
      auth.login_flow.forEach((step, index) => {
        for (const pattern of DANGEROUS_PATTERNS) {
          if (pattern.test(step)) {
-            throw new Error(
+            throw new PentestError(
-              `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`
+              `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`,
              'config',
              false,
              { field: `login_flow[${index}]`, pattern: pattern.source },
              ErrorCode.CONFIG_VALIDATION_FAILED
            );
          }
        }
@@ -197,48 +371,58 @@ const performSecurityValidation = (config: Config): void => {
    }
  }
  // Validate rules section for security issues
  if (config.rules) {
    validateRulesSecurity(config.rules.avoid, 'avoid');
    validateRulesSecurity(config.rules.focus, 'focus');
    // Check for duplicate and conflicting rules
    checkForDuplicates(config.rules.avoid || [], 'avoid');
    checkForDuplicates(config.rules.focus || [], 'focus');
    checkForConflicts(config.rules.avoid, config.rules.focus);
  }
 };
 // Validate one rule list for security issues, then run the type-specific checks.
 // - rules: the list to inspect; an undefined list is accepted and skipped.
 // - ruleType: label of the list (the callers in this file pass 'avoid' or 'focus');
 //   it is used to build the `rules.<ruleType>[<index>]` path in error messages.
 // Throws at the first rule whose url_path or description matches any entry of
 // DANGEROUS_PATTERNS; the added (+) lines throw a PentestError carrying the
 // offending field path and the pattern source with CONFIG_VALIDATION_FAILED.
 const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
  if (!rules) return;
  rules.forEach((rule, index) => {
    // Security validation
    for (const pattern of DANGEROUS_PATTERNS) {
      if (pattern.test(rule.url_path)) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`
+          `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
          'config',
          false,
          { field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      if (pattern.test(rule.description)) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`
+          `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`,
          'config',
          false,
          { field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
    }
    // Type-specific validation
    validateRuleTypeSpecific(rule, ruleType, index);
  });
 };
 // Validate rule based on its specific type
 const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
  const field = `rules.${ruleType}[${index}].url_path`;
  switch (rule.type) {
    case 'path':
      if (!rule.url_path.startsWith('/')) {
-        throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`);
+        throw new PentestError(
          `${field} for type 'path' must start with '/'`,
          'config',
          false,
          { field, ruleType: rule.type },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
@@ -246,14 +430,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
    case 'domain':
      // Basic domain validation - no slashes allowed
      if (rule.url_path.includes('/')) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters`
+          `${field} for type '${rule.type}' cannot contain '/' characters`,
          'config',
          false,
          { field, ruleType: rule.type },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      // Must contain at least one dot for domains
      if (rule.type === 'domain' && !rule.url_path.includes('.')) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name`
+          `${field} for type 'domain' must be a valid domain name`,
          'config',
          false,
          { field, ruleType: rule.type },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
@@ -261,62 +453,77 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
    case 'method': {
      const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
      if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}`
+          `${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
          'config',
          false,
          { field, ruleType: rule.type, allowedMethods },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
    }
    case 'header':
      // Header name validation (basic)
      if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`
+          `${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
          'config',
          false,
          { field, ruleType: rule.type },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
    case 'parameter':
      // Parameter name validation (basic)
      if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
-        throw new Error(
+        throw new PentestError(
-          `rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`
+          `${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
          'config',
          false,
          { field, ruleType: rule.type },
          ErrorCode.CONFIG_VALIDATION_FAILED
        );
      }
      break;
  }
 };
 // Check for duplicate rules within a single list.
 // Two rules are duplicates when they share the same `<type>:<url_path>` key.
 // - rules: the list to scan (callers pass an empty array when the list is absent).
 // - ruleType: label of the list, used in the error message and field path.
 // Throws at the second occurrence of a key, reporting that occurrence's index.
 const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
  const seen = new Set<string>();
  rules.forEach((rule, index) => {
    const key = `${rule.type}:${rule.url_path}`;
    if (seen.has(key)) {
-      throw new Error(
+      throw new PentestError(
-        `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`
+        `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
        'config',
        false,
        { field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
    seen.add(key);
  });
 };
 // Check for conflicting rules between avoid and focus.
 // A conflict is a focus rule whose `<type>:<url_path>` key also appears in the
 // avoid list. Both parameters default to an empty array, so either list may be
 // omitted. Throws at the first conflicting focus rule, reporting its index in
 // rules.focus.
 const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
  const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
  focusRules.forEach((rule, index) => {
    const key = `${rule.type}:${rule.url_path}`;
    if (avoidSet.has(key)) {
-      throw new Error(
+      throw new PentestError(
-        `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`
+        `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
        'config',
        false,
        { field: `rules.focus[${index}]`, urlPath: rule.url_path },
        ErrorCode.CONFIG_VALIDATION_FAILED
      );
    }
  });
 };
 // Sanitize and normalize rule values
 const sanitizeRule = (rule: Rule): Rule => {
  return {
    description: rule.description.trim(),
@@ -325,7 +532,6 @@ const sanitizeRule = (rule: Rule): Rule => {
  };
 };
 // Distribute configuration sections to different agents with sanitization
 export const distributeConfig = (config: Config | null): DistributedConfig => {
  const avoid = config?.rules?.avoid || [];
  const focus = config?.rules?.focus || [];
@@ -338,7 +544,6 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
  };
 };
 // Sanitize and normalize authentication values
 const sanitizeAuthentication = (auth: Authentication): Authentication => {
  return {
    login_type: auth.login_type.toLowerCase().trim() as Authentication['login_type'],
@@ -348,7 +553,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
      password: auth.credentials.password,
      ...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
    },
-    login_flow: auth.login_flow.map((step) => step.trim()),
+    ...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
    success_condition: {
      type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
      value: auth.success_condition.value.trim(),
@@ -1,110 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { path, fs } from 'zx';
 import chalk from 'chalk';
 import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
 import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
 // Builds the validator for a vulnerability-analysis agent of the given type.
 // The returned validator resolves to true when validateQueueAndDeliverable
 // completes, and to false (after logging a yellow warning) when it throws.
 function createVulnValidator(vulnType: VulnType): AgentValidator {
  async function validateVulnOutput(sourceDir: string): Promise<boolean> {
    let passed = true;
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (failure) {
      // Non-Error throwables are stringified so the log line is always readable
      let reason: string;
      if (failure instanceof Error) {
        reason = failure.message;
      } else {
        reason = String(failure);
      }
      console.log(chalk.yellow(`   Queue validation failed for ${vulnType}: ${reason}`));
      passed = false;
    }
    return passed;
  }
  return validateVulnOutput;
 }
 // Builds the validator for an exploitation agent of the given type.
 // The returned validator passes when `<vulnType>_exploitation_evidence.md`
 // exists under the `deliverables` folder of the source directory.
 function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceFilename = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> => {
    const evidenceExists = await fs.pathExists(
      path.join(sourceDir, 'deliverables', evidenceFilename)
    );
    return evidenceExists;
  };
 }
 // MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts.
 // Keys are prompt names (PromptName), which differ from the agent names used by
 // AGENT_VALIDATORS (e.g. 'vuln-injection' here vs 'injection-vuln' there).
 // Within each five-agent phase every agent gets a distinct instance, and an
 // exploit agent reuses the instance of its vuln counterpart.
 // The table is frozen with Object.freeze so it cannot be mutated at runtime.
 export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
  // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
  // NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
  // but assigning MCP server anyway for consistency and future extensibility
  'pre-recon-code': 'playwright-agent1',
  // Phase 2: Reconnaissance (actual prompt name is 'recon')
  recon: 'playwright-agent2',
  // Phase 3: Vulnerability Analysis (5 parallel agents)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',
  // Phase 4: Exploitation (5 parallel agents - same as vuln counterparts)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',
  // Phase 5: Reporting (actual prompt name is 'report-executive')
  // NOTE: Report generation is typically text-based and doesn't use browser automation,
  // but assigning MCP server anyway for potential screenshot inclusion or future needs
  'report-executive': 'playwright-agent3',
 });
 // Validator lookup keyed by agent name: each entry decides whether an agent
 // left its expected output under `<sourceDir>/deliverables`. Frozen so the
 // table cannot be altered after module load.
 export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-reconnaissance: passes when the code analysis deliverable exists
  'pre-recon': async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md')),
  // Reconnaissance: passes when the recon deliverable exists
  recon: async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', 'recon_deliverable.md')),
  // Vulnerability analysis: queue + deliverable validation per vulnerability type
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),
  // Exploitation: evidence file must exist per vulnerability type
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),
  // Executive report: the only validator that also logs when its file is missing
  report: async (sourceDir: string): Promise<boolean> => {
    const reportLocation = path.join(
      sourceDir,
      'deliverables',
      'comprehensive_security_assessment_report.md'
    );
    const found = await fs.pathExists(reportLocation);
    if (found) {
      return found;
    }
    console.log(
      chalk.red(`    ❌ Missing required deliverable: comprehensive_security_assessment_report.md`)
    );
    return found;
  },
 });
@@ -1,381 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { $, fs, path } from 'zx';
 import chalk from 'chalk';
 import { Timer } from '../utils/metrics.js';
 import { formatDuration } from '../utils/formatting.js';
 import { handleToolError, PentestError } from '../error-handling.js';
 import { AGENTS } from '../session-manager.js';
 import { runClaudePromptWithRetry } from '../ai/claude-executor.js';
 import { loadPrompt } from '../prompts/prompt-manager.js';
 import type { ToolAvailability } from '../tool-checker.js';
 import type { DistributedConfig } from '../types/config.js';
 // Outcome of an agent run; the value returned by runClaudePromptWithRetry is
 // cast to this shape in runPreReconWave1.
 // NOTE(review): units of `duration` and `cost` are not visible here - confirm.
 interface AgentResult {
  success: boolean;
  duration: number;
  cost?: number | undefined;
  error?: string | undefined;
  retryable?: boolean | undefined;
 }
 // External tools that runTerminalScan knows how to launch.
 type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
 // Outcome category recorded for a tool run.
 type ToolStatus = 'success' | 'skipped' | 'error';
 // Result of one runTerminalScan call: captured output (or a skip note) plus
 // the duration reported by Timer.stop().
 interface TerminalScanResult {
  tool: ToolName;
  output: string;
  status: ToolStatus;
  duration: number;
  success?: boolean;
  error?: Error;
 }
 // Values forwarded to loadPrompt when loading the pre-recon prompt.
 interface PromptVariables {
  webUrl: string;
  repoPath: string;
 }
 // Discriminated union for Wave1 tool results - clearer than loose union types
 // (`kind` is the tag; extractResult switches on it)
 type Wave1ToolResult =
  | { kind: 'scan'; result: TerminalScanResult }
  | { kind: 'skipped'; message: string }
  | { kind: 'agent'; result: AgentResult };
 // Everything produced by wave 1. `naabu` is optional and runPreReconWave1
 // never sets it, so the stitched report shows it as skipped.
 interface Wave1Results {
  nmap: Wave1ToolResult;
  subfinder: Wave1ToolResult;
  whatweb: Wave1ToolResult;
  naabu?: Wave1ToolResult;
  codeAnalysis: AgentResult;
 }
 // Everything produced by wave 2 (currently only the schemathesis scan).
 interface Wave2Results {
  schemathesis: TerminalScanResult;
 }
 // Value returned by executePreReconPhase: phase duration from Timer.stop()
 // and the stitched report text.
 interface PreReconResult {
  duration: number;
  report: string;
 }
 // Launches one external security tool against the target and returns its captured stdout.
 // nmap and subfinder receive only the hostname parsed from `target`; whatweb receives the full target.
 // Schemathesis runs once per schema file found in `<sourceDir>/outputs/schemas` and is reported
 // as skipped when that directory is missing or holds no .json/.yml/.yaml file.
 // Any failure is logged and converted into a result by handleToolError instead of being thrown.
 async function runTerminalScan(tool: ToolName, target: string, sourceDir: string | null = null): Promise<TerminalScanResult> {
  const timer = new Timer(`command-${tool}`);
  // Fresh zx shell per command: quiet, stdin and stderr ignored, stdout captured
  const quietShell = () => $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] });
  const announceStart = (): void => {
    console.log(chalk.blue(`    🔍 Running ${tool} scan...`));
  };
  // Stops the timer, logs completion and wraps the output as a success result
  const succeed = (output: string): TerminalScanResult => {
    const elapsed = timer.stop();
    console.log(chalk.green(`    ✅ ${tool} completed in ${formatDuration(elapsed)}`));
    return { tool, output, status: 'success', duration: elapsed };
  };
  // Logs the skip notice first, then stops the timer for the reported duration
  const skip = (consoleLine: string, output: string): TerminalScanResult => {
    console.log(chalk.gray(consoleLine));
    return { tool, output, status: 'skipped', duration: timer.stop() };
  };
  try {
    if (tool === 'nmap') {
      announceStart();
      const nmapHost = new URL(target).hostname;
      const nmapRun = await quietShell()`nmap -sV -sC ${nmapHost}`;
      return succeed(nmapRun.stdout);
    }
    if (tool === 'subfinder') {
      announceStart();
      const domain = new URL(target).hostname;
      const subfinderRun = await quietShell()`subfinder -d ${domain}`;
      return succeed(subfinderRun.stdout);
    }
    if (tool === 'whatweb') {
      announceStart();
      // Display-only copy of the command line; the real invocation is the tagged template below
      const command = `whatweb --open-timeout 30 --read-timeout 60 ${target}`;
      console.log(chalk.gray(`    Command: ${command}`));
      const whatwebRun = await quietShell()`whatweb --open-timeout 30 --read-timeout 60 ${target}`;
      return succeed(whatwebRun.stdout);
    }
    if (tool === 'schemathesis') {
      // Schemathesis depends on schemas written by code analysis
      const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas');
      if (!(await fs.pathExists(schemasDir))) {
        return skip(`    ⏭️ ${tool} - schemas directory not found`, 'Schemas directory not found');
      }
      const entries = await fs.readdir(schemasDir) as string[];
      const apiSchemas = entries.filter((name: string) =>
        ['.json', '.yml', '.yaml'].some((extension) => name.endsWith(extension))
      );
      if (apiSchemas.length === 0) {
        return skip(`    ⏭️ ${tool} - no API schemas found`, 'No API schemas found');
      }
      announceStart();
      const sections: string[] = [];
      for (const schemaFile of apiSchemas) {
        const schemaPath = path.join(schemasDir, schemaFile);
        try {
          const schemaRun = await quietShell()`schemathesis run ${schemaPath} -u ${target} --max-failures=5`;
          sections.push(`Schema: ${schemaFile}\n${schemaRun.stdout}`);
        } catch (schemaError) {
          // A failing schema is recorded in the output; remaining schemas still run
          const failure = schemaError as { stdout?: string; message?: string };
          sections.push(`Schema: ${schemaFile}\nError: ${failure.stdout || failure.message}`);
        }
      }
      return succeed(sections.join('\n\n'));
    }
    throw new Error(`Unknown tool: ${tool}`);
  } catch (error) {
    const duration = timer.stop();
    console.log(chalk.red(`    ❌ ${tool} failed in ${formatDuration(duration)}`));
    return handleToolError(tool, error as Error & { code?: string }) as TerminalScanResult;
  }
 }
 // Wave 1: initial footprinting.
 // Runs nmap, subfinder and whatweb alongside the pre-recon code-analysis agent.
 // In pipeline testing mode only the agent runs and the three tools are reported as skipped.
 async function runPreReconWave1(
  webUrl: string,
  sourceDir: string,
  variables: PromptVariables,
  config: DistributedConfig | null,
  pipelineTestingMode: boolean = false,
  sessionId: string | null = null,
  outputPath: string | null = null
 ): Promise<Wave1Results> {
  console.log(chalk.blue('    → Launching Wave 1 operations in parallel...'));
  // Starts the code-analysis agent with an already-loaded prompt
  const launchCodeAnalysis = (prompt: Awaited<ReturnType<typeof loadPrompt>>) =>
    runClaudePromptWithRetry(
      prompt,
      sourceDir,
      '*',
      '',
      AGENTS['pre-recon'].displayName,
      'pre-recon',  // Agent name for snapshot creation
      chalk.cyan,
      { id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) }  // Session metadata for audit logging (STANDARD: use 'id' field)
    );
  if (pipelineTestingMode) {
    // No external commands in pipeline testing mode
    console.log(chalk.gray('    ⏭️ Skipping external tools (pipeline testing mode)'));
    const testingPrompt = await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode);
    const [agentOutcome] = await Promise.all([launchCodeAnalysis(testingPrompt)]);
    const skippedForTesting = (): Wave1ToolResult => ({
      kind: 'skipped',
      message: 'Skipped (pipeline testing mode)'
    });
    return {
      nmap: skippedForTesting(),
      subfinder: skippedForTesting(),
      whatweb: skippedForTesting(),
      codeAnalysis: agentOutcome as AgentResult
    };
  }
  // The scans are started before the prompt is loaded so they run while it loads
  const nmapScan = runTerminalScan('nmap', webUrl);
  const subfinderScan = runTerminalScan('subfinder', webUrl);
  const whatwebScan = runTerminalScan('whatweb', webUrl);
  const analysisPrompt = await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode);
  const analysisRun = launchCodeAnalysis(analysisPrompt);
  // Check if authentication config is provided for login instructions injection
  console.log(chalk.gray(`    → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`));
  const [nmapOutcome, subfinderOutcome, whatwebOutcome, analysisOutcome] = await Promise.all([
    nmapScan,
    subfinderScan,
    whatwebScan,
    analysisRun
  ]);
  return {
    nmap: { kind: 'scan', result: nmapOutcome as TerminalScanResult },
    subfinder: { kind: 'scan', result: subfinderOutcome as TerminalScanResult },
    whatweb: { kind: 'scan', result: whatwebOutcome as TerminalScanResult },
    codeAnalysis: analysisOutcome as AgentResult
  };
 }
 // Wave 2: Additional scanning.
 // Runs the schemathesis scan when the tool is available.
 // - webUrl: target passed to the scan.
 // - sourceDir: repository directory; the scan looks for schemas beneath it.
 // - toolAvailability: only the `schemathesis` flag is consulted.
 // - pipelineTestingMode: when true no external tool is launched.
 // Returns a Wave2Results whose `schemathesis` entry is either the scan result
 // or a 'skipped' placeholder with duration 0 explaining why nothing ran.
 async function runPreReconWave2(
  webUrl: string,
  sourceDir: string,
  toolAvailability: ToolAvailability,
  pipelineTestingMode: boolean = false
 ): Promise<Wave2Results> {
  console.log(chalk.blue('    → Running Wave 2 additional scans in parallel...'));
  // Skip external commands in pipeline testing mode
  if (pipelineTestingMode) {
    console.log(chalk.gray('    ⏭️ Skipping external tools (pipeline testing mode)'));
    return {
      schemathesis: { tool: 'schemathesis', output: 'Skipped (pipeline testing mode)', status: 'skipped', duration: 0 }
    };
  }
  // Schemathesis is the only Wave 2 tool, so its absence means nothing can run.
  // Guarding here replaces the former operations/resultIndex bookkeeping, whose
  // trailing "tool not available" branch could never execute after this return.
  if (!toolAvailability.schemathesis) {
    console.log(chalk.gray('    ⏭️ No Wave 2 tools available'));
    return {
      schemathesis: { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 }
    };
  }
  const schemathesis = await runTerminalScan('schemathesis', webUrl, sourceDir);
  return { schemathesis };
 }
 // Reduces a Wave 1 tool result to the status/output pair shown in the report.
 // A missing result is reported as skipped with no output.
 function extractResult(r: Wave1ToolResult | undefined): { status: string; output: string } {
  if (!r) {
    return { status: 'Skipped', output: 'No output' };
  }
  switch (r.kind) {
    case 'skipped':
      // Skip entries carry their own explanation
      return { status: 'Skipped', output: r.message };
    case 'agent': {
      // Agent text lives in the agent's own deliverable, so only the outcome is shown
      let agentStatus = 'error';
      if (r.result.success) {
        agentStatus = 'success';
      }
      return { status: agentStatus, output: 'See agent output' };
    }
    case 'scan': {
      // Empty status/output fall back to placeholder text
      const { status, output } = r.result;
      return { status: status || 'Skipped', output: output || 'No output' };
    }
  }
 }
 // Combines tool outputs into single deliverable. Falls back to reference if file missing.
 // - wave1: Wave 1 results; each tool entry is reduced to status/output via extractResult.
 // - additionalScans: extra scan results, rendered under an "Authenticated Scans"
 //   heading when the array is non-empty.
 // - sourceDir: repository root; the code analysis deliverable is read from, and the
 //   report written to, its `deliverables` folder.
 // Returns the report text that was written to pre_recon_deliverable.md.
 // Throws a PentestError (category 'filesystem') when the report cannot be written.
 async function stitchPreReconOutputs(wave1: Wave1Results, additionalScans: TerminalScanResult[], sourceDir: string): Promise<string> {
  // Try to read the code analysis deliverable file
  let codeAnalysisContent = 'No analysis available';
  try {
    const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
    codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8');
  } catch (error) {
    // A read failure is not fatal: warn and point the reader at the expected file instead
    const err = error as Error;
    console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${err.message}`));
    codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md';
  }
  // Build additional scans section
  let additionalSection = '';
  if (additionalScans.length > 0) {
    additionalSection = '\n## Authenticated Scans\n';
    for (const scan of additionalScans) {
      additionalSection += `
 ### ${scan.tool.toUpperCase()}
 Status: ${scan.status}
 ${scan.output}
 `;
    }
  }
  // wave1.naabu is optional; extractResult maps an undefined entry to Skipped / No output
  const nmap = extractResult(wave1.nmap);
  const subfinder = extractResult(wave1.subfinder);
  const whatweb = extractResult(wave1.whatweb);
  const naabu = extractResult(wave1.naabu);
  const report = `
 # Pre-Reconnaissance Report
 ## Port Discovery (naabu)
 Status: ${naabu.status}
 ${naabu.output}
 ## Network Scanning (nmap)
 Status: ${nmap.status}
 ${nmap.output}
 ## Subdomain Discovery (subfinder)
 Status: ${subfinder.status}
 ${subfinder.output}
 ## Technology Detection (whatweb)
 Status: ${whatweb.status}
 ${whatweb.output}
 ## Code Analysis
 ${codeAnalysisContent}
 ${additionalSection}
 ---
 Report generated at: ${new Date().toISOString()}
  `.trim();
  // Ensure deliverables directory exists in the cloned repo
  try {
    const deliverablePath = path.join(sourceDir, 'deliverables', 'pre_recon_deliverable.md');
    await fs.ensureDir(path.join(sourceDir, 'deliverables'));
    // Write to file in the cloned repository
    await fs.writeFile(deliverablePath, report);
  } catch (error) {
    // Unlike the read above, a failed write is surfaced to the caller
    const err = error as Error;
    throw new PentestError(
      `Failed to write pre-recon report: ${err.message}`,
      'filesystem',
      false,
      { sourceDir, originalError: err.message }
    );
  }
  return report;
 }
 // Entry point for phase 1 (pre-reconnaissance).
 // Runs wave 1, then wave 2, stitches their outputs into the pre-recon
 // deliverable and returns the elapsed time together with the report text.
 export async function executePreReconPhase(
  webUrl: string,
  sourceDir: string,
  variables: PromptVariables,
  config: DistributedConfig | null,
  toolAvailability: ToolAvailability,
  pipelineTestingMode: boolean,
  sessionId: string | null = null,
  outputPath: string | null = null
 ): Promise<PreReconResult> {
  console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE'));
  const phaseTimer = new Timer('phase-1-pre-recon');

  // Wave 1: footprinting tools plus the code-analysis agent
  console.log(chalk.yellow('Wave 1: Initial footprinting...'));
  const footprint = await runPreReconWave1(
    webUrl,
    sourceDir,
    variables,
    config,
    pipelineTestingMode,
    sessionId,
    outputPath
  );
  console.log(chalk.green('  ✅ Wave 1 operations completed'));

  // Wave 2: follow-up scanning
  console.log(chalk.yellow('Wave 2: Additional scanning...'));
  const followUp = await runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode);
  console.log(chalk.green('  ✅ Wave 2 operations completed'));

  // Merge everything into the single pre-recon deliverable
  console.log(chalk.blue('📝 Stitching pre-recon outputs...'));
  const extraScans: TerminalScanResult[] = [];
  if (followUp.schemathesis) {
    extraScans.push(followUp.schemathesis);
  }
  const report = await stitchPreReconOutputs(footprint, extraScans, sourceDir);

  const duration = phaseTimer.stop();
  console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(duration)}`));
  console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`));
  return { duration, report };
 }
@@ -4,8 +4,6 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import chalk from 'chalk';
 export class ProgressIndicator {
  private message: string;
  private frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
@@ -25,9 +23,7 @@ export class ProgressIndicator {
    this.interval = setInterval(() => {
      // Clear the line and write the spinner
-      process.stdout.write(
+      process.stdout.write(`\r${this.frames[this.frameIndex]} ${this.message}`);
        `\r${chalk.cyan(this.frames[this.frameIndex])} ${chalk.dim(this.message)}`
      );
      this.frameIndex = (this.frameIndex + 1) % this.frames.length;
    }, 100);
  }
@@ -47,6 +43,6 @@ export class ProgressIndicator {
  finish(successMessage: string = 'Complete'): void {
    this.stop();
-    console.log(chalk.green(`✓ ${successMessage}`));
+    console.log(`✓ ${successMessage}`);
  }
 }
@@ -0,0 +1,291 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Agent Execution Service
 *
 * Handles the full agent lifecycle:
 * - Load config via ConfigLoaderService
 * - Load prompt template using AGENTS[agentName].promptTemplate
 * - Create git checkpoint
 * - Start audit logging
 * - Invoke Claude SDK via runClaudePrompt
 * - Spending cap check using isSpendingCapBehavior
 * - Handle failure (rollback, audit)
 * - Validate output using AGENTS[agentName].deliverableFilename
 * - Commit on success, log metrics
 *
 * No Temporal dependencies - pure domain logic.
 */
 import type { ActivityLogger } from '../types/activity-logger.js';
 import { Result, ok, err, isErr } from '../types/result.js';
 import { ErrorCode, type PentestErrorType } from '../types/errors.js';
 import { PentestError } from './error-handling.js';
 import { isSpendingCapBehavior } from '../utils/billing-detection.js';
 import { AGENTS } from '../session-manager.js';
 import { loadPrompt } from './prompt-manager.js';
 import {
  runClaudePrompt,
  validateAgentOutput,
  type ClaudePromptResult,
 } from '../ai/claude-executor.js';
 import {
  createGitCheckpoint,
  commitGitSuccess,
  rollbackGitWorkspace,
  getGitCommitHash,
 } from './git-manager.js';
 import { AuditSession } from '../audit/index.js';
 import type { AgentEndResult } from '../types/audit.js';
 import type { AgentName } from '../types/agents.js';
 import type { ConfigLoaderService } from './config-loader.js';
 import type { AgentMetrics } from '../types/metrics.js';
 /**
 * Input for agent execution.
 *
 * Consumed by AgentExecutionService.execute / executeOrThrow.
 */
 export interface AgentExecutionInput {
  /** Target URL; passed to loadPrompt as a prompt variable. */
  webUrl: string;
  /** Repository path; used for prompt variables, git operations and as the agent's working directory argument. */
  repoPath: string;
  /** Optional config file path; when falsy no config is loaded and a null config is used. */
  configPath?: string | undefined;
  /** Forwarded to loadPrompt; treated as false when omitted. */
  pipelineTestingMode?: boolean | undefined;
  /** Attempt counter; passed to the git checkpoint and audit session and echoed in the end result. */
  attemptNumber: number;
 }
 /**
 * Options describing a failed agent run, consumed by AgentExecutionService.failAgent.
 */
 interface FailAgentOpts {
  /** Attempt number recorded in the audit end result. */
  attemptNumber: number;
  /** Raw executor result; supplies duration, cost and model for the audit end result. */
  result: ClaudePromptResult;
  /** Reason string handed to rollbackGitWorkspace. */
  rollbackReason: string;
  /** Message used both for the audit end result and the returned PentestError. */
  errorMessage: string;
  /** Error code attached to the returned PentestError. */
  errorCode: ErrorCode;
  /** PentestError type (category) of the returned error. */
  category: PentestErrorType;
  /** Retryable flag of the returned PentestError. */
  retryable: boolean;
  /** Context object attached to the returned PentestError. */
  context: Record<string, unknown>;
 }
 /**
 * Service for executing agents with full lifecycle management.
 *
 * NOTE: AuditSession is passed per-execution, NOT stored on the service.
 * This is critical for parallel agent execution - each agent needs its own
 * AuditSession instance because AuditSession uses instance state (currentAgentName)
 * to track which agent is currently logging.
 */
 export class AgentExecutionService {
  private readonly configLoader: ConfigLoaderService;
  constructor(configLoader: ConfigLoaderService) {
    this.configLoader = configLoader;
  }
  /**
   * Execute an agent with full lifecycle management.
   *
   * Steps, in order: load optional config, load prompt, create git checkpoint,
   * start audit logging, run the agent, spending-cap check, failure handling,
   * output validation, commit + audit end.
   *
   * Failures before audit logging starts (config, prompt, checkpoint) are
   * returned directly as errors. Failures after the agent has run go through
   * failAgent, which rolls back the workspace and closes the audit entry.
   *
   * @param agentName - Name of the agent to execute
   * @param input - Execution input parameters
   * @param auditSession - Audit session for this specific agent execution
   * @param logger - Logger forwarded to prompt loading, git operations, the executor and validation
   * @returns Result containing AgentEndResult on success, PentestError on failure
   */
  async execute(
    agentName: AgentName,
    input: AgentExecutionInput,
    auditSession: AuditSession,
    logger: ActivityLogger
  ): Promise<Result<AgentEndResult, PentestError>> {
    const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
    // 1. Load config (if provided)
    const configResult = await this.configLoader.loadOptional(configPath);
    if (isErr(configResult)) {
      return configResult;
    }
    const distributedConfig = configResult.value;
    // 2. Load prompt
    const promptTemplate = AGENTS[agentName].promptTemplate;
    let prompt: string;
    try {
      prompt = await loadPrompt(
        promptTemplate,
        { webUrl, repoPath },
        distributedConfig,
        pipelineTestingMode,
        logger
      );
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      return err(
        new PentestError(
          `Failed to load prompt for ${agentName}: ${errorMessage}`,
          'prompt',
          false,
          { agentName, promptTemplate, originalError: errorMessage },
          ErrorCode.PROMPT_LOAD_FAILED
        )
      );
    }
    // 3. Create git checkpoint before execution.
    // createGitCheckpoint returns a GitOperationResult, so a failure can surface
    // either as a thrown error or as a result with `success: false`. Both must
    // abort the run: continuing without a checkpoint would leave nothing safe
    // to roll back to if the agent fails later.
    let checkpointError: string | undefined;
    try {
      const checkpoint = await createGitCheckpoint(repoPath, agentName, attemptNumber, logger);
      if (!checkpoint.success) {
        checkpointError = checkpoint.error?.message ?? 'checkpoint reported failure';
      }
    } catch (error) {
      checkpointError = error instanceof Error ? error.message : String(error);
    }
    if (checkpointError !== undefined) {
      return err(
        new PentestError(
          `Failed to create git checkpoint for ${agentName}: ${checkpointError}`,
          'filesystem',
          false,
          { agentName, repoPath, originalError: checkpointError },
          ErrorCode.GIT_CHECKPOINT_FAILED
        )
      );
    }
    // 4. Start audit logging
    await auditSession.startAgent(agentName, prompt, attemptNumber);
    // 5. Execute agent
    const result: ClaudePromptResult = await runClaudePrompt(
      prompt,
      repoPath,
      '', // context
      agentName, // description
      agentName,
      auditSession,
      logger
    );
    // 6. Spending cap check - defense-in-depth
    // Only "successful" runs with at most 2 turns and zero cost are inspected;
    // isSpendingCapBehavior then decides based on the result text.
    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
      const resultText = result.result || '';
      if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
        return this.failAgent(agentName, repoPath, auditSession, logger, {
          attemptNumber, result,
          rollbackReason: 'spending cap detected',
          errorMessage: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
          errorCode: ErrorCode.SPENDING_CAP_REACHED,
          category: 'billing',
          retryable: true,
          context: { agentName, turns: result.turns, cost: result.cost },
        });
      }
    }
    // 7. Handle execution failure
    if (!result.success) {
      return this.failAgent(agentName, repoPath, auditSession, logger, {
        attemptNumber, result,
        rollbackReason: 'execution failure',
        errorMessage: result.error || 'Agent execution failed',
        errorCode: ErrorCode.AGENT_EXECUTION_FAILED,
        category: 'validation',
        // Executor may mark the failure non-retryable; default to retrying
        retryable: result.retryable ?? true,
        context: { agentName, originalError: result.error },
      });
    }
    // 8. Validate output
    const validationPassed = await validateAgentOutput(result, agentName, repoPath, logger);
    if (!validationPassed) {
      return this.failAgent(agentName, repoPath, auditSession, logger, {
        attemptNumber, result,
        rollbackReason: 'validation failure',
        errorMessage: `Agent ${agentName} failed output validation`,
        errorCode: ErrorCode.OUTPUT_VALIDATION_FAILED,
        category: 'validation',
        retryable: true,
        context: { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
      });
    }
    // 9. Success - commit deliverables, then capture checkpoint hash
    await commitGitSuccess(repoPath, agentName, logger);
    const commitHash = await getGitCommitHash(repoPath);
    const endResult: AgentEndResult = {
      attemptNumber,
      duration_ms: result.duration,
      cost_usd: result.cost || 0,
      success: true,
      model: result.model,
      // Only include the checkpoint field when a commit hash was obtained
      ...(commitHash && { checkpoint: commitHash }),
    };
    await auditSession.endAgent(agentName, endResult);
    return ok(endResult);
  }
  /**
   * Shared failure path for a run that reached the agent execution step.
   *
   * Rolls back the git workspace (the rollback result is not inspected),
   * records a failed AgentEndResult on the audit session, and returns an
   * error Result carrying a PentestError built from `opts`.
   *
   * @param agentName - Name of the agent that failed
   * @param repoPath - Repository whose workspace is rolled back
   * @param auditSession - Audit session that receives the failed end result
   * @param logger - Logger forwarded to the rollback
   * @param opts - Details of the failure (message, code, category, retryability, context)
   * @returns Always an error Result
   */
  private async failAgent(
    agentName: AgentName,
    repoPath: string,
    auditSession: AuditSession,
    logger: ActivityLogger,
    opts: FailAgentOpts
  ): Promise<Result<AgentEndResult, PentestError>> {
    await rollbackGitWorkspace(repoPath, opts.rollbackReason, logger);
    const endResult: AgentEndResult = {
      attemptNumber: opts.attemptNumber,
      duration_ms: opts.result.duration,
      cost_usd: opts.result.cost || 0,
      success: false,
      model: opts.result.model,
      error: opts.errorMessage,
    };
    await auditSession.endAgent(agentName, endResult);
    return err(
      new PentestError(
        opts.errorMessage,
        opts.category,
        opts.retryable,
        opts.context,
        opts.errorCode
      )
    );
  }
  /**
   * Execute an agent, throwing PentestError on failure.
   *
   * This is the preferred method for Temporal activities, which need to
   * catch errors and classify them into ApplicationFailure. Avoids requiring
   * activities to import Result utilities, keeping the boundary clean.
   *
   * @param agentName - Name of the agent to execute
   * @param input - Execution input parameters
   * @param auditSession - Audit session for this specific agent execution
   * @param logger - Logger forwarded unchanged to execute
   * @returns AgentEndResult on success
   * @throws PentestError on failure
   */
  async executeOrThrow(
    agentName: AgentName,
    input: AgentExecutionInput,
    auditSession: AuditSession,
    logger: ActivityLogger
  ): Promise<AgentEndResult> {
    const result = await this.execute(agentName, input, auditSession, logger);
    if (isErr(result)) {
      throw result.error;
    }
    return result.value;
  }
  /**
   * Convert AgentEndResult to AgentMetrics for workflow state.
   *
   * Duration and cost come from the end result; turn count and model come
   * from the raw executor result. Token counts are always null.
   *
   * @param endResult - End result produced by execute
   * @param result - Raw executor result for the same run
   * @returns Metrics object for the workflow state
   */
  static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
    return {
      durationMs: endResult.duration_ms,
      inputTokens: null, // Not currently exposed by SDK wrapper
      outputTokens: null,
      costUsd: endResult.cost_usd,
      numTurns: result.turns ?? null,
      model: result.model,
    };
  }
 }
@@ -0,0 +1,75 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Config Loader Service
 *
 * Wraps parseConfig + distributeConfig with Result type for explicit error handling.
 * Pure service with no Temporal dependencies.
 */
 import { parseConfig, distributeConfig } from '../config-parser.js';
 import { PentestError } from './error-handling.js';
 import { Result, ok, err } from '../types/result.js';
 import { ErrorCode } from '../types/errors.js';
 import type { DistributedConfig } from '../types/config.js';
 /**
 * Loads configuration files and distributes them per agent.
 *
 * Failures are reported through the Result type instead of exceptions,
 * so callers choose how to react.
 */
 export class ConfigLoaderService {
  /**
   * Parse the config at `configPath` and distribute it.
   *
   * Any exception from parsing or distribution is converted into a
   * non-retryable PentestError of type 'config'. The error code is picked
   * from the failure message: "not found"/"ENOENT" -> CONFIG_NOT_FOUND,
   * "validation failed" -> CONFIG_VALIDATION_FAILED, otherwise CONFIG_PARSE_ERROR.
   *
   * @param configPath - Path to the YAML configuration file
   * @returns Result containing DistributedConfig on success, PentestError on failure
   */
  async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
    try {
      const parsed = await parseConfig(configPath);
      return ok(distributeConfig(parsed));
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      // Missing-file check wins over the validation check, which wins over the parse default
      const isMissing = reason.includes('not found') || reason.includes('ENOENT');
      const code = isMissing
        ? ErrorCode.CONFIG_NOT_FOUND
        : reason.includes('validation failed')
          ? ErrorCode.CONFIG_VALIDATION_FAILED
          : ErrorCode.CONFIG_PARSE_ERROR;
      const failure = new PentestError(
        `Failed to load config ${configPath}: ${reason}`,
        'config',
        false,
        { configPath, originalError: reason },
        code
      );
      return err(failure);
    }
  }
  /**
   * Load the config when a path is given; otherwise succeed with a null config.
   *
   * A falsy path (undefined or empty string) counts as "no config".
   *
   * @param configPath - Optional path to the YAML configuration file
   * @returns Result containing DistributedConfig (or null) on success, PentestError on failure
   */
  async loadOptional(
    configPath: string | undefined
  ): Promise<Result<DistributedConfig | null, PentestError>> {
    if (configPath) {
      return this.load(configPath);
    }
    return ok(null);
  }
 }
@@ -0,0 +1,117 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Dependency Injection Container
 *
 * Provides a per-workflow container for service instances.
 * Services are wired with explicit constructor injection.
 *
 * Usage:
 *   const container = getOrCreateContainer(workflowId, sessionMetadata);
 *   const auditSession = new AuditSession(sessionMetadata);  // Per-agent
 *   await auditSession.initialize(workflowId);
 *   const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession, logger);
 */
 import type { SessionMetadata } from '../audit/utils.js';
 import { AgentExecutionService } from './agent-execution.js';
 import { ConfigLoaderService } from './config-loader.js';
 import { ExploitationCheckerService } from './exploitation-checker.js';
 /**
 * Dependencies required to create a Container.
 *
 * NOTE: AuditSession is NOT stored in the container.
 * Each agent execution receives its own AuditSession instance
 * because AuditSession uses instance state (currentAgentName) that
 * cannot be shared across parallel agents.
 */
 export interface ContainerDependencies {
  /** Session metadata; copied onto the Container as-is by its constructor. */
  readonly sessionMetadata: SessionMetadata;
 }
 /**
 * Per-workflow DI container.
 *
 * Owns one instance of each service; the instances are built once in the
 * constructor and shared by every agent execution that uses this container.
 *
 * NOTE: AuditSession is deliberately absent - it is supplied per agent
 * execution so parallel agents each keep their own logging context.
 */
 export class Container {
  readonly sessionMetadata: SessionMetadata;
  readonly agentExecution: AgentExecutionService;
  readonly configLoader: ConfigLoaderService;
  readonly exploitationChecker: ExploitationCheckerService;
  constructor(deps: ContainerDependencies) {
    const { sessionMetadata } = deps;
    this.sessionMetadata = sessionMetadata;

    // The config loader is built first because the agent execution service depends on it
    const loader = new ConfigLoaderService();
    this.configLoader = loader;
    this.agentExecution = new AgentExecutionService(loader);
    this.exploitationChecker = new ExploitationCheckerService();
  }
 }
 /**
 * Map of workflowId to Container instance.
 * Each workflow gets its own container scoped to its lifecycle.
 */
 const containers = new Map<string, Container>();
 /**
 * Look up the Container registered for a workflow, creating it on first use.
 *
 * When a container is already registered for `workflowId` it is returned
 * unchanged and `sessionMetadata` is ignored; the metadata is only used
 * to build a new container.
 *
 * @param workflowId - Unique workflow identifier
 * @param sessionMetadata - Session metadata for audit paths
 * @returns Container instance for the workflow
 */
 export function getOrCreateContainer(
  workflowId: string,
  sessionMetadata: SessionMetadata
 ): Container {
  const existing = containers.get(workflowId);
  if (existing) {
    return existing;
  }
  const created = new Container({ sessionMetadata });
  containers.set(workflowId, created);
  return created;
 }
 /**
 * Drop the Container registered for a workflow.
 *
 * Intended to run when a workflow completes (in logWorkflowComplete) so the
 * registry does not keep finished workflows alive. Removing an unknown
 * workflowId is a no-op.
 *
 * @param workflowId - Unique workflow identifier
 */
 export function removeContainer(workflowId: string): void {
  // Map#delete reports whether an entry existed; that result is intentionally discarded
  containers.delete(workflowId);
 }
 /**
 * Fetch the Container registered for a workflow without creating one.
 *
 * This is the read-only counterpart of getOrCreateContainer: the registry is
 * never modified. Suited to lightweight activities that can reuse a
 * container when one exists but have no need to create it.
 *
 * @param workflowId - Unique workflow identifier
 * @returns The registered Container, or undefined when none exists
 */
 export function getContainer(workflowId: string): Container | undefined {
  const registered = containers.get(workflowId);
  return registered;
 }
@@ -4,116 +4,44 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
-import chalk from 'chalk';
+import {
-import { fs, path } from 'zx';
+  ErrorCode,
-import type {
+  type PentestErrorType,
-  PentestErrorType,
+  type PentestErrorContext,
-  PentestErrorContext,
+  type PromptErrorResult,
-  LogEntry,
+} from '../types/errors.js';
-  ToolErrorResult,
+import {
-  PromptErrorResult,
+  matchesBillingApiPattern,
-} from './types/errors.js';
+  matchesBillingTextPattern,
 } from '../utils/billing-detection.js';
 // Temporal error classification for ApplicationFailure wrapping
 export interface TemporalErrorClassification {
  // Error type label (e.g. 'BillingError', 'ConfigurationError')
  type: string;
  // Whether the failure should be retried
  retryable: boolean;
 }
 // Custom error class for pentest operations
 export class PentestError extends Error {
-  name = 'PentestError' as const;
+  override name = 'PentestError' as const;
  type: PentestErrorType;
  retryable: boolean;
  context: PentestErrorContext;
  timestamp: string;
  /** Optional specific error code for reliable classification */
  code?: ErrorCode;
  constructor(
    message: string,
    type: PentestErrorType,
    retryable: boolean = false,
-    context: PentestErrorContext = {}
+    context: PentestErrorContext = {},
    code?: ErrorCode
  ) {
    super(message);
    this.type = type;
    this.retryable = retryable;
    this.context = context;
    this.timestamp = new Date().toISOString();
-  }
+    if (code !== undefined) {
-}
+      this.code = code;
 /**
 * Centralized error logging.
 *
 * Builds a structured log entry for `error`, prints a colored summary to the
 * console (yellow for retryable errors, red otherwise) and, when `sourceDir`
 * is given, appends the entry as one JSON line to `<sourceDir>/error.log`.
 * A failure to write the log file is reported on the console and otherwise ignored.
 *
 * @param error - Error to log; optional type/retryable/context fields are honored
 * @param contextMsg - Short description of what was happening
 * @param sourceDir - Directory for error.log, or null to skip file logging
 * @returns The log entry that was built
 */
 export async function logError(
  error: Error & { type?: PentestErrorType; retryable?: boolean; context?: PentestErrorContext },
  contextMsg: string,
  sourceDir: string | null = null
 ): Promise<LogEntry> {
  const entry: LogEntry = {
    timestamp: new Date().toISOString(),
    context: contextMsg,
    error: {
      name: error.name || error.constructor.name,
      message: error.message,
      type: error.type || 'unknown',
      retryable: error.retryable || false,
      // The stack is only recorded when the error actually carries one
      ...(error.stack ? { stack: error.stack } : {}),
    },
  };

  // Console output: severity decides both the icon and the color
  const paint = error.retryable ? chalk.yellow : chalk.red;
  const icon = error.retryable ? '⚠️' : '❌';
  console.log(paint(`${icon} ${contextMsg}:`));
  console.log(paint(`   ${error.message}`));
  const details = error.context;
  if (details && Object.keys(details).length > 0) {
    console.log(chalk.gray(`   Context: ${JSON.stringify(details)}`));
  }

  // File output is optional
  if (!sourceDir) {
    return entry;
  }
  try {
    await fs.appendFile(path.join(sourceDir, 'error.log'), JSON.stringify(entry) + '\n');
  } catch (writeFailure) {
    const reason = writeFailure instanceof Error ? writeFailure.message : String(writeFailure);
    console.log(chalk.gray(`   (Failed to write error log: ${reason})`));
  }
  return entry;
 }
 /**
 * Convert a tool execution error into a failed ToolErrorResult.
 *
 * The wrapped PentestError is marked retryable only for the error codes
 * ECONNRESET, ETIMEDOUT and ENOTFOUND.
 *
 * @param toolName - Name of the tool that failed
 * @param error - Error raised by the tool; `code` is inspected when present
 * @returns A result with status 'error', zero duration and the wrapped PentestError
 */
 export function handleToolError(
  toolName: string,
  error: Error & { code?: string }
 ): ToolErrorResult {
  const { code, message } = error;

  let transient: boolean;
  switch (code) {
    case 'ECONNRESET':
    case 'ETIMEDOUT':
    case 'ENOTFOUND':
      transient = true;
      break;
    default:
      transient = false;
  }

  const wrapped = new PentestError(
    `${toolName} execution failed: ${message}`,
    'tool',
    transient,
    { toolName, originalError: message, errorCode: code }
  );
  return {
    tool: toolName,
    output: `Error: ${message}`,
    status: 'error',
    duration: 0,
    success: false,
    error: wrapped,
  };
 }
 // Handle prompt loading errors
 export function handlePromptError(
  promptName: string,
  error: Error
@@ -129,7 +57,6 @@ export function handlePromptError(
  };
 }
 // Patterns that indicate retryable errors
 const RETRYABLE_PATTERNS = [
  // Network and connection errors
  'network',
@@ -173,28 +100,58 @@ const NON_RETRYABLE_PATTERNS = [
 /**
 * Decide from the error message whether an error is worth retrying.
 *
 * Matching is a case-insensitive substring search. Any hit in
 * NON_RETRYABLE_PATTERNS wins; otherwise the error is retryable only if it
 * matches at least one entry of RETRYABLE_PATTERNS.
 *
 * @param error - Error whose message is inspected
 * @returns true when the error matches a retryable pattern and no non-retryable one
 */
 export function isRetryableError(error: Error): boolean {
  const haystack = error.message.toLowerCase();
  const mentions = (needle: string): boolean => haystack.includes(needle);
  // Explicit non-retryable patterns take precedence over retryable ones
  return !NON_RETRYABLE_PATTERNS.some((p) => mentions(p)) && RETRYABLE_PATTERNS.some((p) => mentions(p));
 }
-// Rate limit errors get longer base delay (30s) vs standard exponential backoff (2s)
+/**
-export function getRetryDelay(error: Error, attempt: number): number {
+ * Classifies errors by ErrorCode for reliable, code-based classification.
-  const message = error.message.toLowerCase();
+ * Used when error is a PentestError with a specific ErrorCode.
 */
 function classifyByErrorCode(
  code: ErrorCode,
  retryableFromError: boolean
 ): { type: string; retryable: boolean } {
  switch (code) {
    // Billing errors - retryable (wait for cap reset or credits added)
    case ErrorCode.SPENDING_CAP_REACHED:
    case ErrorCode.INSUFFICIENT_CREDITS:
      return { type: 'BillingError', retryable: true };
-  // Rate limiting gets longer delays
+    case ErrorCode.API_RATE_LIMITED:
-  if (message.includes('rate limit') || message.includes('429')) {
+      return { type: 'RateLimitError', retryable: true };
-    return Math.min(30000 + attempt * 10000, 120000); // 30s, 40s, 50s, max 2min
+
    // Config errors - non-retryable (need manual fix)
    case ErrorCode.CONFIG_NOT_FOUND:
    case ErrorCode.CONFIG_VALIDATION_FAILED:
    case ErrorCode.CONFIG_PARSE_ERROR:
      return { type: 'ConfigurationError', retryable: false };
    // Prompt errors - non-retryable (need manual fix)
    case ErrorCode.PROMPT_LOAD_FAILED:
      return { type: 'ConfigurationError', retryable: false };
    // Git errors - non-retryable (indicates workspace corruption)
    case ErrorCode.GIT_CHECKPOINT_FAILED:
    case ErrorCode.GIT_ROLLBACK_FAILED:
      return { type: 'GitError', retryable: false };
    // Validation errors - retryable (agent may succeed on retry)
    case ErrorCode.OUTPUT_VALIDATION_FAILED:
    case ErrorCode.DELIVERABLE_NOT_FOUND:
      return { type: 'OutputValidationError', retryable: true };
    // Agent execution - use the retryable flag from the error
    case ErrorCode.AGENT_EXECUTION_FAILED:
      return { type: 'AgentExecutionError', retryable: retryableFromError };
    default:
      // Unknown code - fall through to string matching
      return { type: 'UnknownError', retryable: retryableFromError };
  }
  // Exponential backoff with jitter for other retryable errors
  const baseDelay = Math.pow(2, attempt) * 1000; // 2s, 4s, 8s
  const jitter = Math.random() * 1000; // 0-1s random
  return Math.min(baseDelay + jitter, 30000); // Max 30s
 }
 /**
@@ -204,31 +161,25 @@ export function getRetryDelay(error: Error, attempt: number): number {
 * Used by activities to wrap errors in ApplicationFailure:
 * - Retryable errors: Temporal retries with configured backoff
 * - Non-retryable errors: Temporal fails immediately
 *
 * Classification priority:
 * 1. If error is PentestError with ErrorCode, classify by code (reliable)
 * 2. Fall through to string matching for external errors (SDK, network, etc.)
 */
-export function classifyErrorForTemporal(error: unknown): TemporalErrorClassification {
+export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
  // === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
  if (error instanceof PentestError && error.code !== undefined) {
    return classifyByErrorCode(error.code, error.retryable);
  }
  // === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
  const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
  // === BILLING ERRORS (Retryable with long backoff) ===
  // Anthropic returns billing as 400 invalid_request_error
  // Human can add credits OR wait for spending cap to reset (5-30 min backoff)
-  if (
+  // Check both API patterns and text patterns for comprehensive detection
-    message.includes('billing_error') ||
+  if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
    message.includes('credit balance is too low') ||
    message.includes('insufficient credits') ||
    message.includes('usage is blocked due to insufficient credits') ||
    message.includes('please visit plans & billing') ||
    message.includes('please visit plans and billing') ||
    message.includes('usage limit reached') ||
    message.includes('quota exceeded') ||
    message.includes('daily rate limit') ||
    message.includes('limit will reset') ||
    // Claude Code spending cap patterns (returns short message instead of error)
    message.includes('spending cap') ||
    message.includes('spending limit') ||
    message.includes('cap reached') ||
    message.includes('budget exceeded') ||
    message.includes('billing limit reached')
  ) {
    return { type: 'BillingError', retryable: true };
  }
@@ -0,0 +1,71 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Exploitation Checker Service
 *
 * Pure domain logic for determining whether exploitation should run.
 * Reads queue file, parses JSON, returns decision.
 *
 * No Temporal dependencies - this is pure business logic.
 */
 import {
  validateQueueSafe,
  type VulnType,
  type ExploitationDecision,
 } from './queue-validation.js';
 import { isOk } from '../types/result.js';
 import type { ActivityLogger } from '../types/activity-logger.js';
 /**
 * Decides whether an exploit agent should run.
 *
 * The decision is derived from queue validation (validateQueueSafe) for the
 * given vulnerability type.
 */
 export class ExploitationCheckerService {
  /**
   * Check if exploitation should run for a given vulnerability type.
   *
   * Outcomes:
   * - validation succeeds: the decision is logged and returned as-is
   * - validation fails with a retryable error: the error is logged and thrown
   * - validation fails with a non-retryable error: a warning is logged and a
   *   "do not exploit, do not retry" decision with zero vulnerabilities is returned
   *
   * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
   * @param repoPath - Path to the repository containing deliverables
   * @param logger - ActivityLogger for structured logging
   * @returns ExploitationDecision indicating whether to exploit
   * @throws PentestError if validation fails and is retryable
   */
  async checkQueue(vulnType: VulnType, repoPath: string, logger: ActivityLogger): Promise<ExploitationDecision> {
    const outcome = await validateQueueSafe(vulnType, repoPath);

    if (!isOk(outcome)) {
      const failure = outcome.error;
      if (failure.retryable) {
        // Surface retryable failures so the caller can schedule a retry
        logger.warn(`${vulnType}: ${failure.message} (retryable)`);
        throw failure;
      }
      // Anything else: skip exploitation instead of failing the pipeline
      logger.warn(`${vulnType}: ${failure.message}, skipping exploitation`);
      return {
        shouldExploit: false,
        shouldRetry: false,
        vulnerabilityCount: 0,
        vulnType,
      };
    }

    const decision = outcome.value;
    const summary = decision.shouldExploit
      ? `${decision.vulnerabilityCount} vulnerabilities found`
      : 'no vulnerabilities, skipping exploitation';
    logger.info(`${vulnType}: ${summary}`);
    return decision;
  }
 }
@@ -5,7 +5,9 @@
 // as published by the Free Software Foundation.
 import { $ } from 'zx';
-import chalk from 'chalk';
+import { PentestError } from './error-handling.js';
 import { ErrorCode } from '../types/errors.js';
 import type { ActivityLogger } from '../types/activity-logger.js';
 /**
 * Check if a directory is a git repository.
@@ -51,17 +53,19 @@ function logChangeSummary(
  changes: string[],
  messageWithChanges: string,
  messageWithoutChanges: string,
-  color: typeof chalk.green,
+  logger: ActivityLogger,
  level: 'info' | 'warn' = 'info',
  maxToShow: number = 5
 ): void {
  if (changes.length > 0) {
-    console.log(color(messageWithChanges.replace('{count}', String(changes.length))));
+    const msg = messageWithChanges.replace('{count}', String(changes.length));
-    changes.slice(0, maxToShow).forEach((change) => console.log(chalk.gray(`       ${change}`)));
+    const fileList = changes.slice(0, maxToShow).map((c) => `  ${c}`).join(', ');
-    if (changes.length > maxToShow) {
+    const suffix = changes.length > maxToShow
-      console.log(chalk.gray(`       ... and ${changes.length - maxToShow} more files`));
+      ? ` ... and ${changes.length - maxToShow} more files`
-    }
+      : '';
    logger[level](`${msg} ${fileList}${suffix}`);
  } else {
-    console.log(color(messageWithoutChanges));
+    logger[level](messageWithoutChanges);
  }
 }
@@ -136,10 +140,10 @@ export async function executeGitCommandWithRetry(
        if (isGitLockError(errMsg) && attempt < maxRetries) {
          const delay = Math.pow(2, attempt - 1) * 1000;
-          console.log(
+          // executeGitCommandWithRetry is also called outside activity context
-            chalk.yellow(
+          // (e.g., from resume logic), so we use console.warn as a fallback here
-              `    ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
+          console.warn(
-            )
+            `Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
          );
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
@@ -148,7 +152,13 @@ export async function executeGitCommandWithRetry(
        throw error;
      }
    }
-    throw new Error(`Git command failed after ${maxRetries} retries`);
+    throw new PentestError(
      `Git command failed after ${maxRetries} retries`,
      'filesystem',
      true, // Retryable - transient git lock issues
      { maxRetries, description },
      ErrorCode.GIT_CHECKPOINT_FAILED
    );
  } finally {
    gitSemaphore.release();
  }
@@ -157,15 +167,16 @@ export async function executeGitCommandWithRetry(
 // Two-phase reset: hard reset (tracked files) + clean (untracked files)
 export async function rollbackGitWorkspace(
  sourceDir: string,
-  reason: string = 'retry preparation'
+  reason: string = 'retry preparation',
  logger: ActivityLogger
 ): Promise<GitOperationResult> {
  // Skip git operations if not a git repository
  if (!(await isGitRepository(sourceDir))) {
-    console.log(chalk.gray(`    ⏭️  Skipping git rollback (not a git repository)`));
+    logger.info('Skipping git rollback (not a git repository)');
    return { success: true };
  }
-  console.log(chalk.yellow(`    🔄 Rolling back workspace for ${reason}`));
+  logger.info(`Rolling back workspace for ${reason}`);
  try {
    const changes = await getChangedFiles(sourceDir, 'status check for rollback');
@@ -182,16 +193,26 @@ export async function rollbackGitWorkspace(
    logChangeSummary(
      changes,
-      '    ✅ Rollback completed - removed {count} contaminated changes:',
+      'Rollback completed - removed {count} contaminated changes:',
-      '    ✅ Rollback completed - no changes to remove',
+      'Rollback completed - no changes to remove',
-      chalk.yellow,
+      logger,
      'info',
      3
    );
    return { success: true };
  } catch (error) {
-    const result = toErrorResult(error);
+    const errMsg = error instanceof Error ? error.message : String(error);
-    console.log(chalk.red(`    ❌ Rollback failed after retries: ${result.error?.message}`));
+    logger.error(`Rollback failed after retries: ${errMsg}`);
-    return result;
+    return {
      success: false,
      error: new PentestError(
        `Git rollback failed: ${errMsg}`,
        'filesystem',
        false, // Non-retryable - rollback is best-effort cleanup
        { sourceDir, reason },
        ErrorCode.GIT_ROLLBACK_FAILED
      ),
    };
  }
 }
@@ -199,29 +220,30 @@ export async function rollbackGitWorkspace(
 export async function createGitCheckpoint(
  sourceDir: string,
  description: string,
-  attempt: number
+  attempt: number,
  logger: ActivityLogger
 ): Promise<GitOperationResult> {
  // Skip git operations if not a git repository
  if (!(await isGitRepository(sourceDir))) {
-    console.log(chalk.gray(`    ⏭️  Skipping git checkpoint (not a git repository)`));
+    logger.info('Skipping git checkpoint (not a git repository)');
    return { success: true };
  }
-  console.log(chalk.blue(`    📍 Creating checkpoint for ${description} (attempt ${attempt})`));
+  logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
  try {
-    // First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
+    // 1. On retries, clean workspace to prevent pollution from previous attempt
    if (attempt > 1) {
-      const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`);
+      const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
      if (!cleanResult.success) {
-        console.log(
+        logger.warn(`Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`);
          chalk.yellow(`    ⚠️ Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`)
        );
      }
    }
    // 2. Detect existing changes
    const changes = await getChangedFiles(sourceDir, 'status check');
    const hasChanges = changes.length > 0;
    // 3. Stage and commit checkpoint
    await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
    await executeGitCommandWithRetry(
      ['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
@@ -229,30 +251,32 @@ export async function createGitCheckpoint(
      'creating commit'
    );
    // 4. Log result
    if (hasChanges) {
-      console.log(chalk.blue(`    ✅ Checkpoint created with uncommitted changes staged`));
+      logger.info('Checkpoint created with uncommitted changes staged');
    } else {
-      console.log(chalk.blue(`    ✅ Empty checkpoint created (no workspace changes)`));
+      logger.info('Empty checkpoint created (no workspace changes)');
    }
    return { success: true };
  } catch (error) {
    const result = toErrorResult(error);
-    console.log(chalk.yellow(`    ⚠️ Checkpoint creation failed after retries: ${result.error?.message}`));
+    logger.warn(`Checkpoint creation failed after retries: ${result.error?.message}`);
    return result;
  }
 }
 export async function commitGitSuccess(
  sourceDir: string,
-  description: string
+  description: string,
  logger: ActivityLogger
 ): Promise<GitOperationResult> {
  // Skip git operations if not a git repository
  if (!(await isGitRepository(sourceDir))) {
-    console.log(chalk.gray(`    ⏭️  Skipping git commit (not a git repository)`));
+    logger.info('Skipping git commit (not a git repository)');
    return { success: true };
  }
-  console.log(chalk.green(`    💾 Committing successful results for ${description}`));
+  logger.info(`Committing successful results for ${description}`);
  try {
    const changes = await getChangedFiles(sourceDir, 'status check for success commit');
@@ -269,15 +293,14 @@ export async function commitGitSuccess(
    logChangeSummary(
      changes,
-      '    ✅ Success commit created with {count} file changes:',
+      'Success commit created with {count} file changes:',
-      '    ✅ Empty success commit created (agent made no file changes)',
+      'Empty success commit created (agent made no file changes)',
-      chalk.green,
+      logger
      5
    );
    return { success: true };
  } catch (error) {
    const result = toErrorResult(error);
-    console.log(chalk.yellow(`    ⚠️ Success commit failed after retries: ${result.error?.message}`));
+    logger.warn(`Success commit failed after retries: ${result.error?.message}`);
    return result;
  }
 }
@@ -0,0 +1,23 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Services Module
 *
 * Exports DI container and service classes for Shannon agent execution.
 * Services are pure domain logic with no Temporal dependencies.
 */
 export { Container, getOrCreateContainer, removeContainer } from './container.js';
 export type { ContainerDependencies } from './container.js';
 export { ConfigLoaderService } from './config-loader.js';
 export { ExploitationCheckerService } from './exploitation-checker.js';
 export { AgentExecutionService } from './agent-execution.js';
 export type { AgentExecutionInput } from './agent-execution.js';
 export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
 export { loadPrompt } from './prompt-manager.js';
@@ -5,10 +5,10 @@
 // as published by the Free Software Foundation.
 import { fs, path } from 'zx';
-import chalk from 'chalk';
+import { PentestError, handlePromptError } from './error-handling.js';
-import { PentestError, handlePromptError } from '../error-handling.js';
+import { MCP_AGENT_MAPPING } from '../session-manager.js';
 import { MCP_AGENT_MAPPING } from '../constants.js';
 import type { Authentication, DistributedConfig } from '../types/config.js';
 import type { ActivityLogger } from '../types/activity-logger.js';
 interface PromptVariables {
  webUrl: string;
@@ -22,9 +22,9 @@ interface IncludeReplacement {
 }
 // Pure function: Build complete login instructions from config
-async function buildLoginInstructions(authentication: Authentication): Promise<string> {
+async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
  try {
-    // Load the login instructions template
+    // 1. Load the login instructions template
    const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
    if (!await fs.pathExists(loginInstructionsPath)) {
@@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication): Promise<s
    const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
    // Helper function to extract sections based on markers
    const getSection = (content: string, sectionName: string): string => {
      const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
      const match = regex.exec(content);
      return match ? match[1]!.trim() : '';
    };
-    // Extract sections based on login type
+    // 2. Extract sections based on login type
    const loginType = authentication.login_type?.toUpperCase();
    let loginInstructions = '';
    // Build instructions with only relevant sections
    const commonSection = getSection(fullTemplate, 'COMMON');
    const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
    const verificationSection = getSection(fullTemplate, 'VERIFICATION');
-    // Fallback to full template if markers are missing (backward compatibility)
+    // 3. Assemble instructions from sections (fallback to full template if markers missing)
    if (!commonSection && !authSection && !verificationSection) {
-      console.log(chalk.yellow('⚠️ Section markers not found, using full login instructions template'));
+      logger.warn('Section markers not found, using full login instructions template');
      loginInstructions = fullTemplate;
    } else {
      // Combine relevant sections
      loginInstructions = [commonSection, authSection, verificationSection]
-        .filter(section => section) // Remove empty sections
+        .filter(section => section)
        .join('\n\n');
    }
-    // Replace the user instructions placeholder with the login flow from config
+    // 4. Interpolate login flow and credential placeholders
    let userInstructions = (authentication.login_flow ?? []).join('\n');
    // Replace credential placeholders within the user instructions
    if (authentication.credentials) {
      if (authentication.credentials.username) {
        userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
@@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication): Promise<s
    loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
-    // Replace TOTP secret placeholder if present in template
+    // 5. Replace TOTP secret placeholder if present in template
    if (authentication.credentials?.totp_secret) {
      loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
    }
@@ -128,7 +124,8 @@ async function processIncludes(content: string, baseDir: string): Promise<string
 async function interpolateVariables(
  template: string,
  variables: PromptVariables,
-  config: DistributedConfig | null = null
+  config: DistributedConfig | null = null,
  logger: ActivityLogger
 ): Promise<string> {
  try {
    if (!template || typeof template !== 'string') {
@@ -174,7 +171,7 @@ async function interpolateVariables(
      // Extract and inject login instructions from config
      if (config.authentication?.login_flow) {
-        const loginInstructions = await buildLoginInstructions(config.authentication);
+        const loginInstructions = await buildLoginInstructions(config.authentication, logger);
        result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
      } else {
        result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
@@ -189,7 +186,7 @@ async function interpolateVariables(
    // Validate that all placeholders have been replaced (excluding instructional text)
    const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
    if (remainingPlaceholders) {
-      console.log(chalk.yellow(`⚠️ Warning: Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`));
+      logger.warn(`Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`);
    }
    return result;
@@ -212,20 +209,19 @@ export async function loadPrompt(
  promptName: string,
  variables: PromptVariables,
  config: DistributedConfig | null = null,
-  pipelineTestingMode: boolean = false
+  pipelineTestingMode: boolean = false,
  logger: ActivityLogger
 ): Promise<string> {
  try {
-    // Use pipeline testing prompts if pipeline testing mode is enabled
+    // 1. Resolve prompt file path
    const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
    const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
    const promptPath = path.join(promptsDir, `${promptName}.txt`);
    // Debug message for pipeline testing mode
    if (pipelineTestingMode) {
-      console.log(chalk.yellow(`⚡ Using pipeline testing prompt: ${promptPath}`));
+      logger.info(`Using pipeline testing prompt: ${promptPath}`);
    }
    // Check if file exists first
    if (!await fs.pathExists(promptPath)) {
      throw new PentestError(
        `Prompt file not found: ${promptPath}`,
@@ -235,26 +231,26 @@ export async function loadPrompt(
      );
    }
-    // Add MCP server assignment to variables
+    // 2. Assign MCP server based on agent name
    const enhancedVariables: PromptVariables = { ...variables };
    // Assign MCP server based on prompt name (agent name)
    const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
    if (mcpServer) {
      enhancedVariables.MCP_SERVER = mcpServer;
-      console.log(chalk.gray(`    🎭 Assigned ${promptName} → ${enhancedVariables.MCP_SERVER}`));
+      logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
    } else {
      // Fallback for unknown agents
      enhancedVariables.MCP_SERVER = 'playwright-agent1';
-      console.log(chalk.yellow(`    🎭 Unknown agent ${promptName}, using fallback → ${enhancedVariables.MCP_SERVER}`));
+      logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
    }
    // 3. Read template file
    let template = await fs.readFile(promptPath, 'utf8');
-    // Pre-process the template to handle @include directives
+    // 4. Process @include directives
    template = await processIncludes(template, promptsDir);
-    return await interpolateVariables(template, enhancedVariables, config);
+    // 5. Interpolate variables and return final prompt
    return await interpolateVariables(template, enhancedVariables, config, logger);
  } catch (error) {
    if (error instanceof PentestError) {
      throw error;
@@ -6,9 +6,12 @@
 import { fs, path } from 'zx';
 import { PentestError } from './error-handling.js';
-import { asyncPipe } from './utils/functional.js';
+import { ErrorCode } from '../types/errors.js';
 import { type Result, ok, err } from '../types/result.js';
 import { asyncPipe } from '../utils/functional.js';
 import type { VulnType, ExploitationDecision } from '../types/agents.js';
-export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
+export type { VulnType, ExploitationDecision } from '../types/agents.js';
 interface VulnTypeConfigItem {
  deliverable: string;
@@ -60,18 +63,11 @@ interface QueueValidationResult {
  error: string | null;
 }
 export interface ExploitationDecision {
  shouldExploit: boolean;
  shouldRetry: boolean;
  vulnerabilityCount: number;
  vulnType: VulnType;
 }
-export interface SafeValidationResult {
+/**
-  success: boolean;
+ * Result type for safe validation - explicit error handling.
-  data?: ExploitationDecision;
+ */
-  error?: PentestError;
+export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
 }
 // Vulnerability type configuration as immutable data
 const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
@@ -196,7 +192,8 @@ const validateExistenceRules = (
          deliverablePath: pathsWithExistence.deliverable,
          queuePath: pathsWithExistence.queue,
          existence,
-        }
+        },
        ErrorCode.DELIVERABLE_NOT_FOUND
      ),
    };
  }
@@ -311,15 +308,18 @@ export async function validateQueueAndDeliverable(
  );
 }
-// Pure function to safely validate (returns result instead of throwing)
+/**
-export const safeValidateQueueAndDeliverable = async (
+ * Safely validate queue and deliverable files.
 * Returns Result<ExploitationDecision, PentestError> for explicit error handling.
 */
 export async function validateQueueSafe(
  vulnType: VulnType,
  sourceDir: string
-): Promise<SafeValidationResult> => {
+): Promise<SafeValidationResult> {
  try {
    const result = await validateQueueAndDeliverable(vulnType, sourceDir);
-    return { success: true, data: result };
+    return ok(result);
  } catch (error) {
-    return { success: false, error: error as PentestError };
+    return err(error as PentestError);
  }
-};
+}
@@ -5,8 +5,9 @@
 // as published by the Free Software Foundation.
 import { fs, path } from 'zx';
-import chalk from 'chalk';
+import { PentestError } from './error-handling.js';
-import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';
 import type { ActivityLogger } from '../types/activity-logger.js';
 interface DeliverableFile {
  name: string;
@@ -15,7 +16,7 @@ interface DeliverableFile {
 }
 // Pure function: Assemble final report from specialist deliverables
-export async function assembleFinalReport(sourceDir: string): Promise<string> {
+export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise<string> {
  const deliverableFiles: DeliverableFile[] = [
    { name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
    { name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
@@ -32,18 +33,24 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
      if (await fs.pathExists(filePath)) {
        const content = await fs.readFile(filePath, 'utf8');
        sections.push(content);
-        console.log(chalk.green(`✅ Added ${file.name} findings`));
+        logger.info(`Added ${file.name} findings`);
      } else if (file.required) {
-        throw new Error(`Required file ${file.path} not found`);
+        throw new PentestError(
          `Required deliverable file not found: ${file.path}`,
          'filesystem',
          false,
          { deliverableFile: file.path, sourceDir },
          ErrorCode.DELIVERABLE_NOT_FOUND
        );
      } else {
-        console.log(chalk.gray(`⏭️  No ${file.name} deliverable found`));
+        logger.info(`No ${file.name} deliverable found`);
      }
    } catch (error) {
      if (file.required) {
        throw error;
      }
      const err = error as Error;
-      console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${err.message}`));
+      logger.warn(`Could not read ${file.path}: ${err.message}`);
    }
  }
@@ -55,7 +62,7 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
    // Ensure deliverables directory exists
    await fs.ensureDir(deliverablesDir);
    await fs.writeFile(finalReportPath, finalContent);
-    console.log(chalk.green(`✅ Final report assembled at ${finalReportPath}`));
+    logger.info(`Final report assembled at ${finalReportPath}`);
  } catch (error) {
    const err = error as Error;
    throw new PentestError(
@@ -76,13 +83,14 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
 */
 export async function injectModelIntoReport(
  repoPath: string,
-  outputPath: string
+  outputPath: string,
  logger: ActivityLogger
 ): Promise<void> {
  // 1. Read session.json to get model information
  const sessionJsonPath = path.join(outputPath, 'session.json');
  if (!(await fs.pathExists(sessionJsonPath))) {
-    console.log(chalk.yellow('⚠️ session.json not found, skipping model injection'));
+    logger.warn('session.json not found, skipping model injection');
    return;
  }
@@ -103,18 +111,18 @@ export async function injectModelIntoReport(
  }
  if (models.size === 0) {
-    console.log(chalk.yellow('⚠️ No model information found in session.json'));
+    logger.warn('No model information found in session.json');
    return;
  }
  const modelStr = Array.from(models).join(', ');
-  console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`));
+  logger.info(`Injecting model info into report: ${modelStr}`);
  // 3. Read the final report
  const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
  if (!(await fs.pathExists(reportPath))) {
-    console.log(chalk.yellow('⚠️ Final report not found, skipping model injection'));
+    logger.warn('Final report not found, skipping model injection');
    return;
  }
@@ -132,7 +140,7 @@ export async function injectModelIntoReport(
      assessmentDatePattern,
      `$1\n${modelLine}`
    );
-    console.log(chalk.green('✅ Model info injected into Executive Summary'));
+    logger.info('Model info injected into Executive Summary');
  } else {
    // If no Assessment Date line found, try to add after Executive Summary header
    const execSummaryPattern = /^## Executive Summary$/m;
@@ -142,9 +150,9 @@ export async function injectModelIntoReport(
        execSummaryPattern,
        `## Executive Summary\n- Model: ${modelStr}`
      );
-      console.log(chalk.green('✅ Model info added to Executive Summary header'));
+      logger.info('Model info added to Executive Summary header');
    } else {
-      console.log(chalk.yellow('⚠️ Could not find Executive Summary section'));
+      logger.warn('Could not find Executive Summary section');
      return;
    }
  }
@@ -4,106 +4,105 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
-import { path } from 'zx';
+import { path, fs } from 'zx';
-import type { AgentName } from './types/index.js';
+import { validateQueueAndDeliverable } from './services/queue-validation.js';
-
+import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js';
-// Agent definition interface
+import type { ActivityLogger } from './types/activity-logger.js';
 /**
 * Static registry entry describing one pipeline agent.
 *
 * Every entry of the AGENTS registry supplies all five fields, so the
 * prompt template and deliverable filename are declared here as well;
 * otherwise consumers cannot read them through the registry's type.
 */
 export interface AgentDefinition {
  // Registry key of the agent (matches the key it is stored under in AGENTS)
  name: AgentName;
  // Human-readable label for the agent
  displayName: string;
  // Agents listed as prerequisites of this one
  prerequisites: AgentName[];
  // Prompt template name; these values are the keys of MCP_AGENT_MAPPING
  promptTemplate: string;
  // Deliverable filename; must match mcp-server DELIVERABLE_FILENAMES
  deliverableFilename: string;
 }
 // Agent definitions according to PRD
 // NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
 export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
  'pre-recon': {
    name: 'pre-recon',
    displayName: 'Pre-recon agent',
-    prerequisites: []
+    prerequisites: [],
    promptTemplate: 'pre-recon-code',
    deliverableFilename: 'code_analysis_deliverable.md',
  },
  'recon': {
    name: 'recon',
    displayName: 'Recon agent',
-    prerequisites: ['pre-recon']
+    prerequisites: ['pre-recon'],
    promptTemplate: 'recon',
    deliverableFilename: 'recon_deliverable.md',
  },
  'injection-vuln': {
    name: 'injection-vuln',
    displayName: 'Injection vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
    promptTemplate: 'vuln-injection',
    deliverableFilename: 'injection_analysis_deliverable.md',
  },
  'xss-vuln': {
    name: 'xss-vuln',
    displayName: 'XSS vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
    promptTemplate: 'vuln-xss',
    deliverableFilename: 'xss_analysis_deliverable.md',
  },
  'auth-vuln': {
    name: 'auth-vuln',
    displayName: 'Auth vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
    promptTemplate: 'vuln-auth',
    deliverableFilename: 'auth_analysis_deliverable.md',
  },
  'ssrf-vuln': {
    name: 'ssrf-vuln',
    displayName: 'SSRF vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
    promptTemplate: 'vuln-ssrf',
    deliverableFilename: 'ssrf_analysis_deliverable.md',
  },
  'authz-vuln': {
    name: 'authz-vuln',
    displayName: 'Authz vuln agent',
-    prerequisites: ['recon']
+    prerequisites: ['recon'],
    promptTemplate: 'vuln-authz',
    deliverableFilename: 'authz_analysis_deliverable.md',
  },
  'injection-exploit': {
    name: 'injection-exploit',
    displayName: 'Injection exploit agent',
-    prerequisites: ['injection-vuln']
+    prerequisites: ['injection-vuln'],
    promptTemplate: 'exploit-injection',
    deliverableFilename: 'injection_exploitation_evidence.md',
  },
  'xss-exploit': {
    name: 'xss-exploit',
    displayName: 'XSS exploit agent',
-    prerequisites: ['xss-vuln']
+    prerequisites: ['xss-vuln'],
    promptTemplate: 'exploit-xss',
    deliverableFilename: 'xss_exploitation_evidence.md',
  },
  'auth-exploit': {
    name: 'auth-exploit',
    displayName: 'Auth exploit agent',
-    prerequisites: ['auth-vuln']
+    prerequisites: ['auth-vuln'],
    promptTemplate: 'exploit-auth',
    deliverableFilename: 'auth_exploitation_evidence.md',
  },
  'ssrf-exploit': {
    name: 'ssrf-exploit',
    displayName: 'SSRF exploit agent',
-    prerequisites: ['ssrf-vuln']
+    prerequisites: ['ssrf-vuln'],
    promptTemplate: 'exploit-ssrf',
    deliverableFilename: 'ssrf_exploitation_evidence.md',
  },
  'authz-exploit': {
    name: 'authz-exploit',
    displayName: 'Authz exploit agent',
-    prerequisites: ['authz-vuln']
+    prerequisites: ['authz-vuln'],
    promptTemplate: 'exploit-authz',
    deliverableFilename: 'authz_exploitation_evidence.md',
  },
  'report': {
    name: 'report',
    displayName: 'Report agent',
-    prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
+    prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
-  }
+    promptTemplate: 'report-executive',
-});
+    deliverableFilename: 'comprehensive_security_assessment_report.md',
-
+  },
 // Ordered list of every agent, from pre-recon through the final report.
 // The array is frozen, so the order cannot be changed at runtime.
 export const AGENT_ORDER: readonly AgentName[] = Object.freeze([
  // Reconnaissance
  'pre-recon', 'recon',
  // Vulnerability analysis
  'injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln',
  // Exploitation
  'injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit',
  // Reporting
  'report',
 ] as const);
 // Returns the two agent groups that run in parallel: the vulnerability
 // analysis agents and their exploitation counterparts. Each call builds
 // a fresh frozen object.
 export const getParallelGroups = (): Readonly<{ vuln: AgentName[]; exploit: AgentName[] }> => {
  const vuln: AgentName[] = [
    'injection-vuln',
    'xss-vuln',
    'auth-vuln',
    'ssrf-vuln',
    'authz-vuln',
  ];
  const exploit: AgentName[] = [
    'injection-exploit',
    'xss-exploit',
    'auth-exploit',
    'ssrf-exploit',
    'authz-exploit',
  ];
  return Object.freeze({ vuln, exploit });
 };
 // Phase names for metrics aggregation
@@ -126,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
  'report': 'reporting',
 });
 // Builds the validator for a vulnerability-analysis agent.
 // The validator resolves to true when validateQueueAndDeliverable succeeds;
 // when it throws, the failure is logged as a warning and false is returned.
 function createVulnValidator(vulnType: VulnType): AgentValidator {
  const validate = async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    let passed = true;
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      logger.warn(`Queue validation failed for ${vulnType}: ${reason}`);
      passed = false;
    }
    return passed;
  };
  return validate;
 }
 // Builds the validator for an exploitation agent: it checks that
 // deliverables/<vulnType>_exploitation_evidence.md exists under sourceDir.
 function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
 }
 // MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts.
 // Keys are promptTemplate values from the AGENTS registry (not agent names).
 // The object is frozen, so it is typed Readonly like the other frozen registries
 // in this file; an accidental write is then a compile error rather than a runtime one.
 export const MCP_AGENT_MAPPING: Readonly<Record<string, PlaywrightAgent>> = Object.freeze({
  // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
  // NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
  // but an MCP server is assigned anyway for consistency and future extensibility
  'pre-recon-code': 'playwright-agent1',
  // Phase 2: Reconnaissance (actual prompt name is 'recon')
  'recon': 'playwright-agent2',
  // Phase 3: Vulnerability Analysis (5 parallel agents, one instance each)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',
  // Phase 4: Exploitation (5 parallel agents - same instances as their vuln counterparts)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',
  // Phase 5: Reporting (actual prompt name is 'report-executive')
  // NOTE: Report generation is typically text-based and doesn't use browser automation,
  // but an MCP server is assigned anyway for potential screenshot inclusion or future needs
  'report-executive': 'playwright-agent3',
 });
 // Direct agent-to-validator mapping - much simpler than pattern matching.
 // Each validator receives the source directory (and the activity logger) and
 // resolves to true when the agent's expected output is present.
 // The object is frozen, so it is typed Readonly like the other frozen registries
 // in this file; an accidental write is then a compile error rather than a runtime one.
 export const AGENT_VALIDATORS: Readonly<Record<AgentName, AgentValidator>> = Object.freeze({
  // Pre-reconnaissance agent - validates the code analysis deliverable created by the agent
  'pre-recon': async (sourceDir: string): Promise<boolean> => {
    const codeAnalysisFile = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
    return await fs.pathExists(codeAnalysisFile);
  },
  // Reconnaissance agent - validates that the recon deliverable exists
  recon: async (sourceDir: string): Promise<boolean> => {
    const reconFile = path.join(sourceDir, 'deliverables', 'recon_deliverable.md');
    return await fs.pathExists(reconFile);
  },
  // Vulnerability analysis agents - delegate to queue + deliverable validation
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),
  // Exploitation agents - check for the per-type exploitation evidence file
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),
  // Executive report agent - the only validator that logs an error when its file is missing
  report: async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const reportFile = path.join(
      sourceDir,
      'deliverables',
      'comprehensive_security_assessment_report.md'
    );
    const reportExists = await fs.pathExists(reportFile);
    if (!reportExists) {
      logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    }
    return reportExists;
  },
 });
@@ -1,56 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { $, fs, path } from 'zx';
 import chalk from 'chalk';
 import { PentestError } from '../error-handling.js';
 // Prepare a local repository for testing: resolve the path, make sure it is a
 // git repository with an agent identity configured, and create an initial
 // checkpoint commit. Git failures are reported as warnings and do not abort setup.
 // Returns the resolved absolute source directory.
 export async function setupLocalRepo(repoPath: string): Promise<string> {
  const describeError = (cause: unknown): string =>
    cause instanceof Error ? cause.message : String(cause);

  try {
    const resolvedDir = path.resolve(repoPath);

    // MCP servers are configured via the mcpServers option in claude-executor.js,
    // so no pre-setup with the claude CLI happens here.

    try {
      // Initialize git only when the directory has no .git entry yet
      const hasGitDir = await fs.pathExists(path.join(resolvedDir, '.git'));
      if (!hasGitDir) {
        await $`cd ${resolvedDir} && git init`;
        console.log(chalk.blue('✅ Git repository initialized'));
      }

      // Identity used for the agent's commits
      await $`cd ${resolvedDir} && git config user.name "Pentest Agent"`;
      await $`cd ${resolvedDir} && git config user.email "agent@localhost"`;

      // Initial checkpoint (an empty commit is allowed)
      await $`cd ${resolvedDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`;
      console.log(chalk.green('✅ Initial checkpoint created'));
    } catch (gitFailure) {
      // Non-fatal - continue without Git setup
      console.log(chalk.yellow(`⚠️ Git setup warning: ${describeError(gitFailure)}`));
    }

    // MCP tools (save_deliverable, generate_totp) come from the shannon-helper MCP server,
    // so no bash scripts are copied into the target repository.

    return resolvedDir;
  } catch (failure) {
    // Already-classified errors pass through untouched
    if (failure instanceof PentestError) {
      throw failure;
    }
    const detail = describeError(failure);
    throw new PentestError(`Local repository setup failed: ${detail}`, 'filesystem', false, {
      repoPath,
      originalError: detail,
    });
  }
 }
@@ -7,28 +7,58 @@
 /**
 * Temporal activities for Shannon agent execution.
 *
- * Each activity wraps a single agent execution with:
+ * Each activity wraps service calls with Temporal-specific concerns:
 * - Heartbeat loop (2s interval) to signal worker liveness
- * - Git checkpoint/rollback/commit per attempt
+ * - Error classification into ApplicationFailure
- * - Error classification for Temporal retry behavior
+ * - Container lifecycle management
 * - Audit session logging
 *
- * Temporal handles retries based on error classification:
+ * Business logic is delegated to services in src/services/.
 * - Retryable: BillingError, TransientError (429, 5xx, network)
 * - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
 */
 import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
-import chalk from 'chalk';
+import path from 'path';
 import fs from 'fs/promises';
 import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
 import { ErrorCode } from '../types/errors.js';
 import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
 import { ExploitationCheckerService } from '../services/exploitation-checker.js';
 import type { VulnType, ExploitationDecision } from '../services/queue-validation.js';
 import { AuditSession } from '../audit/index.js';
 import type { WorkflowSummary } from '../audit/workflow-logger.js';
 import type { AgentName } from '../types/agents.js';
 import { ALL_AGENTS } from '../types/agents.js';
 import type { AgentMetrics, ResumeState } from './shared.js';
 import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
 import { readJson, fileExists } from '../utils/file-io.js';
 import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
 import { AGENTS } from '../session-manager.js';
 import { executeGitCommandWithRetry } from '../services/git-manager.js';
 import type { ResumeAttempt } from '../audit/metrics-tracker.js';
 import { createActivityLogger } from './activity-logger.js';
 // Max lengths to prevent Temporal protobuf buffer overflow
 const MAX_ERROR_MESSAGE_LENGTH = 2000;
 const MAX_STACK_TRACE_LENGTH = 1000;
 // Max retries for output validation errors (agent didn't save deliverables)
 // Lower than default 50 since this is unlikely to self-heal
 const MAX_OUTPUT_VALIDATION_RETRIES = 3;
 // Delay between activity heartbeats, in milliseconds (2s).
 // NOTE(review): presumably must stay below the heartbeatTimeout configured for these
 // activities on the workflow side — confirm against the workflow's activity options.
 const HEARTBEAT_INTERVAL_MS = 2000;
 /**
 * Input for all agent activities.
 *
 * The same object is passed to every activity of a workflow run; the activities
 * derive audit session metadata from it (id, webUrl, repoPath, optional outputPath).
 */
 export interface ActivityInput {
  /** Target URL; recorded in the session metadata and handed to the agent execution. */
  webUrl: string;
  /** Path of the repository the agents work in; deliverables are looked up under `<repoPath>/deliverables/`. */
  repoPath: string;
  /** Optional configuration file path, forwarded to agent execution when present. */
  configPath?: string;
  /** Optional base directory for audit output; report metadata injection falls back to `./audit-logs` when omitted. */
  outputPath?: string;
  /** Optional testing-mode flag; treated as `false` when omitted. */
  pipelineTestingMode?: boolean;
  /** Workflow identifier; used to look up the per-workflow service container and to initialize the audit session. */
  workflowId: string;
  /** Session identifier used as the audit session id — workspace name (for resume) or workflowId (for new runs). */
  sessionId: string;
 }
 /**
 * Truncate error message to prevent buffer overflow in Temporal serialization.
 */
@@ -48,85 +78,34 @@ function truncateStackTrace(failure: ApplicationFailure): void {
  }
 }
 import {
  runClaudePrompt,
  validateAgentOutput,
  type ClaudePromptResult,
 } from '../ai/claude-executor.js';
 import { loadPrompt } from '../prompts/prompt-manager.js';
 import { parseConfig, distributeConfig } from '../config-parser.js';
 import { classifyErrorForTemporal } from '../error-handling.js';
 import {
  safeValidateQueueAndDeliverable,
  type VulnType,
  type ExploitationDecision,
 } from '../queue-validation.js';
 import {
  createGitCheckpoint,
  commitGitSuccess,
  rollbackGitWorkspace,
  getGitCommitHash,
 } from '../utils/git-manager.js';
 import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
 import { getPromptNameForAgent } from '../types/agents.js';
 import { AuditSession } from '../audit/index.js';
 import type { WorkflowSummary } from '../audit/workflow-logger.js';
 import type { AgentName } from '../types/agents.js';
 import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
 import type { AgentMetrics, ResumeState } from './shared.js';
 import type { DistributedConfig } from '../types/config.js';
 import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
 import type { ResumeAttempt } from '../audit/metrics-tracker.js';
 import { executeGitCommandWithRetry } from '../utils/git-manager.js';
 import path from 'path';
 import fs from 'fs/promises';
 const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
 /**
- * Input for all agent activities.
+ * Build SessionMetadata from ActivityInput.
 * Matches PipelineInput but with required workflowId for audit correlation.
 */
-export interface ActivityInput {
+function buildSessionMetadata(input: ActivityInput): SessionMetadata {
-  webUrl: string;
+  const { webUrl, repoPath, outputPath, sessionId } = input;
-  repoPath: string;
+  return {
-  configPath?: string;
+    id: sessionId,
-  outputPath?: string;
+    webUrl,
-  pipelineTestingMode?: boolean;
+    repoPath,
-  workflowId: string;
+    ...(outputPath && { outputPath }),
-  sessionId: string; // Workspace name (for resume) or workflowId (for new runs)
+  };
 }
 /**
- * Core activity implementation.
+ * Core activity implementation using services.
 *
 * Executes a single agent with:
 * 1. Heartbeat loop for worker liveness
- * 2. Config loading (if configPath provided)
+ * 2. Container creation/reuse
- * 3. Audit session initialization
+ * 3. Service-based agent execution
- * 4. Prompt loading
+ * 4. Error classification for Temporal retry
 * 5. Git checkpoint before execution
 * 6. Agent execution (single attempt)
 * 7. Output validation
 * 8. Git commit on success, rollback on failure
 * 9. Error classification for Temporal retry
 */
 async function runAgentActivity(
  agentName: AgentName,
  input: ActivityInput
 ): Promise<AgentMetrics> {
-  const {
+  const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
    webUrl,
    repoPath,
    configPath,
    outputPath,
    pipelineTestingMode = false,
    workflowId,
  } = input;
  const startTime = Date.now();
  // Get attempt number from Temporal context (tracks retries automatically)
  const attemptNumber = Context.current().info.attempt;
  // Heartbeat loop - signals worker is alive to Temporal server
@@ -136,160 +115,66 @@ async function runAgentActivity(
  }, HEARTBEAT_INTERVAL_MS);
  try {
-    // 1. Load config (if provided)
+    const logger = createActivityLogger();
    let distributedConfig: DistributedConfig | null = null;
    if (configPath) {
      try {
        const config = await parseConfig(configPath);
        distributedConfig = distributeConfig(config);
      } catch (err) {
        throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
      }
    }
-    // 2. Build session metadata for audit
+    // 1. Build session metadata and get/create container
-    // Use sessionId (workspace name) for directory, workflowId for tracking
+    const sessionMetadata = buildSessionMetadata(input);
-    const sessionMetadata: SessionMetadata = {
+    const container = getOrCreateContainer(workflowId, sessionMetadata);
      id: input.sessionId,
      webUrl,
      repoPath,
      ...(outputPath && { outputPath }),
    };
-    // 3. Initialize audit session (idempotent, safe across retries)
+    // 2. Create audit session for THIS agent execution
    // NOTE: Each agent needs its own AuditSession because AuditSession uses
    // instance state (currentAgentName) that cannot be shared across parallel agents
    const auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize(workflowId);
-    // 4. Load prompt
+    // 3. Execute agent via service (throws PentestError on failure)
-    const promptName = getPromptNameForAgent(agentName);
+    const endResult = await container.agentExecution.executeOrThrow(
    const prompt = await loadPrompt(
      promptName,
      { webUrl, repoPath },
      distributedConfig,
      pipelineTestingMode
    );
    // 5. Create git checkpoint before execution
    await createGitCheckpoint(repoPath, agentName, attemptNumber);
    await auditSession.startAgent(agentName, prompt, attemptNumber);
    // 6. Execute agent (single attempt - Temporal handles retries)
    const result: ClaudePromptResult = await runClaudePrompt(
      prompt,
      repoPath,
      '', // context
      agentName, // description
      agentName,
-      chalk.cyan,
+      {
-      sessionMetadata,
+        webUrl,
        repoPath,
        configPath,
        pipelineTestingMode,
        attemptNumber,
      },
      auditSession,
-      attemptNumber
+      logger
    );
-    // 6.5. Sanity check: Detect spending cap that slipped through all detection layers
+    // 4. Return metrics
    // Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
    if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
      const resultText = result.result || '';
      const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
      if (looksLikeBillingError) {
        await rollbackGitWorkspace(repoPath, 'spending cap detected');
        await auditSession.endAgent(agentName, {
          attemptNumber,
          duration_ms: result.duration,
          cost_usd: 0,
          success: false,
          model: result.model,
          error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
        });
        // Throw as billing error so Temporal retries with long backoff
        throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
      }
    }
    // 7. Handle execution failure
    if (!result.success) {
      await rollbackGitWorkspace(repoPath, 'execution failure');
      await auditSession.endAgent(agentName, {
        attemptNumber,
        duration_ms: result.duration,
        cost_usd: result.cost || 0,
        success: false,
        model: result.model,
        error: result.error || 'Execution failed',
      });
      throw new Error(result.error || 'Agent execution failed');
    }
    // 8. Validate output
    const validationPassed = await validateAgentOutput(result, agentName, repoPath);
    if (!validationPassed) {
      await rollbackGitWorkspace(repoPath, 'validation failure');
      await auditSession.endAgent(agentName, {
        attemptNumber,
        duration_ms: result.duration,
        cost_usd: result.cost || 0,
        success: false,
        model: result.model,
        error: 'Output validation failed',
      });
      // Limit output validation retries (unlikely to self-heal)
      if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
        throw ApplicationFailure.nonRetryable(
          `Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
          'OutputValidationError',
          [{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
        );
      }
      // Let Temporal retry (will be classified as OutputValidationError)
      throw new Error(`Agent ${agentName} failed output validation`);
    }
    // 9. Success - commit deliverables, then capture checkpoint hash
    await commitGitSuccess(repoPath, agentName);
    const commitHash = await getGitCommitHash(repoPath);
    await auditSession.endAgent(agentName, {
      attemptNumber,
      duration_ms: result.duration,
      cost_usd: result.cost || 0,
      success: true,
      model: result.model,
      ...(commitHash && { checkpoint: commitHash }),
    });
    // 10. Return metrics
    return {
      durationMs: Date.now() - startTime,
-      inputTokens: null, // Not currently exposed by SDK wrapper
+      inputTokens: null,
      outputTokens: null,
-      costUsd: result.cost ?? null,
+      costUsd: endResult.cost_usd,
-      numTurns: result.turns ?? null,
+      numTurns: null,
-      model: result.model,
+      model: endResult.model,
    };
  } catch (error) {
-    // Rollback git workspace before Temporal retry to ensure clean state
+    // If error is already an ApplicationFailure, re-throw directly
    try {
      await rollbackGitWorkspace(repoPath, 'error recovery');
    } catch (rollbackErr) {
      // Log but don't fail - rollback is best-effort
      console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
    }
    // If error is already an ApplicationFailure (e.g., from our retry limit logic),
    // re-throw it directly without re-classifying
    if (error instanceof ApplicationFailure) {
      throw error;
    }
    // Check if output validation retry limit reached (PentestError with code)
    if (
      error instanceof PentestError &&
      error.code === ErrorCode.OUTPUT_VALIDATION_FAILED &&
      attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES
    ) {
      throw ApplicationFailure.nonRetryable(
        `Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
        'OutputValidationError',
        [{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
      );
    }
    // Classify error for Temporal retry behavior
    const classified = classifyErrorForTemporal(error);
    // Truncate message to prevent protobuf buffer overflow
    const rawMessage = error instanceof Error ? error.message : String(error);
    const message = truncateErrorMessage(rawMessage);
    if (classified.retryable) {
      // Temporal will retry with configured backoff
      const failure = ApplicationFailure.create({
        message,
        type: classified.type,
@@ -298,7 +183,6 @@ async function runAgentActivity(
      truncateStackTrace(failure);
      throw failure;
    } else {
      // Fail immediately - no retry
      const failure = ApplicationFailure.nonRetryable(message, classified.type, [
        { agentName, attemptNumber, elapsed: Date.now() - startTime },
      ]);
@@ -310,9 +194,6 @@ async function runAgentActivity(
  }
 }
 // === Individual Agent Activity Exports ===
 // Each function is a thin wrapper around runAgentActivity with the agent name.
 /**
 * Activity entry point for the `pre-recon` agent.
 * Delegates to runAgentActivity with the agent name fixed and resolves to its metrics.
 */
 export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
  const metrics = await runAgentActivity('pre-recon', input);
  return metrics;
 }
@@ -367,92 +248,56 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
 /**
 * Assemble the final report by concatenating exploitation evidence files.
 * This must be called BEFORE runReportAgent to create the file that the report agent will modify.
 */
 export async function assembleReportActivity(input: ActivityInput): Promise<void> {
  const { repoPath } = input;
-  console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
+  const logger = createActivityLogger();
  logger.info('Assembling deliverables from specialist agents...');
  try {
-    await assembleFinalReport(repoPath);
+    await assembleFinalReport(repoPath, logger);
  } catch (error) {
    const err = error as Error;
-    console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`));
+    logger.warn(`Error assembling final report: ${err.message}`);
    // Don't throw - the report agent can still create content even if no exploitation files exist
  }
 }
 /**
 * Inject model metadata into the final report.
 * This must be called AFTER runReportAgent to add the model information to the Executive Summary.
 */
 export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
  const { repoPath, sessionId, outputPath } = input;
  const logger = createActivityLogger();
  const effectiveOutputPath = outputPath
    ? path.join(outputPath, sessionId)
    : path.join('./audit-logs', sessionId);
  try {
-    await injectModelIntoReport(repoPath, effectiveOutputPath);
+    await injectModelIntoReport(repoPath, effectiveOutputPath, logger);
  } catch (error) {
    const err = error as Error;
-    console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
+    logger.warn(`Error injecting model into report: ${err.message}`);
    // Don't throw - this is a non-critical enhancement
  }
 }
 /**
 * Check if exploitation should run for a given vulnerability type.
 * Reads the vulnerability queue file and returns the decision.
 *
- * This activity allows the workflow to skip exploit agents entirely
+ * Uses existing container if available (from prior agent runs),
- * when no vulnerabilities were found, saving API calls and time.
+ * otherwise creates service directly (stateless, no dependencies).
 *
 * Error handling:
 * - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
 * - Non-retryable errors: skip exploitation gracefully
 */
 export async function checkExploitationQueue(
  input: ActivityInput,
  vulnType: VulnType
 ): Promise<ExploitationDecision> {
-  const { repoPath } = input;
+  const { repoPath, workflowId } = input;
  const logger = createActivityLogger();
-  const result = await safeValidateQueueAndDeliverable(vulnType, repoPath);
+  // Reuse container's service if available (from prior vuln agent runs)
  const existingContainer = getContainer(workflowId);
  const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();
-  if (result.success && result.data) {
+  return checker.checkQueue(vulnType, repoPath, logger);
    const { shouldExploit, vulnerabilityCount } = result.data;
    console.log(
      chalk.blue(
        `🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
      )
    );
    return result.data;
  }
  // Validation failed - check if we should retry or skip
  const error = result.error;
  if (error?.retryable) {
    // Re-throw retryable errors so Temporal can retry the vuln agent
    console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
    throw error;
  }
  // Non-retryable error - skip exploitation gracefully
  console.log(
    chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
  );
  return {
    shouldExploit: false,
    shouldRetry: false,
    vulnerabilityCount: 0,
    vulnType,
  };
 }
 // === Resume Activities ===
 /**
 * Session.json structure for resume state loading
 */
 interface SessionJson {
  session: {
    id: string;
@@ -462,27 +307,27 @@ interface SessionJson {
    resumeAttempts?: ResumeAttempt[];
  };
  metrics: {
-    agents: Record<string, {
+    agents: Record<
-      status: 'in-progress' | 'success' | 'failed';
+      string,
-      checkpoint?: string;
+      {
-    }>;
+        status: 'in-progress' | 'success' | 'failed';
        checkpoint?: string;
      }
    >;
  };
 }
 /**
 * Load resume state from an existing workspace.
 * Validates workspace exists, URL matches, and determines which agents to skip.
 *
 * @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
 */
 export async function loadResumeState(
  workspaceName: string,
  expectedUrl: string,
  expectedRepoPath: string
 ): Promise<ResumeState> {
  // 1. Validate workspace exists
  const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
  // Validate workspace exists
  const exists = await fileExists(sessionPath);
  if (!exists) {
    throw ApplicationFailure.nonRetryable(
@@ -491,7 +336,7 @@ export async function loadResumeState(
    );
  }
-  // Load session.json
+  // 2. Parse session.json and validate URL match
  let session: SessionJson;
  try {
    session = await readJson<SessionJson>(sessionPath);
@@ -503,7 +348,6 @@ export async function loadResumeState(
    );
  }
  // Validate URL matches
  if (session.session.webUrl !== expectedUrl) {
    throw ApplicationFailure.nonRetryable(
      `URL mismatch with workspace\n  Workspace URL: ${session.session.webUrl}\n  Provided URL:  ${expectedUrl}`,
@@ -511,34 +355,30 @@ export async function loadResumeState(
    );
  }
-  // Find completed agents (status === 'success' AND deliverable exists)
+  // 3. Cross-check agent status with deliverables on disk
  const completedAgents: string[] = [];
  const agents = session.metrics.agents;
  for (const agentName of ALL_AGENTS) {
    const agentData = agents[agentName];
    // Skip if agent never ran or didn't succeed
    if (!agentData || agentData.status !== 'success') {
      continue;
    }
-    // Validate deliverable exists
+    const deliverableFilename = AGENTS[agentName].deliverableFilename;
-    const deliverablePath = getDeliverablePath(agentName, expectedRepoPath);
+    const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
    const deliverableExists = await fileExists(deliverablePath);
    if (!deliverableExists) {
-      console.log(
+      const logger = createActivityLogger();
-        chalk.yellow(`Agent ${agentName} shows success but deliverable missing, will re-run`)
+      logger.warn(`Agent ${agentName} shows success but deliverable missing, will re-run`);
      );
      continue;
    }
    // Agent completed successfully and deliverable exists
    completedAgents.push(agentName);
  }
-  // Find latest checkpoint from completed agents
+  // 4. Collect git checkpoints and validate at least one exists
  const checkpoints = completedAgents
    .map((name) => agents[name]?.checkpoint)
    .filter((hash): hash is string => hash != null);
@@ -550,24 +390,26 @@ export async function loadResumeState(
    throw ApplicationFailure.nonRetryable(
      `Cannot resume workspace ${workspaceName}: ` +
-      (successAgents.length > 0
+        (successAgents.length > 0
-        ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
+          ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
-          `but their deliverable files are missing from disk. ` +
+            `but their deliverable files are missing from disk. ` +
-          `Start a fresh run instead.`
+            `Start a fresh run instead.`
-        : `No agents completed successfully. Start a fresh run instead.`),
+          : `No agents completed successfully. Start a fresh run instead.`),
      'NoCheckpointsError'
    );
  }
-  // Find most recent commit among checkpoints
+  // 5. Find the most recent checkpoint commit
  const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
  const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
-  console.log(chalk.cyan(`=== RESUME STATE ===`));
+  // 6. Log summary and return resume state
-  console.log(`Workspace: ${workspaceName}`);
+  const logger = createActivityLogger();
-  console.log(`Completed agents: ${completedAgents.length}`);
+  logger.info('Resume state loaded', {
-  console.log(`Checkpoint: ${checkpointHash}`);
+    workspace: workspaceName,
    completedAgents: completedAgents.length,
    checkpoint: checkpointHash,
  });
  return {
    workspaceName,
@@ -578,20 +420,21 @@ export async function loadResumeState(
  };
 }
 /**
 * Find the most recent commit among a list of commit hashes.
 * Uses git rev-list to determine which commit is newest.
 */
 async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
  if (commitHashes.length === 1) {
    const hash = commitHashes[0];
    if (!hash) {
-      throw new Error('Empty commit hash in array');
+      throw new PentestError(
        'Empty commit hash in array',
        'filesystem',
        false, // Non-retryable - corrupt workspace state
        { phase: 'resume' },
        ErrorCode.GIT_CHECKPOINT_FAILED
      );
    }
    return hash;
  }
  // Use git rev-list to find the most recent commit among all hashes
  const result = await executeGitCommandWithRetry(
    ['git', 'rev-list', '--max-count=1', ...commitHashes],
    repoPath,
@@ -603,20 +446,15 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
 /**
 * Restore git workspace to a checkpoint and clean up partial deliverables.
 *
 * @param repoPath - Repository path
 * @param checkpointHash - Git commit hash to reset to
 * @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
 */
 export async function restoreGitCheckpoint(
  repoPath: string,
  checkpointHash: string,
  incompleteAgents: AgentName[]
 ): Promise<void> {
-  console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
+  const logger = createActivityLogger();
  logger.info(`Restoring git workspace to ${checkpointHash}...`);
  // Checkpoint hash points to the success commit (after commitGitSuccess),
  // so git reset --hard naturally preserves all completed agent deliverables.
  await executeGitCommandWithRetry(
    ['git', 'reset', '--hard', checkpointHash],
    repoPath,
@@ -628,67 +466,60 @@ export async function restoreGitCheckpoint(
    'clean untracked files for resume'
  );
  // Clean up any partial deliverables from incomplete agents
  for (const agentName of incompleteAgents) {
-    const deliverablePath = getDeliverablePath(agentName, repoPath);
+    const deliverableFilename = AGENTS[agentName].deliverableFilename;
    const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
    try {
      const exists = await fileExists(deliverablePath);
      if (exists) {
-        console.log(chalk.yellow(`Cleaning partial deliverable: ${agentName}`));
+        logger.warn(`Cleaning partial deliverable: ${agentName}`);
        await fs.unlink(deliverablePath);
      }
    } catch (error) {
-      console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`));
+      logger.info(`Note: Failed to delete ${deliverablePath}: ${error}`);
    }
  }
-  console.log(chalk.green('Workspace restored to clean state'));
+  logger.info('Workspace restored to clean state');
 }
 /**
- * Record a resume attempt in session.json.
+ * Record a resume attempt in session.json and write resume header to workflow.log.
 * Tracks the new workflow ID, terminated workflows, and checkpoint hash.
 */
 export async function recordResumeAttempt(
  input: ActivityInput,
  terminatedWorkflows: string[],
-  checkpointHash: string
+  checkpointHash: string,
  previousWorkflowId: string,
  completedAgents: string[]
 ): Promise<void> {
-  const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
+  const sessionMetadata = buildSessionMetadata(input);
  const sessionMetadata: SessionMetadata = {
    id: sessionId,
    webUrl,
    repoPath,
    ...(outputPath && { outputPath }),
  };
  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize();
-  await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash);
+  // Update session.json with resume attempt
  await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash);
  // Write resume header to workflow.log
  await auditSession.logResumeHeader({
    previousWorkflowId,
    newWorkflowId: input.workflowId,
    checkpointHash,
    completedAgents,
  });
 }
 /**
 * Log phase transition to the unified workflow log.
 * Called at phase boundaries for per-workflow logging.
 */
 export async function logPhaseTransition(
  input: ActivityInput,
  phase: string,
  event: 'start' | 'complete'
 ): Promise<void> {
-  const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
+  const sessionMetadata = buildSessionMetadata(input);
  const sessionMetadata: SessionMetadata = {
    id: sessionId,
    webUrl,
    repoPath,
    ...(outputPath && { outputPath }),
  };
  const auditSession = new AuditSession(sessionMetadata);
-  await auditSession.initialize(workflowId);
+  await auditSession.initialize(input.workflowId);
  if (event === 'start') {
    await auditSession.logPhaseStart(phase);
@@ -698,28 +529,23 @@ export async function logPhaseTransition(
 }
 /**
- * Log workflow completion with full summary to the unified workflow log.
+ * Log workflow completion with full summary.
- * Called at the end of the workflow to write a summary breakdown.
+ * Cleans up container when done.
 */
 export async function logWorkflowComplete(
  input: ActivityInput,
  summary: WorkflowSummary
 ): Promise<void> {
-  const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
+  const { repoPath, workflowId } = input;
-
+  const sessionMetadata = buildSessionMetadata(input);
  const sessionMetadata: SessionMetadata = {
    id: sessionId,
    webUrl,
    repoPath,
    ...(outputPath && { outputPath }),
  };
  // 1. Initialize audit session and mark final status
  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize(workflowId);
  await auditSession.updateSessionStatus(summary.status);
-  // Use cumulative metrics from session.json (includes all resume attempts)
+  // 2. Load cumulative metrics from session.json
-  const sessionData = await auditSession.getMetrics() as {
+  const sessionData = (await auditSession.getMetrics()) as {
    metrics: {
      total_duration_ms: number;
      total_cost_usd: number;
@@ -727,7 +553,7 @@ export async function logWorkflowComplete(
    };
  };
-  // Fill in metrics for skipped agents (completed in previous runs)
+  // 3. Fill in metrics for skipped agents (resumed from previous run)
  const agentMetrics = { ...summary.agentMetrics };
  for (const agentName of summary.completedAgents) {
    if (!agentMetrics[agentName]) {
@@ -741,18 +567,27 @@ export async function logWorkflowComplete(
    }
  }
  // 4. Build cumulative summary with cross-run totals
  const cumulativeSummary: WorkflowSummary = {
    ...summary,
    totalDurationMs: sessionData.metrics.total_duration_ms,
    totalCostUsd: sessionData.metrics.total_cost_usd,
    agentMetrics,
  };
  // 5. Write completion entry to workflow.log
  await auditSession.logWorkflowComplete(cumulativeSummary);
-  // Copy all deliverables to audit-logs once at workflow end (non-fatal)
+  // 6. Copy deliverables to audit-logs
  try {
    await copyDeliverablesToAudit(sessionMetadata, repoPath);
  } catch (copyErr) {
-    console.error('Failed to copy deliverables to audit-logs:', copyErr);
+    const logger = createActivityLogger();
    logger.error('Failed to copy deliverables to audit-logs', {
      error: copyErr instanceof Error ? copyErr.message : String(copyErr),
    });
  }
  // 7. Clean up container
  removeContainer(workflowId);
 }
@@ -0,0 +1,34 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { Context } from '@temporalio/activity';
 import type { ActivityLogger } from '../types/activity-logger.js';
 /**
 * ActivityLogger that forwards every entry to the logger of the currently running
 * Temporal activity (`Context.current().log`).
 *
 * The activity context is looked up on each call rather than cached, so the logging
 * methods must be invoked inside a running Temporal activity — they throw otherwise.
 */
 export class TemporalActivityLogger implements ActivityLogger {
  info(message: string, attrs?: Record<string, unknown>): void {
    this.write('info', message, attrs);
  }
  warn(message: string, attrs?: Record<string, unknown>): void {
    this.write('warn', message, attrs);
  }
  error(message: string, attrs?: Record<string, unknown>): void {
    this.write('error', message, attrs);
  }

  // Single forwarding point shared by all levels; missing attrs become an empty object.
  private write(
    level: 'info' | 'warn' | 'error',
    message: string,
    attrs: Record<string, unknown> | undefined
  ): void {
    Context.current().log[level](message, attrs ?? {});
  }
 }
 /**
 * Factory for the logger used by activity code.
 *
 * Constructing the logger does not touch the Temporal activity context; the returned
 * logger resolves it on every `info` / `warn` / `error` call. Those calls must therefore
 * happen inside a running Temporal activity, otherwise they throw.
 *
 * @returns A new ActivityLogger backed by the current activity's logger.
 */
 export function createActivityLogger(): ActivityLogger {
  const logger: ActivityLogger = new TemporalActivityLogger();
  return logger;
 }
@@ -26,12 +26,11 @@
 *   TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
 */
-import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client';
+import { Connection, Client, WorkflowNotFoundError, type WorkflowHandle } from '@temporalio/client';
 import dotenv from 'dotenv';
 import chalk from 'chalk';
 import { displaySplashScreen } from '../splash-screen.js';
 import { sanitizeHostname } from '../audit/utils.js';
-import { readJson, fileExists } from '../audit/utils.js';
+import { readJson, fileExists } from '../utils/file-io.js';
 import path from 'path';
 // Import types only - these don't pull in workflow runtime code
 import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
@@ -89,18 +88,18 @@ async function terminateExistingWorkflows(
      const description = await handle.describe();
      if (description.status.name === 'RUNNING') {
-        console.log(chalk.yellow(`Terminating running workflow: ${wfId}`));
+        console.log(`Terminating running workflow: ${wfId}`);
        await handle.terminate('Superseded by resume workflow');
        terminated.push(wfId);
-        console.log(chalk.green(`Terminated: ${wfId}`));
+        console.log(`Terminated: ${wfId}`);
      } else {
-        console.log(chalk.gray(`Workflow already ${description.status.name}: ${wfId}`));
+        console.log(`Workflow already ${description.status.name}: ${wfId}`);
      }
    } catch (error) {
      if (error instanceof WorkflowNotFoundError) {
-        console.log(chalk.gray(`Workflow not found (already cleaned up): ${wfId}`));
+        console.log(`Workflow not found (already cleaned up): ${wfId}`);
      } else {
-        console.log(chalk.red(`Failed to terminate ${wfId}: ${error}`));
+        console.log(`Failed to terminate ${wfId}: ${error}`);
        // Continue anyway - don't block resume on termination failure
      }
    }
@@ -118,13 +117,13 @@ function isValidWorkspaceName(name: string): boolean {
 }
 function showUsage(): void {
-  console.log(chalk.cyan.bold('\nShannon Temporal Client'));
+  console.log('\nShannon Temporal Client');
-  console.log(chalk.gray('Start a pentest pipeline workflow\n'));
+  console.log('Start a pentest pipeline workflow\n');
-  console.log(chalk.yellow('Usage:'));
+  console.log('Usage:');
  console.log(
    '  node dist/temporal/client.js <webUrl> <repoPath> [options]\n'
  );
-  console.log(chalk.yellow('Options:'));
+  console.log('Options:');
  console.log('  --config <path>       Configuration file path');
  console.log('  --output <path>       Output directory for audit logs');
  console.log('  --pipeline-testing    Use minimal prompts for fast testing');
@@ -133,54 +132,65 @@ function showUsage(): void {
    '  --workflow-id <id>    Custom workflow ID (default: shannon-<timestamp>)'
  );
  console.log('  --wait                Wait for workflow completion with progress polling\n');
-  console.log(chalk.yellow('Examples:'));
+  console.log('Examples:');
  console.log('  node dist/temporal/client.js https://example.com /path/to/repo');
  console.log(
    '  node dist/temporal/client.js https://example.com /path/to/repo --config config.yaml\n'
  );
 }
-async function startPipeline(): Promise<void> {
+// === CLI Argument Parsing ===
  const args = process.argv.slice(2);
-  if (args.includes('--help') || args.includes('-h') || args.length === 0) {
+interface CliArgs {
  webUrl: string;
  repoPath: string;
  configPath?: string;
  outputPath?: string;
  displayOutputPath?: string;
  pipelineTestingMode: boolean;
  customWorkflowId?: string;
  waitForCompletion: boolean;
  resumeFromWorkspace?: string;
 }
 function parseCliArgs(argv: string[]): CliArgs {
  if (argv.includes('--help') || argv.includes('-h') || argv.length === 0) {
    showUsage();
    process.exit(0);
  }
  // Parse arguments
  let webUrl: string | undefined;
  let repoPath: string | undefined;
  let configPath: string | undefined;
  let outputPath: string | undefined;
-  let displayOutputPath: string | undefined; // Host path for display purposes
+  let displayOutputPath: string | undefined;
  let pipelineTestingMode = false;
  let customWorkflowId: string | undefined;
  let waitForCompletion = false;
  let resumeFromWorkspace: string | undefined;
-  for (let i = 0; i < args.length; i++) {
+  for (let i = 0; i < argv.length; i++) {
-    const arg = args[i];
+    const arg = argv[i];
    if (arg === '--config') {
-      const nextArg = args[i + 1];
+      const nextArg = argv[i + 1];
      if (nextArg && !nextArg.startsWith('-')) {
        configPath = nextArg;
        i++;
      }
    } else if (arg === '--output') {
-      const nextArg = args[i + 1];
+      const nextArg = argv[i + 1];
      if (nextArg && !nextArg.startsWith('-')) {
        outputPath = nextArg;
        i++;
      }
    } else if (arg === '--display-output') {
-      const nextArg = args[i + 1];
+      const nextArg = argv[i + 1];
      if (nextArg && !nextArg.startsWith('-')) {
        displayOutputPath = nextArg;
        i++;
      }
    } else if (arg === '--workflow-id') {
-      const nextArg = args[i + 1];
+      const nextArg = argv[i + 1];
      if (nextArg && !nextArg.startsWith('-')) {
        customWorkflowId = nextArg;
        i++;
@@ -188,7 +198,7 @@ async function startPipeline(): Promise<void> {
    } else if (arg === '--pipeline-testing') {
      pipelineTestingMode = true;
    } else if (arg === '--workspace') {
-      const nextArg = args[i + 1];
+      const nextArg = argv[i + 1];
      if (nextArg && !nextArg.startsWith('-')) {
        resumeFromWorkspace = nextArg;
        i++;
@@ -205,177 +215,233 @@ async function startPipeline(): Promise<void> {
  }
  if (!webUrl || !repoPath) {
-    console.log(chalk.red('Error: webUrl and repoPath are required'));
+    console.log('Error: webUrl and repoPath are required');
    showUsage();
    process.exit(1);
  }
-  // Display splash screen
+  return {
    webUrl, repoPath, pipelineTestingMode, waitForCompletion,
    ...(configPath && { configPath }),
    ...(outputPath && { outputPath }),
    ...(displayOutputPath && { displayOutputPath }),
    ...(customWorkflowId && { customWorkflowId }),
    ...(resumeFromWorkspace && { resumeFromWorkspace }),
  };
 }
 // === Workspace Resolution ===
 /**
 * Result of turning the workspace-related CLI options into the concrete
 * identifiers used for one pipeline run.
 */
 interface WorkspaceResolution {
  /** Temporal workflow ID to start for this run. */
  workflowId: string;
  /** Workspace name; identical to workflowId when no workspace was requested. */
  sessionId: string;
  /** True only when an existing workspace (its session.json was found) is being resumed. */
  isResume: boolean;
  /** IDs of workflows terminated while preparing a resume; empty for new runs. */
  terminatedWorkflows: string[];
 }
 /**
 * Decide which workspace and Temporal workflow ID this invocation uses.
 *
 * Three outcomes, driven by `args.resumeFromWorkspace`:
 *  - not set: auto-named run. The workflow ID is `args.customWorkflowId` or
 *    `<hostname>_shannon-<timestamp>`, and the session ID equals it.
 *  - set and `./audit-logs/<workspace>/session.json` exists: resume. The
 *    provided URL is checked against the stored one, then running workflows of
 *    that workspace are terminated and a `<workspace>_resume_<timestamp>`
 *    workflow ID is generated.
 *  - set but no session.json: new named workspace, after the name is validated.
 *
 * Exits the process with code 1 on a URL mismatch or an invalid workspace name.
 *
 * @param client - Temporal client, used to terminate earlier workflows on resume.
 * @param args - Parsed CLI arguments.
 * @returns Workflow ID, session ID, resume flag and terminated workflow IDs.
 */
 async function resolveWorkspace(
  client: Client,
  args: CliArgs
 ): Promise<WorkspaceResolution> {
  if (!args.resumeFromWorkspace) {
    const hostname = sanitizeHostname(args.webUrl);
    const workflowId = args.customWorkflowId || `${hostname}_shannon-${Date.now()}`;
    return {
      workflowId,
      sessionId: workflowId,
      isResume: false,
      terminatedWorkflows: [],
    };
  }
  const workspace = args.resumeFromWorkspace;
  const sessionPath = path.join('./audit-logs', workspace, 'session.json');
  const workspaceExists = await fileExists(sessionPath);
  if (workspaceExists) {
    console.log('=== RESUME MODE ===');
    console.log(`Workspace: ${workspace}\n`);
    // 1. Validate URL matches the workspace BEFORE touching running workflows.
    //    Termination cannot be undone, so a request that is about to be
    //    rejected must not kill the previous attempt.
    const session = await readJson<SessionJson>(sessionPath);
    if (session.session.webUrl !== args.webUrl) {
      console.error('ERROR: URL mismatch with workspace');
      console.error(`  Workspace URL: ${session.session.webUrl}`);
      console.error(`  Provided URL:  ${args.webUrl}`);
      process.exit(1);
    }
    // 2. Terminate any running workflows from previous attempts
    const terminatedWorkflows = await terminateExistingWorkflows(client, workspace);
    if (terminatedWorkflows.length > 0) {
      console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`);
    }
    // 3. Generate a new workflow ID scoped to this resume attempt and
    //    return with isResume=true so downstream uses resume logic
    return {
      workflowId: `${workspace}_resume_${Date.now()}`,
      sessionId: workspace,
      isResume: true,
      terminatedWorkflows,
    };
  }
  // No session.json yet: treat the name as a brand-new workspace.
  if (!isValidWorkspaceName(workspace)) {
    console.error(`ERROR: Invalid workspace name: "${workspace}"`);
    console.error('  Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric');
    process.exit(1);
  }
  console.log('=== NEW NAMED WORKSPACE ===');
  console.log(`Workspace: ${workspace}\n`);
  return {
    workflowId: `${workspace}_shannon-${Date.now()}`,
    sessionId: workspace,
    isResume: false,
    terminatedWorkflows: [],
  };
 }
 // === Pipeline Input Construction ===
 /**
 * Assemble the workflow input from the parsed CLI arguments and the resolved
 * workspace.
 *
 * Optional fields are only present on the result when they carry a truthy
 * value (or, for terminatedWorkflows, a non-empty list). resumeFromWorkspace
 * is forwarded only when the workspace resolution is an actual resume.
 *
 * @param args - Parsed CLI arguments.
 * @param workspace - Identifiers produced by workspace resolution.
 * @returns The input object passed to the pipeline workflow.
 */
 function buildPipelineInput(args: CliArgs, workspace: WorkspaceResolution): PipelineInput {
  const { configPath, outputPath, pipelineTestingMode, resumeFromWorkspace } = args;
  const { workflowId, sessionId, isResume, terminatedWorkflows } = workspace;
  // Required / always-present fields first, in the established key order.
  const input: PipelineInput = {
    webUrl: args.webUrl,
    repoPath: args.repoPath,
    workflowId,
    sessionId,
  };
  if (configPath) {
    input.configPath = configPath;
  }
  if (outputPath) {
    input.outputPath = outputPath;
  }
  if (pipelineTestingMode) {
    input.pipelineTestingMode = pipelineTestingMode;
  }
  if (isResume && resumeFromWorkspace) {
    input.resumeFromWorkspace = resumeFromWorkspace;
  }
  if (terminatedWorkflows.length > 0) {
    input.terminatedWorkflows = terminatedWorkflows;
  }
  return input;
 }
 // === Display Helpers ===
 /**
 * Print the "workflow started" banner followed by the run parameters.
 *
 * Target, repository and workspace are always shown; config path, display
 * output path and the pipeline-testing marker are shown only when set.
 *
 * @param args - Parsed CLI arguments.
 * @param workspace - Identifiers produced by workspace resolution.
 */
 function displayWorkflowInfo(args: CliArgs, workspace: WorkspaceResolution): void {
  const { workflowId, sessionId, isResume } = workspace;
  // Collect the parameter rows first so the optional ones slot in naturally.
  const rows: string[] = [
    `  Target:     ${args.webUrl}`,
    `  Repository: ${args.repoPath}`,
    `  Workspace:  ${sessionId}`,
  ];
  if (args.configPath) {
    rows.push(`  Config:     ${args.configPath}`);
  }
  if (args.displayOutputPath) {
    rows.push(`  Output:     ${args.displayOutputPath}`);
  }
  if (args.pipelineTestingMode) {
    rows.push(`  Mode:       Pipeline Testing`);
  }
  console.log(`✓ Workflow started: ${workflowId}`);
  if (isResume) {
    console.log(`  (Resuming workspace: ${sessionId})`);
  }
  console.log();
  for (const row of rows) {
    console.log(row);
  }
  console.log();
 }
 /**
 * Print where to follow the running workflow and where its reports will land.
 *
 * The reports directory is shown relative to the first of these that is set:
 * displayOutputPath, outputPath, or the default './audit-logs'.
 *
 * @param args - Parsed CLI arguments.
 * @param workspace - Identifiers produced by workspace resolution.
 */
 function displayMonitoringInfo(args: CliArgs, workspace: WorkspaceResolution): void {
  const { workflowId, sessionId } = workspace;
  // Pick the base directory to display, falling back to the default location.
  let reportsRoot = './audit-logs';
  if (args.displayOutputPath) {
    reportsRoot = args.displayOutputPath;
  } else if (args.outputPath) {
    reportsRoot = args.outputPath;
  }
  console.log('Monitor progress:');
  console.log(`  Web UI:  http://localhost:8233/namespaces/default/workflows/${workflowId}`);
  console.log(`  Logs:    ./shannon logs ID=${workflowId}`);
  console.log();
  console.log('Output:');
  console.log(`  Reports: ${reportsRoot}/${sessionId}`);
  console.log();
 }
 // === Workflow Result Handling ===
 /**
 * Wait for the workflow to finish, printing a progress line every 30 seconds
 * and a summary of the run at the end.
 *
 * On success the run metrics are printed when the result carries a summary;
 * for resumed workspaces the cumulative cost stored in the workspace's
 * session.json is printed as well (read failures are ignored). On failure the
 * error is printed and the process exits with code 1.
 *
 * @param handle - Handle of the started pipeline workflow.
 * @param workspace - Identifiers produced by workspace resolution.
 */
 async function waitForWorkflowResult(
  handle: WorkflowHandle<(input: PipelineInput) => Promise<PipelineState>>,
  workspace: WorkspaceResolution
 ): Promise<void> {
  // Best-effort progress line; a failing query is silently skipped.
  const reportProgress = async (): Promise<void> => {
    try {
      const { elapsedMs, currentPhase, currentAgent, completedAgents } =
        await handle.query<PipelineProgress>(PROGRESS_QUERY);
      const seconds = Math.floor(elapsedMs / 1000);
      console.log(
        `[${seconds}s] Phase: ${currentPhase || 'unknown'} | Agent: ${currentAgent || 'none'} | Completed: ${completedAgents.length}/13`
      );
    } catch {
      // Workflow may have completed
    }
  };
  // Cumulative cost across all resume attempts, taken from session.json.
  const printCumulativeCost = async (): Promise<void> => {
    try {
      const sessionFile = path.join('./audit-logs', workspace.sessionId, 'session.json');
      const session = await readJson<SessionJson>(sessionFile);
      console.log(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`);
    } catch {
      // Non-fatal, skip cumulative cost display
    }
  };
  const ticker = setInterval(reportProgress, 30000);
  try {
    // Block until the workflow completes, then stop polling before printing.
    const finalState = await handle.result();
    clearInterval(ticker);
    console.log('\nPipeline completed successfully!');
    const { summary } = finalState;
    if (summary) {
      const wholeSeconds = Math.floor(summary.totalDurationMs / 1000);
      console.log(`Duration: ${wholeSeconds}s`);
      console.log(`Agents completed: ${summary.agentCount}`);
      console.log(`Total turns: ${summary.totalTurns}`);
      console.log(`Run cost: $${summary.totalCostUsd.toFixed(4)}`);
      if (workspace.isResume) {
        await printCumulativeCost();
      }
    }
  } catch (error) {
    clearInterval(ticker);
    console.error('\nPipeline failed:', error);
    process.exit(1);
  }
 }
 // === Main Entry Point ===
 async function startPipeline(): Promise<void> {
  // 1. Parse CLI args and display splash
  const args = parseCliArgs(process.argv.slice(2));
  await displaySplashScreen();
  // 2. Connect to Temporal server
  const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
-  console.log(chalk.gray(`Connecting to Temporal at ${address}...`));
+  console.log(`Connecting to Temporal at ${address}...`);
  const connection = await Connection.connect({ address });
  const client = new Client({ connection });
  try {
-    let terminatedWorkflows: string[] = [];
+    // 3. Resolve workspace (new or resume) and build pipeline input
-    let workflowId: string;
+    const workspace = await resolveWorkspace(client, args);
-    let sessionId: string; // Workspace name (persistent directory)
+    const input = buildPipelineInput(args, workspace);
    let isResume = false;
-    if (resumeFromWorkspace) {
+    // 4. Start the Temporal workflow
      const sessionPath = path.join('./audit-logs', resumeFromWorkspace, 'session.json');
      const workspaceExists = await fileExists(sessionPath);
      if (workspaceExists) {
        // === Resume Mode: existing workspace ===
        isResume = true;
        console.log(chalk.cyan('=== RESUME MODE ==='));
        console.log(`Workspace: ${resumeFromWorkspace}\n`);
        // Terminate any running workflows for this workspace
        terminatedWorkflows = await terminateExistingWorkflows(client, resumeFromWorkspace);
        if (terminatedWorkflows.length > 0) {
          console.log(chalk.yellow(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`));
        }
        // Validate URL matches workspace
        const session = await readJson<SessionJson>(sessionPath);
        if (session.session.webUrl !== webUrl) {
          console.error(chalk.red('ERROR: URL mismatch with workspace'));
          console.error(`  Workspace URL: ${session.session.webUrl}`);
          console.error(`  Provided URL:  ${webUrl}`);
          process.exit(1);
        }
        // Generate resume workflow ID
        workflowId = `${resumeFromWorkspace}_resume_${Date.now()}`;
        sessionId = resumeFromWorkspace;
      } else {
        // === New Named Workspace ===
        if (!isValidWorkspaceName(resumeFromWorkspace)) {
          console.error(chalk.red(`ERROR: Invalid workspace name: "${resumeFromWorkspace}"`));
          console.error(chalk.gray('  Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric'));
          process.exit(1);
        }
        console.log(chalk.cyan('=== NEW NAMED WORKSPACE ==='));
        console.log(`Workspace: ${resumeFromWorkspace}\n`);
        workflowId = `${resumeFromWorkspace}_shannon-${Date.now()}`;
        sessionId = resumeFromWorkspace;
      }
    } else {
      // === New Auto-Named Workflow ===
      const hostname = sanitizeHostname(webUrl);
      workflowId = customWorkflowId || `${hostname}_shannon-${Date.now()}`;
      sessionId = workflowId;
    }
    const input: PipelineInput = {
      webUrl,
      repoPath,
      workflowId, // Add for audit correlation
      sessionId, // Workspace directory name
      ...(configPath && { configPath }),
      ...(outputPath && { outputPath }),
      ...(pipelineTestingMode && { pipelineTestingMode }),
      ...(isResume && resumeFromWorkspace && { resumeFromWorkspace }),
      ...(terminatedWorkflows.length > 0 && { terminatedWorkflows }),
    };
    // Determine output directory for display (use sessionId for persistent directory)
    // Use displayOutputPath (host path) if provided, otherwise fall back to outputPath or default
    const effectiveDisplayPath = displayOutputPath || outputPath || './audit-logs';
    const outputDir = `${effectiveDisplayPath}/${sessionId}`;
    console.log(chalk.green.bold(`✓ Workflow started: ${workflowId}`));
    if (isResume) {
      console.log(chalk.gray(`  (Resuming workspace: ${sessionId})`));
    }
    console.log();
    console.log(chalk.white('  Target:     ') + chalk.cyan(webUrl));
    console.log(chalk.white('  Repository: ') + chalk.cyan(repoPath));
    console.log(chalk.white('  Workspace:  ') + chalk.cyan(sessionId));
    if (configPath) {
      console.log(chalk.white('  Config:     ') + chalk.cyan(configPath));
    }
    if (displayOutputPath) {
      console.log(chalk.white('  Output:     ') + chalk.cyan(displayOutputPath));
    }
    if (pipelineTestingMode) {
      console.log(chalk.white('  Mode:       ') + chalk.yellow('Pipeline Testing'));
    }
    console.log();
    // Start workflow by name (not by importing the function)
    const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
      'pentestPipelineWorkflow',
      {
        taskQueue: 'shannon-pipeline',
-        workflowId,
+        workflowId: workspace.workflowId,
        args: [input],
      }
    );
-    if (!waitForCompletion) {
+    // 5. Display info and optionally wait for completion
-      console.log(chalk.bold('Monitor progress:'));
+    displayWorkflowInfo(args, workspace);
      console.log(chalk.white('  Web UI:  ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
      console.log(chalk.white('  Logs:    ') + chalk.gray(`./shannon logs ID=${workflowId}`));
      console.log();
      console.log(chalk.bold('Output:'));
      console.log(chalk.white('  Reports: ') + chalk.cyan(outputDir));
      console.log();
      return;
    }
-    // Poll for progress every 30 seconds
+    if (args.waitForCompletion) {
-    const progressInterval = setInterval(async () => {
+      await waitForWorkflowResult(handle, workspace);
-      try {
+    } else {
-        const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY);
+      displayMonitoringInfo(args, workspace);
        const elapsed = Math.floor(progress.elapsedMs / 1000);
        console.log(
          chalk.gray(`[${elapsed}s]`),
          chalk.cyan(`Phase: ${progress.currentPhase || 'unknown'}`),
          chalk.gray(`| Agent: ${progress.currentAgent || 'none'}`),
          chalk.gray(`| Completed: ${progress.completedAgents.length}/13`)
        );
      } catch {
        // Workflow may have completed
      }
    }, 30000);
    try {
      const result = await handle.result();
      clearInterval(progressInterval);
      console.log(chalk.green.bold('\nPipeline completed successfully!'));
      if (result.summary) {
        console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`));
        console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`));
        console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`));
        console.log(chalk.gray(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`));
        // Show cumulative cost from session.json (includes all resume attempts)
        if (isResume) {
          try {
            const session = await readJson<SessionJson>(
              path.join('./audit-logs', sessionId, 'session.json')
            );
            console.log(chalk.gray(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`));
          } catch {
            // Non-fatal, skip cumulative cost display
          }
        }
      }
    } catch (error) {
      clearInterval(progressInterval);
      console.error(chalk.red.bold('\nPipeline failed:'), error);
      process.exit(1);
    }
  } finally {
    await connection.close();
@@ -383,6 +449,6 @@ async function startPipeline(): Promise<void> {
 }
 startPipeline().catch((err) => {
-  console.error(chalk.red('Client error:'), err);
+  console.error('Client error:', err);
  process.exit(1);
 });
@@ -1,6 +1,7 @@
 import { defineQuery } from '@temporalio/workflow';
-// === Types ===
+export type { AgentMetrics } from '../types/metrics.js';
 import type { AgentMetrics } from '../types/metrics.js';
 export interface PipelineInput {
  webUrl: string;
@@ -8,7 +9,7 @@ export interface PipelineInput {
  configPath?: string;
  outputPath?: string;
  pipelineTestingMode?: boolean;
-  workflowId?: string; // Added by client, used for audit correlation
+  workflowId?: string; // Used for audit correlation
  sessionId?: string; // Workspace directory name (distinct from workflowId for named workspaces)
  resumeFromWorkspace?: string; // Workspace name to resume from
  terminatedWorkflows?: string[]; // Workflows terminated during resume
@@ -22,15 +23,6 @@ export interface ResumeState {
  originalWorkflowId: string;
 }
 export interface AgentMetrics {
  durationMs: number;
  inputTokens: number | null;
  outputTokens: number | null;
  costUsd: number | null;
  numTurns: number | null;
  model?: string | undefined;
 }
 export interface PipelineSummary {
  totalCostUsd: number;
  totalDurationMs: number; // Wall-clock time (end - start)
@@ -68,6 +60,4 @@ export interface VulnExploitPipelineResult {
  error: string | null;
 }
 // === Queries ===
 export const getProgress = defineQuery<PipelineProgress>('getProgress');
@@ -0,0 +1,45 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Maps PipelineState to WorkflowSummary for audit logging.
 * Pure function with no side effects.
 */
 import type { PipelineState } from './shared.js';
 import type { WorkflowSummary } from '../audit/workflow-logger.js';
 /**
 * Project a PipelineState onto the WorkflowSummary shape.
 *
 * Uses no clock and performs no I/O, so the result depends only on its
 * arguments. Per-agent metrics are reduced to duration and cost, and `error`
 * is included only when the state carries a non-empty one.
 *
 * @param state - Pipeline state; `state.summary` must already be populated.
 * @param status - Final status to record.
 * @returns The summary object for audit logging.
 * @throws Error when `state.summary` is not set.
 */
 export function toWorkflowSummary(
  state: PipelineState,
  status: 'completed' | 'failed'
 ): WorkflowSummary {
  const { summary, completedAgents, agentMetrics, error } = state;
  // Guard: the caller is responsible for computing the summary beforehand.
  if (!summary) {
    throw new Error('toWorkflowSummary: state.summary must be set before calling');
  }
  // Keep only the two per-agent figures the summary carries.
  const metricEntries = Object.entries(agentMetrics).map(
    ([agentName, { durationMs, costUsd }]) => [agentName, { durationMs, costUsd }]
  );
  const { totalDurationMs, totalCostUsd } = summary;
  return {
    status,
    totalDurationMs,
    totalCostUsd,
    completedAgents,
    agentMetrics: Object.fromEntries(metricEntries),
    ...(error ? { error } : {}),
  };
 }
@@ -24,7 +24,6 @@ import { NativeConnection, Worker, bundleWorkflowCode } from '@temporalio/worker
 import { fileURLToPath } from 'node:url';
 import path from 'node:path';
 import dotenv from 'dotenv';
 import chalk from 'chalk';
 import * as activities from './activities.js';
 dotenv.config();
@@ -33,12 +32,12 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
 async function runWorker(): Promise<void> {
  const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
-  console.log(chalk.cyan(`Connecting to Temporal at ${address}...`));
+  console.log(`Connecting to Temporal at ${address}...`);
  const connection = await NativeConnection.connect({ address });
  // Bundle workflows for Temporal's V8 isolate
-  console.log(chalk.gray('Bundling workflows...'));
+  console.log('Bundling workflows...');
  const workflowBundle = await bundleWorkflowCode({
    workflowsPath: path.join(__dirname, 'workflows.js'),
  });
@@ -54,26 +53,26 @@ async function runWorker(): Promise<void> {
  // Graceful shutdown handling
  const shutdown = async (): Promise<void> => {
-    console.log(chalk.yellow('\nShutting down worker...'));
+    console.log('\nShutting down worker...');
    worker.shutdown();
  };
  process.on('SIGINT', shutdown);
  process.on('SIGTERM', shutdown);
-  console.log(chalk.green('Shannon worker started'));
+  console.log('Shannon worker started');
-  console.log(chalk.gray('Task queue: shannon-pipeline'));
+  console.log('Task queue: shannon-pipeline');
-  console.log(chalk.gray('Press Ctrl+C to stop\n'));
+  console.log('Press Ctrl+C to stop\n');
  try {
    await worker.run();
  } finally {
    await connection.close();
-    console.log(chalk.gray('Worker stopped'));
+    console.log('Worker stopped');
  }
 }
 runWorker().catch((err) => {
-  console.error(chalk.red('Worker failed:'), err);
+  console.error('Worker failed:', err);
  process.exit(1);
 });
@@ -24,6 +24,7 @@
 */
 import {
  log,
  proxyActivities,
  setHandler,
  workflowInfo,
@@ -40,9 +41,10 @@ import {
  type AgentMetrics,
  type ResumeState,
 } from './shared.js';
-import type { VulnType } from '../queue-validation.js';
+import type { VulnType } from '../services/queue-validation.js';
 import type { AgentName } from '../types/agents.js';
 import { ALL_AGENTS } from '../types/agents.js';
 import { toWorkflowSummary } from './summary-mapper.js';
 // Retry configuration for production (long intervals for billing recovery)
 const PRODUCTION_RETRY = {
@@ -103,11 +105,9 @@ export async function pentestPipelineWorkflow(
 ): Promise<PipelineState> {
  const { workflowId } = workflowInfo();
  // Select activity proxy based on testing mode
  // Pipeline testing uses fast retry intervals (10s) for quick iteration
  const a = input.pipelineTestingMode ? testActs : acts;
  // Workflow state (queryable)
  const state: PipelineState = {
    status: 'running',
    currentPhase: null,
@@ -120,7 +120,6 @@ export async function pentestPipelineWorkflow(
    summary: null,
  };
  // Register query handler for real-time progress inspection
  setHandler(getProgress, (): PipelineProgress => ({
    ...state,
    workflowId,
@@ -145,18 +144,17 @@ export async function pentestPipelineWorkflow(
    }),
  };
  // === RESUME LOGIC ===
  let resumeState: ResumeState | null = null;
  if (input.resumeFromWorkspace) {
-    // Load resume state from existing workspace
+    // 1. Load resume state (validates workspace, cross-checks deliverables)
    resumeState = await a.loadResumeState(
      input.resumeFromWorkspace,
      input.webUrl,
      input.repoPath
    );
-    // Restore git checkpoint and clean up partial deliverables
+    // 2. Restore git workspace and clean up incomplete deliverables
    const incompleteAgents = ALL_AGENTS.filter(
      (agentName) => !resumeState!.completedAgents.includes(agentName)
    ) as AgentName[];
@@ -167,120 +165,59 @@ export async function pentestPipelineWorkflow(
      incompleteAgents
    );
-    // Check if all agents are already complete
+    // 3. Short-circuit if all agents already completed
    if (resumeState.completedAgents.length === ALL_AGENTS.length) {
-      console.log(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
+      log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
      state.status = 'completed';
      state.completedAgents = [...resumeState.completedAgents];
      state.summary = computeSummary(state);
      return state;
    }
-    // Record resume attempt in session.json
+    // 4. Record this resume attempt in session.json and workflow.log
    await a.recordResumeAttempt(
      activityInput,
      input.terminatedWorkflows || [],
-      resumeState.checkpointHash
+      resumeState.checkpointHash,
      resumeState.originalWorkflowId,
      resumeState.completedAgents
    );
-    console.log('Resume state loaded and workspace restored');
+    log.info('Resume state loaded and workspace restored');
  }
  // Helper to check if an agent should be skipped
  const shouldSkip = (agentName: string): boolean => {
    return resumeState?.completedAgents.includes(agentName) ?? false;
  };
-  try {
+  // Run a sequential agent phase (pre-recon, recon)
-    // === Phase 1: Pre-Reconnaissance ===
+  async function runSequentialPhase(
-    if (!shouldSkip('pre-recon')) {
+    phaseName: string,
-      state.currentPhase = 'pre-recon';
+    agentName: AgentName,
-      state.currentAgent = 'pre-recon';
+    runAgent: (input: ActivityInput) => Promise<AgentMetrics>
-      await a.logPhaseTransition(activityInput, 'pre-recon', 'start');
+  ): Promise<void> {
-      state.agentMetrics['pre-recon'] =
+    if (!shouldSkip(agentName)) {
-        await a.runPreReconAgent(activityInput);
+      state.currentPhase = phaseName;
-      state.completedAgents.push('pre-recon');
+      state.currentAgent = agentName;
-      await a.logPhaseTransition(activityInput, 'pre-recon', 'complete');
+      await a.logPhaseTransition(activityInput, phaseName, 'start');
      state.agentMetrics[agentName] = await runAgent(activityInput);
      state.completedAgents.push(agentName);
      await a.logPhaseTransition(activityInput, phaseName, 'complete');
    } else {
-      console.log('Skipping pre-recon (already complete)');
+      log.info(`Skipping ${agentName} (already complete)`);
-      state.completedAgents.push('pre-recon');
+      state.completedAgents.push(agentName);
    }
  }
-    // === Phase 2: Reconnaissance ===
+  // Build pipeline configs for the 5 vuln→exploit pairs
-    if (!shouldSkip('recon')) {
+  function buildPipelineConfigs(): Array<{
-      state.currentPhase = 'recon';
+    vulnType: VulnType;
-      state.currentAgent = 'recon';
+    vulnAgent: string;
-      await a.logPhaseTransition(activityInput, 'recon', 'start');
+    exploitAgent: string;
-      state.agentMetrics['recon'] = await a.runReconAgent(activityInput);
+    runVuln: () => Promise<AgentMetrics>;
-      state.completedAgents.push('recon');
+    runExploit: () => Promise<AgentMetrics>;
-      await a.logPhaseTransition(activityInput, 'recon', 'complete');
+  }> {
-    } else {
+    return [
      console.log('Skipping recon (already complete)');
      state.completedAgents.push('recon');
    }
    // === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
    // Each vuln type runs as an independent pipeline:
    // vuln agent → queue check → conditional exploit agent
    // This eliminates the synchronization barrier between phases - each exploit
    // starts immediately when its vuln agent finishes, not waiting for all.
    state.currentPhase = 'vulnerability-exploitation';
    state.currentAgent = 'pipelines';
    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
    // Helper: Run a single vuln→exploit pipeline with skip logic
    async function runVulnExploitPipeline(
      vulnType: VulnType,
      runVulnAgent: () => Promise<AgentMetrics>,
      runExploitAgent: () => Promise<AgentMetrics>
    ): Promise<VulnExploitPipelineResult> {
      const vulnAgentName = `${vulnType}-vuln`;
      const exploitAgentName = `${vulnType}-exploit`;
      // Step 1: Run vulnerability agent (or skip if completed)
      let vulnMetrics: AgentMetrics | null = null;
      if (!shouldSkip(vulnAgentName)) {
        vulnMetrics = await runVulnAgent();
      } else {
        console.log(`Skipping ${vulnAgentName} (already complete)`);
      }
      // Step 2: Check exploitation queue (only if vuln agent ran or completed previously)
      const decision = await a.checkExploitationQueue(activityInput, vulnType);
      // Step 3: Conditionally run exploit agent (skip if already completed)
      let exploitMetrics: AgentMetrics | null = null;
      if (decision.shouldExploit) {
        if (!shouldSkip(exploitAgentName)) {
          exploitMetrics = await runExploitAgent();
        } else {
          console.log(`Skipping ${exploitAgentName} (already complete)`);
        }
      }
      return {
        vulnType,
        vulnMetrics,
        exploitMetrics,
        exploitDecision: {
          shouldExploit: decision.shouldExploit,
          vulnerabilityCount: decision.vulnerabilityCount,
        },
        error: null,
      };
    }
    // Determine which pipelines to run (skip if both vuln and exploit completed)
    const pipelinesToRun: Array<Promise<VulnExploitPipelineResult>> = [];
    // Only run pipeline if at least one agent (vuln or exploit) is incomplete
    const pipelineConfigs: Array<{
      vulnType: VulnType;
      vulnAgent: string;
      exploitAgent: string;
      runVuln: () => Promise<AgentMetrics>;
      runExploit: () => Promise<AgentMetrics>;
    }> = [
      {
        vulnType: 'injection',
        vulnAgent: 'injection-vuln',
@@ -317,56 +254,34 @@ export async function pentestPipelineWorkflow(
        runExploit: () => a.runAuthzExploitAgent(activityInput),
      },
    ];
  }
-    for (const config of pipelineConfigs) {
+  // Aggregate results from settled pipeline promises into workflow state
-      const vulnComplete = shouldSkip(config.vulnAgent);
+  function aggregatePipelineResults(
-      const exploitComplete = shouldSkip(config.exploitAgent);
+    results: PromiseSettledResult<VulnExploitPipelineResult>[]
-
+  ): void {
      // Only run pipeline if at least one agent needs to run
      if (!vulnComplete || !exploitComplete) {
        pipelinesToRun.push(
          runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)
        );
      } else {
        console.log(
          `Skipping entire ${config.vulnType} pipeline (both agents complete)`
        );
        // Still need to mark them as completed in state
        state.completedAgents.push(config.vulnAgent, config.exploitAgent);
      }
    }
    // Run pipelines in parallel with graceful failure handling
    // Promise.allSettled ensures other pipelines continue if one fails
    const pipelineResults = await Promise.allSettled(pipelinesToRun);
    // Aggregate results from all pipelines
    const failedPipelines: string[] = [];
-    for (const result of pipelineResults) {
+
    for (const result of results) {
      if (result.status === 'fulfilled') {
        const { vulnType, vulnMetrics, exploitMetrics } = result.value;
        // Record vuln agent
        const vulnAgentName = `${vulnType}-vuln`;
        if (vulnMetrics) {
          state.agentMetrics[vulnAgentName] = vulnMetrics;
          state.completedAgents.push(vulnAgentName);
        } else if (shouldSkip(vulnAgentName)) {
          // Agent was skipped because already complete
          state.completedAgents.push(vulnAgentName);
        }
        // Record exploit agent (if it ran)
        const exploitAgentName = `${vulnType}-exploit`;
        if (exploitMetrics) {
          state.agentMetrics[exploitAgentName] = exploitMetrics;
          state.completedAgents.push(exploitAgentName);
        } else if (shouldSkip(exploitAgentName)) {
          // Agent was skipped because already complete
          state.completedAgents.push(exploitAgentName);
        }
      } else {
        // Pipeline failed - log error but continue with others
        const errorMsg =
          result.reason instanceof Error
            ? result.reason.message
@@ -375,15 +290,87 @@ export async function pentestPipelineWorkflow(
      }
    }
    // Log any pipeline failures (workflow continues despite failures)
    if (failedPipelines.length > 0) {
-      console.log(
+      log.warn(`${failedPipelines.length} pipeline(s) failed`, {
-        `⚠️ ${failedPipelines.length} pipeline(s) failed:`,
+        failures: failedPipelines,
-        failedPipelines
+      });
-      );
+    }
  }
  try {
    // === Phase 1: Pre-Reconnaissance ===
    await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
    // === Phase 2: Reconnaissance ===
    await runSequentialPhase('recon', 'recon', a.runReconAgent);
    // === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
    // Each vuln type runs as an independent pipeline:
    // vuln agent → queue check → conditional exploit agent
    // Exploits start immediately when their vuln finishes, not waiting for all.
    state.currentPhase = 'vulnerability-exploitation';
    state.currentAgent = 'pipelines';
    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
    // Closure over shouldSkip and activityInput by design (Temporal replay safety)
    async function runVulnExploitPipeline(
      vulnType: VulnType,
      runVulnAgent: () => Promise<AgentMetrics>,
      runExploitAgent: () => Promise<AgentMetrics>
    ): Promise<VulnExploitPipelineResult> {
      const vulnAgentName = `${vulnType}-vuln`;
      const exploitAgentName = `${vulnType}-exploit`;

      // Vulnerability analysis: a resumed (already complete) agent yields null metrics.
      const vulnAlreadyDone = shouldSkip(vulnAgentName);
      if (vulnAlreadyDone) {
        log.info(`Skipping ${vulnAgentName} (already complete)`);
      }
      const vulnMetrics: AgentMetrics | null = vulnAlreadyDone ? null : await runVulnAgent();

      // The exploitation queue is consulted whether or not the vuln agent ran this time.
      const decision = await a.checkExploitationQueue(activityInput, vulnType);

      // Exploitation runs only when the queue asks for it and the agent is not already done.
      let exploitMetrics: AgentMetrics | null = null;
      if (decision.shouldExploit) {
        if (shouldSkip(exploitAgentName)) {
          log.info(`Skipping ${exploitAgentName} (already complete)`);
        } else {
          exploitMetrics = await runExploitAgent();
        }
      }

      const exploitDecision = {
        shouldExploit: decision.shouldExploit,
        vulnerabilityCount: decision.vulnerabilityCount,
      };
      return { vulnType, vulnMetrics, exploitMetrics, exploitDecision, error: null };
    }
-    // Update phase markers
+    const pipelineConfigs = buildPipelineConfigs();
    const pipelinesToRun: Array<Promise<VulnExploitPipelineResult>> = [];
    for (const config of pipelineConfigs) {
      if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
        pipelinesToRun.push(
          runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)
        );
      } else {
        log.info(`Skipping entire ${config.vulnType} pipeline (both agents complete)`);
        state.completedAgents.push(config.vulnAgent, config.exploitAgent);
      }
    }
    const pipelineResults = await Promise.allSettled(pipelinesToRun);
    aggregatePipelineResults(pipelineResults);
    state.currentPhase = 'exploitation';
    state.currentAgent = null;
    await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');
@@ -406,29 +393,17 @@ export async function pentestPipelineWorkflow(
      await a.logPhaseTransition(activityInput, 'reporting', 'complete');
    } else {
-      console.log('Skipping report (already complete)');
+      log.info('Skipping report (already complete)');
      state.completedAgents.push('report');
    }
    // === Complete ===
    state.status = 'completed';
    state.currentPhase = null;
    state.currentAgent = null;
    state.summary = computeSummary(state);
    // Log workflow completion summary
-    await a.logWorkflowComplete(activityInput, {
+    await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed'));
      status: 'completed',
      totalDurationMs: state.summary.totalDurationMs,
      totalCostUsd: state.summary.totalCostUsd,
      completedAgents: state.completedAgents,
      agentMetrics: Object.fromEntries(
        Object.entries(state.agentMetrics).map(([name, m]) => [
          name,
          { durationMs: m.durationMs, costUsd: m.costUsd },
        ])
      ),
    });
    return state;
  } catch (error) {
@@ -438,19 +413,7 @@ export async function pentestPipelineWorkflow(
    state.summary = computeSummary(state);
    // Log workflow failure summary
-    await a.logWorkflowComplete(activityInput, {
+    await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed'));
      status: 'failed',
      totalDurationMs: state.summary.totalDurationMs,
      totalCostUsd: state.summary.totalCostUsd,
      completedAgents: state.completedAgents,
      agentMetrics: Object.fromEntries(
        Object.entries(state.agentMetrics).map(([name, m]) => [
          name,
          { durationMs: m.durationMs, costUsd: m.costUsd },
        ])
      ),
      error: state.error ?? undefined,
    });
    throw error;
  }
@@ -20,7 +20,6 @@
 import fs from 'fs/promises';
 import path from 'path';
 import chalk from 'chalk';
 interface SessionJson {
  session: {
@@ -59,16 +58,7 @@ function formatDuration(ms: number): string {
 }
 function getStatusDisplay(status: string): string {
-  switch (status) {
+  return status;
    case 'completed':
      return chalk.green(status);
    case 'in-progress':
      return chalk.yellow(status);
    case 'failed':
      return chalk.red(status);
    default:
      return status;
  }
 }
 function truncate(str: string, maxLen: number): string {
@@ -83,8 +73,8 @@ async function listWorkspaces(): Promise<void> {
  try {
    entries = await fs.readdir(auditDir);
  } catch {
-    console.log(chalk.yellow('No audit-logs directory found.'));
+    console.log('No audit-logs directory found.');
-    console.log(chalk.gray(`Expected: ${auditDir}`));
+    console.log(`Expected: ${auditDir}`);
    return;
  }
@@ -110,15 +100,15 @@ async function listWorkspaces(): Promise<void> {
  }
  if (workspaces.length === 0) {
-    console.log(chalk.yellow('\nNo workspaces found.'));
+    console.log('\nNo workspaces found.');
-    console.log(chalk.gray('Run a pipeline first: ./shannon start URL=<url> REPO=<repo>'));
+    console.log('Run a pipeline first: ./shannon start URL=<url> REPO=<repo>');
    return;
  }
  // Sort by creation date (most recent first)
  workspaces.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
-  console.log(chalk.cyan.bold('\n=== Shannon Workspaces ===\n'));
+  console.log('\n=== Shannon Workspaces ===\n');
  // Column widths
  const nameWidth = 30;
@@ -129,16 +119,14 @@ async function listWorkspaces(): Promise<void> {
  // Header
  console.log(
-    chalk.gray(
+    '  ' +
-      '  ' +
+    'WORKSPACE'.padEnd(nameWidth) +
-      'WORKSPACE'.padEnd(nameWidth) +
+    'URL'.padEnd(urlWidth) +
-      'URL'.padEnd(urlWidth) +
+    'STATUS'.padEnd(statusWidth) +
-      'STATUS'.padEnd(statusWidth) +
+    'DURATION'.padEnd(durationWidth) +
-      'DURATION'.padEnd(durationWidth) +
+    'COST'.padEnd(costWidth)
      'COST'.padEnd(costWidth)
    )
  );
-  console.log(chalk.gray('  ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth)));
+  console.log('  ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth));
  let resumableCount = 0;
@@ -154,15 +142,15 @@ async function listWorkspaces(): Promise<void> {
      resumableCount++;
    }
-    const resumeTag = isResumable ? chalk.cyan(' (resumable)') : '';
+    const resumeTag = isResumable ? ' (resumable)' : '';
    console.log(
      '  ' +
-      chalk.white(truncate(ws.name, nameWidth - 2).padEnd(nameWidth)) +
+      truncate(ws.name, nameWidth - 2).padEnd(nameWidth) +
-      chalk.gray(truncate(ws.url, urlWidth - 2).padEnd(urlWidth)) +
+      truncate(ws.url, urlWidth - 2).padEnd(urlWidth) +
-      getStatusDisplay(ws.status).padEnd(statusWidth + 10) + // +10 for chalk escape codes
+      getStatusDisplay(ws.status).padEnd(statusWidth) +
-      chalk.gray(duration.padEnd(durationWidth)) +
+      duration.padEnd(durationWidth) +
-      chalk.gray(cost.padEnd(costWidth)) +
+      cost.padEnd(costWidth) +
      resumeTag
    );
  }
@@ -170,16 +158,16 @@ async function listWorkspaces(): Promise<void> {
  console.log();
  const summary = `${workspaces.length} workspace${workspaces.length === 1 ? '' : 's'} found`;
  const resumeSummary = resumableCount > 0 ? ` (${resumableCount} resumable)` : '';
-  console.log(chalk.gray(`${summary}${resumeSummary}`));
+  console.log(`${summary}${resumeSummary}`);
  if (resumableCount > 0) {
-    console.log(chalk.gray('\nResume with: ./shannon start URL=<url> REPO=<repo> WORKSPACE=<name>'));
+    console.log('\nResume with: ./shannon start URL=<url> REPO=<repo> WORKSPACE=<name>');
  }
  console.log();
 }
 listWorkspaces().catch((err) => {
-  console.error(chalk.red('Error listing workspaces:'), err);
+  console.error('Error listing workspaces:', err);
  process.exit(1);
 });
@@ -1,66 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { $ } from 'zx';
 import chalk from 'chalk';
 // Names of the external command-line tools whose presence is probed by checkToolAvailability.
 type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
 // Per-tool flag: true when the tool was found, false otherwise.
 export type ToolAvailability = Record<ToolName, boolean>;
 /**
 * Probe the shell for every required external tool, one at a time, printing a
 * status line per tool.
 *
 * @returns a map from tool name to whether `command -v <tool>` succeeded
 */
 export const checkToolAvailability = async (): Promise<ToolAvailability> => {
  const toolNames: ToolName[] = ['nmap', 'subfinder', 'whatweb', 'schemathesis'];
  const availability: ToolAvailability = {
    nmap: false,
    subfinder: false,
    whatweb: false,
    schemathesis: false
  };
  console.log(chalk.blue('🔧 Checking tool availability...'));
  for (const toolName of toolNames) {
    // A rejected shell command means the tool could not be located.
    let found: boolean;
    try {
      await $`command -v ${toolName}`;
      found = true;
    } catch {
      found = false;
    }
    availability[toolName] = found;
    if (found) {
      console.log(chalk.green(`  ✅ ${toolName} - available`));
    } else {
      console.log(chalk.yellow(`  ⚠️ ${toolName} - not found`));
    }
  }
  return availability;
 };
 /**
 * Report tools flagged as unavailable and print installation hints for each.
 * Prints nothing when every tool is available.
 *
 * @param toolAvailability - availability map as produced by checkToolAvailability
 * @returns the names of the missing tools (empty when none are missing)
 */
 export const handleMissingTools = (toolAvailability: ToolAvailability): ToolName[] => {
  const missing: ToolName[] = [];
  for (const [tool, available] of Object.entries(toolAvailability) as Array<[ToolName, boolean]>) {
    if (!available) {
      missing.push(tool);
    }
  }
  if (missing.length === 0) {
    return missing;
  }

  console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`));
  console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.'));

  // One install command per tool
  const installHints: Record<ToolName, string> = {
    'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)',
    'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest',
    'whatweb': 'gem install whatweb',
    'schemathesis': 'pip install schemathesis'
  };
  console.log(chalk.gray('\nInstallation hints:'));
  for (const tool of missing) {
    console.log(chalk.gray(`  ${tool}: ${installHints[tool]}`));
  }
  console.log('');
  return missing;
 };
@@ -0,0 +1,15 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Logger interface for services called from Temporal activities.
 * Keeps services Temporal-agnostic while providing structured logging.
 */
 export interface ActivityLogger {
  // Each method takes a message plus optional structured attributes; none returns a value.
  info(message: string, attrs?: Record<string, unknown>): void;
  warn(message: string, attrs?: Record<string, unknown>): void;
  error(message: string, attrs?: Record<string, unknown>): void;
 }
@@ -34,21 +34,6 @@ export const ALL_AGENTS = [
 */
 export type AgentName = typeof ALL_AGENTS[number];
 // Closed set of prompt identifiers: the recon-phase prompts, one `vuln-*` and one
 // `exploit-*` prompt per vulnerability type, and the executive report prompt.
 export type PromptName =
  | 'pre-recon-code'
  | 'recon'
  | 'vuln-injection'
  | 'vuln-xss'
  | 'vuln-auth'
  | 'vuln-ssrf'
  | 'vuln-authz'
  | 'exploit-injection'
  | 'exploit-xss'
  | 'exploit-auth'
  | 'exploit-ssrf'
  | 'exploit-authz'
  | 'report-executive';
 export type PlaywrightAgent =
  | 'playwright-agent1'
  | 'playwright-agent2'
@@ -56,7 +41,9 @@ export type PlaywrightAgent =
  | 'playwright-agent4'
  | 'playwright-agent5';
-export type AgentValidator = (sourceDir: string) => Promise<boolean>;
+import type { ActivityLogger } from './activity-logger.js';
 export type AgentValidator = (sourceDir: string, logger: ActivityLogger) => Promise<boolean>;
 export type AgentStatus =
  | 'pending'
@@ -69,52 +56,21 @@ export interface AgentDefinition {
  name: AgentName;
  displayName: string;
  prerequisites: AgentName[];
  promptTemplate: string;
  deliverableFilename: string;
 }
 /**
- * Maps an agent name to its corresponding prompt file name.
+ * Vulnerability types supported by the pipeline.
 */
-export function getPromptNameForAgent(agentName: AgentName): PromptName {
+export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
  const mappings: Record<AgentName, PromptName> = {
    'pre-recon': 'pre-recon-code',
    'recon': 'recon',
    'injection-vuln': 'vuln-injection',
    'xss-vuln': 'vuln-xss',
    'auth-vuln': 'vuln-auth',
    'ssrf-vuln': 'vuln-ssrf',
    'authz-vuln': 'vuln-authz',
    'injection-exploit': 'exploit-injection',
    'xss-exploit': 'exploit-xss',
    'auth-exploit': 'exploit-auth',
    'ssrf-exploit': 'exploit-ssrf',
    'authz-exploit': 'exploit-authz',
    'report': 'report-executive',
  };
  return mappings[agentName];
 }
 /**
- * Maps an agent name to its deliverable file path.
+ * Decision returned by queue validation for exploitation phase.
 * Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
 */
-export function getDeliverablePath(agentName: AgentName, repoPath: string): string {
+export interface ExploitationDecision {
-  const deliverableMap: Record<AgentName, string> = {
+  shouldExploit: boolean;
-    'pre-recon': 'code_analysis_deliverable.md',
+  shouldRetry: boolean;
-    'recon': 'recon_deliverable.md',
+  vulnerabilityCount: number;
-    'injection-vuln': 'injection_analysis_deliverable.md',
+  vulnType: VulnType;
    'xss-vuln': 'xss_analysis_deliverable.md',
    'auth-vuln': 'auth_analysis_deliverable.md',
    'ssrf-vuln': 'ssrf_analysis_deliverable.md',
    'authz-vuln': 'authz_analysis_deliverable.md',
    'injection-exploit': 'injection_exploitation_evidence.md',
    'xss-exploit': 'xss_exploitation_evidence.md',
    'auth-exploit': 'auth_exploitation_evidence.md',
    'ssrf-exploit': 'ssrf_exploitation_evidence.md',
    'authz-exploit': 'authz_exploitation_evidence.md',
    'report': 'comprehensive_security_assessment_report.md',
  };
  const filename = deliverableMap[agentName];
  return `${repoPath}/deliverables/${filename}`;
 }
@@ -0,0 +1,35 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Audit system type definitions
 */
 /**
 * Cross-cutting session metadata used by services, temporal, and audit.
 */
 export interface SessionMetadata {
  // Required fields
  id: string;
  webUrl: string;
  // Optional filesystem locations
  repoPath?: string;
  outputPath?: string;
  // Index signature: arbitrary additional keys are allowed, typed as unknown,
  // so callers must narrow before using them.
  [key: string]: unknown;
 }
 /**
 * Result data passed to audit system when an agent execution ends.
 * Used by both AuditSession and MetricsTracker.
 */
 export interface AgentEndResult {
  attemptNumber: number;
  // NOTE: snake_case here, unlike the camelCase durationMs/costUsd on AgentMetrics.
  duration_ms: number;
  cost_usd: number;
  success: boolean;
  // Optional fields also accept an explicit `undefined`
  // (presumably for `exactOptionalPropertyTypes` — confirm against tsconfig).
  model?: string | undefined;
  error?: string | undefined;
  checkpoint?: string | undefined;
  isFinalAttempt?: boolean | undefined;
 }
@@ -29,10 +29,8 @@ export interface Rules {
 export type LoginType = 'form' | 'sso' | 'api' | 'basic';
 export type SuccessConditionType = 'url' | 'cookie' | 'element' | 'redirect';
 export interface SuccessCondition {
-  type: SuccessConditionType;
+  type: 'url' | 'cookie' | 'element' | 'redirect';
  value: string;
 }
@@ -53,7 +51,6 @@ export interface Authentication {
 export interface Config {
  rules?: Rules;
  authentication?: Authentication;
  login?: unknown; // Deprecated
 }
 export interface DistributedConfig {
@@ -8,6 +8,39 @@
 * Error type definitions
 */
 /**
 * Specific error codes for reliable classification.
 *
 * ErrorCode provides precision within the coarse 8-category PentestErrorType.
 * Used by classifyErrorForTemporal for code-based classification (preferred)
 * with string matching as fallback for external errors.
 */
 // String enum: every member's runtime value equals its own name, so codes
 // remain readable when serialized or logged.
 export enum ErrorCode {
  // Config errors (PentestErrorType: 'config')
  CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND',
  CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED',
  CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR',
  // Agent execution errors (PentestErrorType: 'validation')
  AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED',
  OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED',
  // Billing errors (PentestErrorType: 'billing')
  API_RATE_LIMITED = 'API_RATE_LIMITED',
  SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED',
  INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS',
  // Git errors (PentestErrorType: 'filesystem')
  GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED',
  GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED',
  // Prompt errors (PentestErrorType: 'prompt')
  PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED',
  // Validation errors (PentestErrorType: 'validation')
  DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND',
 }
 export type PentestErrorType =
  | 'config'
  | 'network'
@@ -8,6 +8,10 @@
 * Type definitions barrel export
 */
 export * from './activity-logger.js';
 export * from './errors.js';
 export * from './config.js';
 export * from './agents.js';
 export * from './audit.js';
 export * from './result.js';
 export * from './metrics.js';
@@ -0,0 +1,19 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Agent metrics types used across services and activities.
 * Centralized here to avoid temporal/shared.ts import boundary violations.
 */
 export interface AgentMetrics {
  // Always present
  durationMs: number;
  // Nullable fields: presumably null when the value was not reported — verify against producers.
  inputTokens: number | null;
  outputTokens: number | null;
  costUsd: number | null;
  numTurns: number | null;
  // Optional; may also be set to an explicit undefined.
  model?: string | undefined;
 }
@@ -0,0 +1,62 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Minimal Result type for explicit error handling.
 *
 * A discriminated union that makes error handling explicit without adding
 * heavy machinery. Used in key modules (config loading, agent execution,
 * queue validation) where callers need to make decisions based on error type.
 */
 /**
 * Success variant of Result
 */
 export interface Ok<T> {
  // Discriminant: the literal `true` identifies the success variant.
  readonly ok: true;
  // The successful payload.
  readonly value: T;
 }
 /**
 * Error variant of Result
 */
 export interface Err<E> {
  // Discriminant: the literal `false` identifies the failure variant.
  readonly ok: false;
  // The failure payload; E is unconstrained, so it need not be an Error instance.
  readonly error: E;
 }
 /**
 * Result type - either Ok with a value or Err with an error
 */
 // Discriminated on the `ok` field: checking `result.ok` narrows to Ok<T> or Err<E>.
 export type Result<T, E> = Ok<T> | Err<E>;
 /**
 * Create a success Result
 */
 export function ok<T>(value: T): Ok<T> {
  // Wrap the payload in the success variant.
  const success: Ok<T> = { ok: true, value: value };
  return success;
 }
 /**
 * Create an error Result
 */
 export function err<E>(error: E): Err<E> {
  // Wrap the payload in the failure variant.
  const failure: Err<E> = { ok: false, error: error };
  return failure;
 }
 /**
 * Type guard for Ok variant
 */
 export function isOk<T, E>(result: Result<T, E>): result is Ok<T> {
  // Strict comparison against the `true` discriminant.
  const { ok: succeeded } = result;
  return succeeded === true;
 }
 /**
 * Type guard for Err variant
 */
 export function isErr<T, E>(result: Result<T, E>): result is Err<E> {
  // Strict comparison against the `false` discriminant.
  const { ok: succeeded } = result;
  return succeeded === false;
 }
@@ -0,0 +1,95 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 /**
 * Consolidated billing/spending cap detection utilities.
 *
 * Anthropic's spending cap behavior is inconsistent:
 * - Sometimes a proper SDK error (billing_error)
 * - Sometimes Claude responds with text about the cap
 * - Sometimes partial billing before cutoff
 *
 * This module provides defense-in-depth detection with shared pattern lists
 * to prevent drift between detection points.
 */
 /**
 * Text patterns for SDK output sniffing (what Claude says).
 * Used by message-handlers.ts and the behavioral heuristic.
 */
 // All entries are lowercase: matchesBillingTextPattern lowercases its input
 // and does substring matching against these.
 export const BILLING_TEXT_PATTERNS = [
  'spending cap',
  'spending limit',
  'cap reached',
  'budget exceeded',
  'usage limit',
  // NOTE(review): very generic substring — any text containing "resets" will match.
  'resets',
 ] as const;
 /**
 * API patterns for error message classification (what the API returns).
 * Used by classifyErrorForTemporal in error-handling.ts.
 */
 // All entries are lowercase: matchesBillingApiPattern lowercases the message
 // and does substring matching against these.
 export const BILLING_API_PATTERNS = [
  'billing_error',
  'credit balance is too low',
  'insufficient credits',
  // Already covered by 'insufficient credits' above (it contains that substring).
  'usage is blocked due to insufficient credits',
  // Both the "&" and "and" spellings are listed.
  'please visit plans & billing',
  'please visit plans and billing',
  'usage limit reached',
  'quota exceeded',
  'daily rate limit',
  'limit will reset',
  'billing limit reached',
 ] as const;
 /**
 * Checks if text matches any billing text pattern.
 * Used for sniffing SDK output content for spending cap messages.
 */
 export function matchesBillingTextPattern(text: string): boolean {
  // Case-insensitive substring search over the shared text patterns.
  const haystack = text.toLowerCase();
  for (const pattern of BILLING_TEXT_PATTERNS) {
    if (haystack.includes(pattern)) {
      return true;
    }
  }
  return false;
 }
 /**
 * Checks if an error message matches any billing API pattern.
 * Used for classifying API error messages.
 */
 export function matchesBillingApiPattern(message: string): boolean {
  // Case-insensitive substring search over the shared API patterns.
  const haystack = message.toLowerCase();
  for (const pattern of BILLING_API_PATTERNS) {
    if (haystack.includes(pattern)) {
      return true;
    }
  }
  return false;
 }
 /**
 * Behavioral heuristic for detecting spending cap.
 *
 * When Claude hits a spending cap, it often returns a short message
 * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
 *
 * This combines three signals:
 * 1. Very low turn count (<=2)
 * 2. Zero cost ($0)
 * 3. Text matches billing patterns
 *
 * @param turns - Number of turns the agent took
 * @param cost - Total cost in USD
 * @param resultText - The result text from the agent
 * @returns true if this looks like a spending cap hit
 */
 export function isSpendingCapBehavior(
  turns: number,
  cost: number,
  resultText: string
 ): boolean {
  // The text is only inspected for short runs (at most 2 turns) that cost exactly 0.
  const tooManyTurns = turns > 2;
  const wasBilled = cost !== 0;
  return (tooManyTurns || wasBilled) ? false : matchesBillingTextPattern(resultText);
 }
@@ -4,11 +4,6 @@
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import chalk from 'chalk';
 import { formatDuration } from './formatting.js';
 // Timing utilities
 export class Timer {
  name: string;
  startTime: number;
@@ -29,82 +24,3 @@ export class Timer {
    return end - this.startTime;
  }
 }
 // Agent name → duration; displayTimingSummary passes the values to formatDuration
 // (presumably milliseconds — confirm against formatDuration).
 interface TimingResultsAgents {
  [key: string]: number;
 }
 interface TimingResults {
  // Overall run timer; null means no timing data is available.
  total: Timer | null;
  // Per-agent durations.
  agents: TimingResultsAgents;
 }
 // Agent name → cost; displayTimingSummary prints the values with a `$` prefix.
 interface CostResultsAgents {
  [key: string]: number;
 }
 interface CostResults {
  // Per-agent costs.
  agents: CostResultsAgents;
  // Total cost; stored separately, not derived from `agents` within this type.
  total: number;
 }
 // Global timing and cost tracker
 // Module-level mutable singleton; starts with no total timer and no per-agent entries.
 export const timingResults: TimingResults = {
  total: null,
  agents: {},
 };
 // Module-level mutable singleton; starts with no per-agent costs and a zero total.
 export const costResults: CostResults = {
  agents: {},
  total: 0,
 };
 /**
 * Print the timing and cost summary to the console.
 *
 * Stops the global total timer to obtain the overall duration, then prints the
 * per-agent time breakdown (with percentage of total) and the per-agent cost
 * breakdown. Prints a notice and returns early when no total timer exists.
 */
 export const displayTimingSummary = (): void => {
  const overallTimer = timingResults.total;
  if (!overallTimer) {
    console.log(chalk.yellow('No timing data available'));
    return;
  }

  const totalDuration = overallTimer.stop();
  const printDivider = (): void => console.log(chalk.gray('─'.repeat(60)));
  const shareOfTotal = (duration: number): string =>
    ((duration / totalDuration) * 100).toFixed(1);

  console.log(chalk.cyan.bold('\n⏱️  TIMING SUMMARY'));
  printDivider();

  // Overall wall-clock time
  console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`));
  console.log();

  // Per-agent durations
  const agentDurations = Object.entries(timingResults.agents);
  if (agentDurations.length > 0) {
    console.log(chalk.magenta.bold('🤖 Agent Breakdown:'));
    let agentTotal = 0;
    for (const [agent, duration] of agentDurations) {
      const label = agent.replace(/-/g, ' ');
      console.log(
        chalk.magenta(
          `  ${label.padEnd(20)} ${formatDuration(duration).padStart(8)} (${shareOfTotal(duration)}%)`
        )
      );
      agentTotal += duration;
    }
    console.log(
      chalk.gray(
        `  ${'Agents Total'.padEnd(20)} ${formatDuration(agentTotal).padStart(8)} (${shareOfTotal(agentTotal)}%)`
      )
    );
  }

  // Per-agent costs
  const agentCosts = Object.entries(costResults.agents);
  if (agentCosts.length > 0) {
    console.log(chalk.green.bold('\n💰 Cost Breakdown:'));
    for (const [agent, cost] of agentCosts) {
      const label = agent.replace(/-/g, ' ');
      console.log(chalk.green(`  ${label.padEnd(20)} $${cost.toFixed(4).padStart(8)}`));
    }
    console.log(chalk.gray(`  ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`));
  }

  printDivider();
 };
@@ -1,264 +0,0 @@
 // Copyright (C) 2025 Keygraph, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU Affero General Public License version 3
 // as published by the Free Software Foundation.
 import { AGENTS } from '../session-manager.js';
 // Input payload of a tool call. Every named field is optional; the formatters
 // in this file read only the ones relevant to the tool being summarized.
 interface ToolCallInput {
  url?: string;
  element?: string;
  key?: string;
  fields?: unknown[];
  text?: string;
  action?: string;
  description?: string;
  // Todo entries; summarizeTodoUpdate checks for 'completed' and 'in_progress' statuses.
  todos?: Array<{
    status: string;
    content: string;
  }>;
  // Any other keys are permitted but untyped.
  [key: string]: unknown;
 }
 // Shape assumed for a parsed tool_use JSON line. filterJsonToolCalls casts the
 // JSON.parse result to this type without validating it.
 interface ToolCall {
  name: string;
  input?: ToolCallInput;
 }
 /**
 * Produce a short display label for a URL.
 *
 * Returns the hostname when the URL parses and has one; otherwise returns the
 * first 30 characters of the raw input.
 */
 function extractDomain(url: string): string {
  const truncated = (): string => url.slice(0, 30);
  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not a parseable URL
    return truncated();
  }
  // Some parseable URLs (e.g. file:) have an empty hostname
  return hostname || truncated();
 }
 /**
 * Condense a TodoWrite payload into a single progress line.
 *
 * Preference order: the last todo marked 'completed' (✅), otherwise the first
 * todo marked 'in_progress' (🔄). Returns null when there is no todo list or
 * no todo in either state.
 */
 function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todos = input?.todos;
  if (!todos || !Array.isArray(todos)) {
    return null;
  }

  // Single pass: remember the last completed item and the first in-progress item.
  let lastCompleted: { status: string; content: string } | undefined;
  let firstInProgress: { status: string; content: string } | undefined;
  for (const todo of todos) {
    if (todo.status === 'completed') {
      lastCompleted = todo;
    } else if (todo.status === 'in_progress' && firstInProgress === undefined) {
      firstInProgress = todo;
    }
  }

  if (lastCompleted) {
    return `✅ ${lastCompleted.content}`;
  }
  if (firstInProgress) {
    return `🔄 ${firstInProgress.content}`;
  }
  return null;
 }
 /**
 * Resolve the bracketed log prefix for an agent description.
 *
 * First looks for a known agent whose display name occurs in the description;
 * failing that, falls back to keyword matching, and finally to '[Agent]'.
 */
 export function getAgentPrefix(description: string): string {
  // Agent name → prefix, checked in this order
  const agentPrefixes: Record<string, string> = {
    'injection-vuln': '[Injection]',
    'xss-vuln': '[XSS]',
    'auth-vuln': '[Auth]',
    'authz-vuln': '[Authz]',
    'ssrf-vuln': '[SSRF]',
    'injection-exploit': '[Injection]',
    'xss-exploit': '[XSS]',
    'auth-exploit': '[Auth]',
    'authz-exploit': '[Authz]',
    'ssrf-exploit': '[SSRF]',
  };

  // Pass 1: match on the registered display name
  const displayNameMatch = Object.entries(agentPrefixes).find(([agentName]) => {
    const agent = AGENTS[agentName as keyof typeof AGENTS];
    if (!agent) {
      return false;
    }
    return description.includes(agent.displayName);
  });
  if (displayNameMatch) {
    return displayNameMatch[1];
  }

  // Pass 2: keyword fallback for backwards compatibility.
  // 'authz' must be tested before 'auth' because the latter is a prefix of the former.
  const keywordFallbacks: Array<[string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, prefix] of keywordFallbacks) {
    if (description.includes(keyword)) {
      return prefix;
    }
  }
  return '[Agent]';
 }
 /**
 * Turn a Playwright MCP browser tool call into a one-line progress message.
 *
 * Known tools map to a fixed message, some including truncated details from
 * the call input. Unknown tools fall back to a generic message built from the
 * last underscore-separated segment of the tool name.
 */
 function formatBrowserAction(toolCall: ToolCall): string {
  const input = toolCall.input || {};

  switch (toolCall.name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(input.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(input.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(input.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(input.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${input.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${input.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting & monitoring
    case 'mcp__playwright__browser_wait_for':
      return input.text
        ? `⏳ Waiting for "${input.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${input.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Anything not listed above
    default: {
      const actionType = toolCall.name.split('_').pop();
      return `🌐 Browser: ${actionType}`;
    }
  }
 }
 /**
 * Strip raw JSON tool-call lines out of agent output.
 *
 * Blank lines are dropped. Lines that start with `{"type":"tool_use"` are
 * parsed: Task, TodoWrite and Playwright browser calls are replaced by a short
 * summary, all other tool calls are hidden. Lines that fail to parse, or that
 * throw while being processed, are kept verbatim, as is all other text.
 *
 * @param content - raw output; null, undefined or empty input yields ''
 * @returns the filtered lines joined with newlines
 */
 export function filterJsonToolCalls(content: string | null | undefined): string {
  if (!content || typeof content !== 'string') {
    return content || '';
  }

  const kept: string[] = [];
  for (const rawLine of content.split('\n')) {
    const candidate = rawLine.trim();
    if (candidate === '') {
      continue;
    }

    // Ordinary assistant text passes through untouched
    if (!candidate.startsWith('{"type":"tool_use"')) {
      kept.push(rawLine);
      continue;
    }

    try {
      const toolCall = JSON.parse(candidate) as ToolCall;

      // Work out what, if anything, replaces this tool call in the output
      let replacement: string | null = null;
      if (toolCall.name === 'Task') {
        replacement = `🚀 Launching ${toolCall.input?.description || 'analysis agent'}`;
      } else if (toolCall.name === 'TodoWrite') {
        replacement = summarizeTodoUpdate(toolCall.input);
      } else if (toolCall.name.startsWith('mcp__playwright__browser_')) {
        replacement = formatBrowserAction(toolCall);
      }
      // Every other tool call (Read, Write, Grep, etc.) leaves replacement null and is hidden

      if (replacement) {
        kept.push(replacement);
      }
    } catch {
      // Invalid JSON or an unexpected shape: show the line as regular text
      kept.push(rawLine);
    }
  }

  return kept.join('\n');
 }
@@ -33,11 +33,11 @@
    "exactOptionalPropertyTypes": true,
    // Style Options
-    // "noImplicitReturns": true,
+    "noImplicitReturns": true,
-    // "noImplicitOverride": true,
+    "noImplicitOverride": true,
-    // "noUnusedLocals": true,
+    "noUnusedLocals": true,
-    // "noUnusedParameters": true,
+    "noUnusedParameters": true,
-    // "noFallthroughCasesInSwitch": true,
+    "noFallthroughCasesInSwitch": true,
    // "noPropertyAccessFromIndexSignature": true,
    // Recommended Options