Merge pull request #141 from KeygraphHQ/refactor/architecture

refactor: decompose activities into services layer with structured error handling
This commit is contained in:
Arjun Malleswaran
2026-02-17 12:22:23 -08:00
committed by GitHub
56 changed files with 2899 additions and 2913 deletions
+22 -14
View File
@@ -8,13 +8,14 @@ You are debugging an issue. Follow this structured approach to avoid spinning in
- Read the full error message and stack trace - Read the full error message and stack trace
- Identify the layer where the error originated: - Identify the layer where the error originated:
- **CLI/Args** - Input validation, path resolution - **CLI/Args** - Input validation, path resolution
- **Config Parsing** - YAML parsing, JSON Schema validation - **Config Parsing** - YAML parsing, JSON Schema validation (`src/config-parser.ts`)
- **Session Management** - Mutex, session.json, lock files - **Session Management** - Agent definitions (`src/session-manager.ts`), mutex (`src/utils/concurrency.ts`)
- **Audit System** - Logging, metrics tracking, atomic writes - **DI Container** - Container initialization/lookup (`src/services/container.ts`)
- **Claude SDK** - Agent execution, MCP servers, turn handling - **Services** - AgentExecutionService, ConfigLoaderService, ExploitationCheckerService, error-handling (`src/services/`)
- **Git Operations** - Checkpoints, rollback, commit - **Audit System** - Logging, metrics tracking, atomic writes (`src/audit/`)
- **Tool Execution** - nmap, subfinder, whatweb - **Claude SDK** - Agent execution, MCP servers, turn handling (`src/ai/claude-executor.ts`)
- **Validation** - Deliverable checks, queue validation - **Git Operations** - Checkpoints, rollback, commit (`src/services/git-manager.ts`)
- **Validation** - Deliverable checks, queue validation (`src/services/queue-validation.ts`)
## Step 2: Check Relevant Logs ## Step 2: Check Relevant Logs
@@ -37,12 +38,14 @@ For Shannon, trace through these layers:
1. **Temporal Client** → `src/temporal/client.ts` - Workflow initiation 1. **Temporal Client** → `src/temporal/client.ts` - Workflow initiation
2. **Workflow** → `src/temporal/workflows.ts` - Pipeline orchestration 2. **Workflow** → `src/temporal/workflows.ts` - Pipeline orchestration
3. **Activities** → `src/temporal/activities.ts` - Agent execution with heartbeats 3. **Activities** → `src/temporal/activities.ts` - Thin wrappers: heartbeat, error classification
4. **Config** → `src/config-parser.ts` - YAML loading, schema validation 4. **Container** → `src/services/container.ts` - Per-workflow DI
5. **Session** → `src/session-manager.ts` - Agent definitions, execution order 5. **Services** → `src/services/agent-execution.ts` - Agent lifecycle
6. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking 6. **Config** → `src/config-parser.ts` via `src/services/config-loader.ts`
7. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic 7. **Prompts** → `src/services/prompt-manager.ts`
8. **Validation** → `src/queue-validation.ts` - Deliverable checks 8. **Audit** → `src/audit/audit-session.ts` - Logging facade, metrics tracking
9. **Executor** → `src/ai/claude-executor.ts` - SDK calls, MCP setup, retry logic
10. **Validation** → `src/services/queue-validation.ts` - Deliverable checks
## Step 4: Identify Root Cause ## Step 4: Identify Root Cause
@@ -58,7 +61,10 @@ For Shannon, trace through these layers:
| Cost/timing not tracked | Metrics not reloaded before update | Add `metricsTracker.reload()` before updates | | Cost/timing not tracked | Metrics not reloaded before update | Add `metricsTracker.reload()` before updates |
| session.json corrupted | Partial write during crash | Delete and restart, or restore from backup | | session.json corrupted | Partial write during crash | Delete and restart, or restore from backup |
| YAML config rejected | Invalid schema or unsafe content | Run through AJV validator manually | | YAML config rejected | Invalid schema or unsafe content | Run through AJV validator manually |
| Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `prompt-manager.ts` interpolation | | Prompt variable not replaced | Missing `{{VARIABLE}}` in context | Check `src/services/prompt-manager.ts` interpolation |
| Service returns Err result | Check `ErrorCode` in Result | Trace through `classifyErrorForTemporal()` in `src/services/error-handling.ts` |
| Container not found | `getOrCreateContainer()` not called | Check activity setup code in `src/temporal/activities.ts` |
| ActivityLogger undefined | `createActivityLogger()` not called | Must be called at top of each activity function |
**MCP Server Issues:** **MCP Server Issues:**
```bash ```bash
@@ -123,6 +129,8 @@ shannon <URL> <REPO> --pipeline-testing
## Quick Reference: Error Types ## Quick Reference: Error Types
`ErrorCode` enum in `src/types/errors.ts` provides finer-grained classification used by `classifyErrorForTemporal()` in `src/services/error-handling.ts`.
| PentestError Type | Meaning | Retryable? | | PentestError Type | Meaning | Retryable? |
|-------------------|---------|------------| |-------------------|---------|------------|
| `config` | Configuration file issues | No | | `config` | Configuration file issues | No |
+11
View File
@@ -19,6 +19,8 @@ git diff HEAD
- [ ] **Retryable flag matches behavior** - If error will be retried, set `retryable: true` - [ ] **Retryable flag matches behavior** - If error will be retried, set `retryable: true`
- [ ] **Context includes debugging info** - Add relevant paths, tool names, error codes to context object - [ ] **Context includes debugging info** - Add relevant paths, tool names, error codes to context object
- [ ] **Never swallow errors silently** - Always log or propagate errors - [ ] **Never swallow errors silently** - Always log or propagate errors
- [ ] **Use ErrorCode enum** - Prefer `ErrorCode.CONFIG_INVALID` over string matching for classification
- [ ] **Result<T,E> for service returns** - Services return `Result` instead of throwing
### Audit System & Concurrency (CRITICAL) ### Audit System & Concurrency (CRITICAL)
- [ ] **Mutex protection for parallel operations** - Use `sessionMutex.lock()` when updating `session.json` during parallel agent execution - [ ] **Mutex protection for parallel operations** - Use `sessionMutex.lock()` when updating `session.json` during parallel agent execution
@@ -41,6 +43,13 @@ git diff HEAD
- [ ] **Duplicate rule detection** - Same `type:url_path` cannot appear twice - [ ] **Duplicate rule detection** - Same `type:url_path` cannot appear twice
- [ ] **JSON Schema validation before use** - Config must pass AJV validation - [ ] **JSON Schema validation before use** - Config must pass AJV validation
### Services Layer & DI Container (CRITICAL)
- [ ] **Business logic in services, not activities** — Activities: heartbeat loop, error classification, container calls only. Domain logic → `src/services/`
- [ ] **Services accept ActivityLogger** — Never import `@temporalio/*` in services. Use `ActivityLogger` interface from `src/types/`
- [ ] **Result type for fallible operations** — Service methods return `Result<T, PentestError>`, unwrap with `isOk()`/`isErr()`. Activities call `executeOrThrow()` at the boundary
- [ ] **Container lifecycle** — `getOrCreateContainer()` at activity start, `removeContainer()` only in workflow cleanup
- [ ] **AuditSession not in container** — Must be passed per-agent call (parallel safety)
### Session & Agent Management (CRITICAL) ### Session & Agent Management (CRITICAL)
- [ ] **Deliverable dependencies respected** - Exploitation agents only run if vulnerability queue exists AND has items - [ ] **Deliverable dependencies respected** - Exploitation agents only run if vulnerability queue exists AND has items
- [ ] **Queue validation before exploitation** - Use `safeValidateQueueAndDeliverable()` to check eligibility - [ ] **Queue validation before exploitation** - Use `safeValidateQueueAndDeliverable()` to check eligibility
@@ -91,6 +100,8 @@ git diff HEAD
- [ ] **Duplicate retry logic** - Don't implement retry at both caller and callee level - [ ] **Duplicate retry logic** - Don't implement retry at both caller and callee level
- [ ] **Hardcoded error message matching** - Prefer error codes over regex on error.message - [ ] **Hardcoded error message matching** - Prefer error codes over regex on error.message
- [ ] **Missing timeout on long operations** - Git operations and API calls should have timeouts - [ ] **Missing timeout on long operations** - Git operations and API calls should have timeouts
- [ ] **Console.log in services** — Use `ActivityLogger`. Only CLI display code (`client.ts`, `worker.ts`, `output-formatters.ts`) uses console.log
- [ ] **Temporal imports in services** — Services must stay Temporal-agnostic. If you need Temporal APIs, it belongs in activities
### Code Quality ### Code Quality
- [ ] **No dead code added** - Remove unused imports, functions, variables - [ ] **No dead code added** - Remove unused imports, functions, variables
+35 -13
View File
@@ -41,18 +41,20 @@ npm run build
## Architecture ## Architecture
### Core Modules ### Core Modules
- `src/session-manager.ts` — Agent definitions, execution order, parallel groups - `src/session-manager.ts` — Agent definitions (`AGENTS` record). Agent types in `src/types/agents.ts`
- `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic and git checkpoints
- `src/config-parser.ts` — YAML config parsing with JSON Schema validation - `src/config-parser.ts` — YAML config parsing with JSON Schema validation
- `src/error-handling.ts` — Categorized error types (PentestError, ConfigError, NetworkError) with retry logic - `src/ai/claude-executor.ts` — Claude Agent SDK integration with retry logic
- `src/tool-checker.ts` — Validates external security tool availability before execution - `src/services/` — Business logic layer (Temporal-agnostic). Activities delegate here. Key: `agent-execution.ts`, `error-handling.ts`, `container.ts`
- `src/queue-validation.ts` — Deliverable validation and agent prerequisites - `src/types/` — Consolidated types: `Result<T,E>`, `ErrorCode`, `AgentName`, `ActivityLogger`, etc.
- `src/utils/` — Shared utilities (file I/O, formatting, concurrency)
### Temporal Orchestration ### Temporal Orchestration
Durable workflow orchestration with crash recovery, queryable progress, intelligent retry, and parallel execution (5 concurrent agents in vuln/exploit phases). Durable workflow orchestration with crash recovery, queryable progress, intelligent retry, and parallel execution (5 concurrent agents in vuln/exploit phases).
- `src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`) - `src/temporal/workflows.ts` — Main workflow (`pentestPipelineWorkflow`)
- `src/temporal/activities.ts` — Activity implementations with heartbeats - `src/temporal/activities.ts` — Thin wrappers — heartbeat loop, error classification, container lifecycle. Business logic delegated to `src/services/`
- `src/temporal/activity-logger.ts` — `TemporalActivityLogger` implementation of `ActivityLogger` interface
- `src/temporal/summary-mapper.ts` — Maps `PipelineSummary` to `WorkflowSummary`
- `src/temporal/worker.ts` — Worker entry point - `src/temporal/worker.ts` — Worker entry point
- `src/temporal/client.ts` — CLI client for starting workflows - `src/temporal/client.ts` — CLI client for starting workflows
- `src/temporal/shared.ts` — Types, interfaces, query definitions - `src/temporal/shared.ts` — Types, interfaces, query definitions
@@ -66,30 +68,32 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
### Supporting Systems ### Supporting Systems
- **Configuration** — YAML configs in `configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters - **Configuration** — YAML configs in `configs/` with JSON Schema validation (`config-schema.json`). Supports auth settings, MFA/TOTP, and per-app testing parameters
- **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `prompt-manager.ts` - **Prompts** — Per-phase templates in `prompts/` with variable substitution (`{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`). Shared partials in `prompts/shared/` via `src/services/prompt-manager.ts`
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth - **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables - **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables. WorkflowLogger (`audit/workflow-logger.ts`) provides unified human-readable per-workflow logs, backed by LogStream (`audit/log-stream.ts`) shared stream primitive
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool - **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool
- **Workspaces & Resume** — Named workspaces via `WORKSPACE=<name>` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts` - **Workspaces & Resume** — Named workspaces via `WORKSPACE=<name>` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts`
## Development Notes ## Development Notes
### Adding a New Agent ### Adding a New Agent
1. Define agent in `src/session-manager.ts` (add to `AGENT_QUEUE` and parallel group) 1. Define agent in `src/session-manager.ts` (add to `AGENTS` record). `ALL_AGENTS`/`AgentName` types live in `src/types/agents.ts`
2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt`) 2. Create prompt template in `prompts/` (e.g., `vuln-newtype.txt`)
3. Add activity function in `src/temporal/activities.ts` 3. Two-layer pattern: add a thin activity wrapper in `src/temporal/activities.ts` (heartbeat + error classification). `AgentExecutionService` in `src/services/agent-execution.ts` handles the agent lifecycle automatically via the `AGENTS` registry
4. Register activity in `src/temporal/workflows.ts` within the appropriate phase 4. Register activity in `src/temporal/workflows.ts` within the appropriate phase
### Modifying Prompts ### Modifying Prompts
- Variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}` - Variable substitution: `{{TARGET_URL}}`, `{{CONFIG_CONTEXT}}`, `{{LOGIN_INSTRUCTIONS}}`
- Shared partials in `prompts/shared/` included via `prompt-manager.ts` - Shared partials in `prompts/shared/` included via `src/services/prompt-manager.ts`
- Test with `PIPELINE_TESTING=true` for fast iteration - Test with `PIPELINE_TESTING=true` for fast iteration
### Key Design Patterns ### Key Design Patterns
- **Configuration-Driven** — YAML configs with JSON Schema validation - **Configuration-Driven** — YAML configs with JSON Schema validation
- **Progressive Analysis** — Each phase builds on previous results - **Progressive Analysis** — Each phase builds on previous results
- **SDK-First** — Claude Agent SDK handles autonomous analysis - **SDK-First** — Claude Agent SDK handles autonomous analysis
- **Modular Error Handling** — Categorized errors with automatic retry (3 attempts per agent) - **Modular Error Handling** — `ErrorCode` enum, `Result<T,E>` for explicit error propagation, automatic retry (3 attempts per agent)
- **Services Boundary** — Activities are thin Temporal wrappers; `src/services/` owns business logic, accepts `ActivityLogger`, returns `Result<T,E>`. No Temporal imports in services
- **DI Container** — Per-workflow in `src/services/container.ts`. `AuditSession` excluded (parallel safety)
### Security ### Security
Defensive security tool only. Use only on systems you own or have explicit permission to test. Defensive security tool only. Use only on systems you own or have explicit permission to test.
@@ -111,18 +115,36 @@ Defensive security tool only. Use only on systems you own or have explicit permi
- Use `function` keyword for top-level functions (not arrow functions) - Use `function` keyword for top-level functions (not arrow functions)
- Explicit return type annotations on exported/top-level functions - Explicit return type annotations on exported/top-level functions
- Prefer `readonly` for data that shouldn't be mutated - Prefer `readonly` for data that shouldn't be mutated
- `exactOptionalPropertyTypes` is enabled — use spread for optional props, not direct `undefined` assignment
### Avoid ### Avoid
- Combining multiple concerns into a single function to "save lines" - Combining multiple concerns into a single function to "save lines"
- Dense callback chains when sequential logic is clearer - Dense callback chains when sequential logic is clearer
- Sacrificing readability for DRY — some repetition is fine if clearer - Sacrificing readability for DRY — some repetition is fine if clearer
- Abstractions for one-time operations - Abstractions for one-time operations
- Backwards-compatibility shims, deprecated wrappers, or re-exports for removed code — delete the old code, don't preserve it
### Comments
Comments must be **timeless** — no references to this conversation, refactoring history, or the AI.
**Patterns used in this codebase:**
- `/** JSDoc */` — file headers (after license) and exported functions/interfaces
- `// N. Description` — numbered sequential steps inside function bodies. Use when a
function has 3+ distinct phases where at least one isn't immediately obvious from the
code. Each step marks the start of a logical phase. Reference: `AgentExecutionService.execute`
(steps 1-9) and `injectModelIntoReport` (steps 1-5)
- `// === Section ===` — high-level dividers between groups of functions in long files,
or to label major branching/classification blocks (e.g., `// === SPENDING CAP SAFEGUARD ===`).
Not for sequential steps inside function bodies — use numbered steps for that
- `// NOTE:` / `// WARNING:` / `// IMPORTANT:` — gotchas and constraints
**Never:** obvious comments, conversation references ("as discussed"), history ("moved from X")
## Key Files ## Key Files
**Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts` **Entry Points:** `src/temporal/workflows.ts`, `src/temporal/activities.ts`, `src/temporal/worker.ts`, `src/temporal/client.ts`
**Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/audit/` **Core Logic:** `src/session-manager.ts`, `src/ai/claude-executor.ts`, `src/config-parser.ts`, `src/services/`, `src/audit/`
**Config:** `shannon` (CLI), `docker-compose.yml`, `configs/`, `prompts/` **Config:** `shannon` (CLI), `docker-compose.yml`, `configs/`, `prompts/`
-1
View File
@@ -21,7 +21,6 @@
"figlet": "^1.9.3", "figlet": "^1.9.3",
"gradient-string": "^3.0.0", "gradient-string": "^3.0.0",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
"zod": "^4.3.6",
"zx": "^8.0.0" "zx": "^8.0.0"
}, },
"devDependencies": { "devDependencies": {
-1
View File
@@ -23,7 +23,6 @@
"figlet": "^1.9.3", "figlet": "^1.9.3",
"gradient-string": "^3.0.0", "gradient-string": "^3.0.0",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
"zod": "^4.3.6",
"zx": "^8.0.0" "zx": "^8.0.0"
}, },
"devDependencies": { "devDependencies": {
+53 -208
View File
@@ -7,18 +7,16 @@
// Production Claude agent execution with retry, git checkpoints, and audit logging // Production Claude agent execution with retry, git checkpoints, and audit logging
import { fs, path } from 'zx'; import { fs, path } from 'zx';
import chalk, { type ChalkInstance } from 'chalk';
import { query } from '@anthropic-ai/claude-agent-sdk'; import { query } from '@anthropic-ai/claude-agent-sdk';
import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js'; import { isRetryableError, PentestError } from '../services/error-handling.js';
import { timingResults, Timer } from '../utils/metrics.js'; import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { Timer } from '../utils/metrics.js';
import { formatTimestamp } from '../utils/formatting.js'; import { formatTimestamp } from '../utils/formatting.js';
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace, getGitCommitHash } from '../utils/git-manager.js'; import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../session-manager.js';
import { AGENT_VALIDATORS, MCP_AGENT_MAPPING } from '../constants.js';
import { AuditSession } from '../audit/index.js'; import { AuditSession } from '../audit/index.js';
import { createShannonHelperServer } from '../../mcp-server/dist/index.js'; import { createShannonHelperServer } from '../../mcp-server/dist/index.js';
import type { SessionMetadata } from '../audit/utils.js'; import { AGENTS } from '../session-manager.js';
import { getPromptNameForAgent } from '../types/agents.js';
import type { AgentName } from '../types/index.js'; import type { AgentName } from '../types/index.js';
import { dispatchMessage } from './message-handlers.js'; import { dispatchMessage } from './message-handlers.js';
@@ -26,6 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr
import { createProgressManager } from './progress-manager.js'; import { createProgressManager } from './progress-manager.js';
import { createAuditLogger } from './audit-logger.js'; import { createAuditLogger } from './audit-logger.js';
import { getActualModelName } from './router-utils.js'; import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
declare global { declare global {
var SHANNON_DISABLE_LOADER: boolean | undefined; var SHANNON_DISABLE_LOADER: boolean | undefined;
@@ -58,24 +57,27 @@ type McpServer = ReturnType<typeof createShannonHelperServer> | StdioMcpServer;
// Configures MCP servers for agent execution, with Docker-specific Chromium handling // Configures MCP servers for agent execution, with Docker-specific Chromium handling
function buildMcpServers( function buildMcpServers(
sourceDir: string, sourceDir: string,
agentName: string | null agentName: string | null,
logger: ActivityLogger
): Record<string, McpServer> { ): Record<string, McpServer> {
// 1. Create the shannon-helper server (always present)
const shannonHelperServer = createShannonHelperServer(sourceDir); const shannonHelperServer = createShannonHelperServer(sourceDir);
const mcpServers: Record<string, McpServer> = { const mcpServers: Record<string, McpServer> = {
'shannon-helper': shannonHelperServer, 'shannon-helper': shannonHelperServer,
}; };
// 2. Look up the agent's Playwright MCP mapping
if (agentName) { if (agentName) {
const promptName = getPromptNameForAgent(agentName as AgentName); const promptTemplate = AGENTS[agentName as AgentName].promptTemplate;
const playwrightMcpName = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING] || null; const playwrightMcpName = MCP_AGENT_MAPPING[promptTemplate as keyof typeof MCP_AGENT_MAPPING] || null;
if (playwrightMcpName) { if (playwrightMcpName) {
console.log(chalk.gray(` Assigned ${agentName} -> ${playwrightMcpName}`)); logger.info(`Assigned ${agentName} -> ${playwrightMcpName}`);
const userDataDir = `/tmp/${playwrightMcpName}`; const userDataDir = `/tmp/${playwrightMcpName}`;
// Docker uses system Chromium; local dev uses Playwright's bundled browsers // 3. Configure Playwright MCP args with Docker/local browser handling
const isDocker = process.env.SHANNON_DOCKER === 'true'; const isDocker = process.env.SHANNON_DOCKER === 'true';
const mcpArgs: string[] = [ const mcpArgs: string[] = [
@@ -84,7 +86,6 @@ function buildMcpServers(
'--user-data-dir', userDataDir, '--user-data-dir', userDataDir,
]; ];
// Docker: Use system Chromium; Local: Use Playwright's bundled browsers
if (isDocker) { if (isDocker) {
mcpArgs.push('--executable-path', '/usr/bin/chromium-browser'); mcpArgs.push('--executable-path', '/usr/bin/chromium-browser');
mcpArgs.push('--browser', 'chromium'); mcpArgs.push('--browser', 'chromium');
@@ -107,6 +108,7 @@ function buildMcpServers(
} }
} }
// 4. Return configured servers
return mcpServers; return mcpServers;
} }
@@ -142,23 +144,23 @@ async function writeErrorLog(
}; };
const logPath = path.join(sourceDir, 'error.log'); const logPath = path.join(sourceDir, 'error.log');
await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n'); await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
} catch (logError) { } catch {
const logErrMsg = logError instanceof Error ? logError.message : String(logError); // Best-effort error log writing - don't propagate failures
console.log(chalk.gray(` (Failed to write error log: ${logErrMsg})`));
} }
} }
export async function validateAgentOutput( export async function validateAgentOutput(
result: ClaudePromptResult, result: ClaudePromptResult,
agentName: string | null, agentName: string | null,
sourceDir: string sourceDir: string,
logger: ActivityLogger
): Promise<boolean> { ): Promise<boolean> {
console.log(chalk.blue(` Validating ${agentName} agent output`)); logger.info(`Validating ${agentName} agent output`);
try { try {
// Check if agent completed successfully // Check if agent completed successfully
if (!result.success || !result.result) { if (!result.success || !result.result) {
console.log(chalk.red(` Validation failed: Agent execution was unsuccessful`)); logger.error('Validation failed: Agent execution was unsuccessful');
return false; return false;
} }
@@ -166,28 +168,27 @@ export async function validateAgentOutput(
const validator = agentName ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS] : undefined; const validator = agentName ? AGENT_VALIDATORS[agentName as keyof typeof AGENT_VALIDATORS] : undefined;
if (!validator) { if (!validator) {
console.log(chalk.yellow(` No validator found for agent "${agentName}" - assuming success`)); logger.warn(`No validator found for agent "${agentName}" - assuming success`);
console.log(chalk.green(` Validation passed: Unknown agent with successful result`)); logger.info('Validation passed: Unknown agent with successful result');
return true; return true;
} }
console.log(chalk.blue(` Using validator for agent: ${agentName}`)); logger.info(`Using validator for agent: ${agentName}`, { sourceDir });
console.log(chalk.blue(` Source directory: ${sourceDir}`));
// Apply validation function // Apply validation function
const validationResult = await validator(sourceDir); const validationResult = await validator(sourceDir, logger);
if (validationResult) { if (validationResult) {
console.log(chalk.green(` Validation passed: Required files/structure present`)); logger.info('Validation passed: Required files/structure present');
} else { } else {
console.log(chalk.red(` Validation failed: Missing required deliverable files`)); logger.error('Validation failed: Missing required deliverable files');
} }
return validationResult; return validationResult;
} catch (error) { } catch (error) {
const errMsg = error instanceof Error ? error.message : String(error); const errMsg = error instanceof Error ? error.message : String(error);
console.log(chalk.red(` Validation failed with error: ${errMsg}`)); logger.error(`Validation failed with error: ${errMsg}`);
return false; return false;
} }
} }
@@ -200,14 +201,14 @@ export async function runClaudePrompt(
context: string = '', context: string = '',
description: string = 'Claude analysis', description: string = 'Claude analysis',
agentName: string | null = null, agentName: string | null = null,
colorFn: ChalkInstance = chalk.cyan,
sessionMetadata: SessionMetadata | null = null,
auditSession: AuditSession | null = null, auditSession: AuditSession | null = null,
attemptNumber: number = 1 logger: ActivityLogger
): Promise<ClaudePromptResult> { ): Promise<ClaudePromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`); const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
const fullPrompt = context ? `${context}\n\n${prompt}` : prompt; const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
// 2. Set up progress and audit infrastructure
const execContext = detectExecutionContext(description); const execContext = detectExecutionContext(description);
const progress = createProgressManager( const progress = createProgressManager(
{ description, useCleanOutput: execContext.useCleanOutput }, { description, useCleanOutput: execContext.useCleanOutput },
@@ -215,11 +216,12 @@ export async function runClaudePrompt(
); );
const auditLogger = createAuditLogger(auditSession); const auditLogger = createAuditLogger(auditSession);
console.log(chalk.blue(` Running Claude Code: ${description}...`)); logger.info(`Running Claude Code: ${description}...`);
const mcpServers = buildMcpServers(sourceDir, agentName); // 3. Configure MCP servers
const mcpServers = buildMcpServers(sourceDir, agentName, logger);
// Build env vars to pass to SDK subprocesses // 4. Build env vars to pass to SDK subprocesses
const sdkEnv: Record<string, string> = { const sdkEnv: Record<string, string> = {
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000', CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
}; };
@@ -230,6 +232,7 @@ export async function runClaudePrompt(
sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN; sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
} }
// 5. Configure SDK options
const options = { const options = {
model: 'claude-sonnet-4-5-20250929', model: 'claude-sonnet-4-5-20250929',
maxTurns: 10_000, maxTurns: 10_000,
@@ -241,7 +244,7 @@ export async function runClaudePrompt(
}; };
if (!execContext.useCleanOutput) { if (!execContext.useCleanOutput) {
console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`)); logger.info(`SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`);
} }
let turnCount = 0; let turnCount = 0;
@@ -252,10 +255,11 @@ export async function runClaudePrompt(
progress.start(); progress.start();
try { try {
// 6. Process the message stream
const messageLoopResult = await processMessageStream( const messageLoopResult = await processMessageStream(
fullPrompt, fullPrompt,
options, options,
{ execContext, description, colorFn, progress, auditLogger }, { execContext, description, progress, auditLogger, logger },
timer timer
); );
@@ -266,30 +270,21 @@ export async function runClaudePrompt(
const model = messageLoopResult.model; const model = messageLoopResult.model;
// === SPENDING CAP SAFEGUARD === // === SPENDING CAP SAFEGUARD ===
// Defense-in-depth: Detect spending cap that slipped through detectApiError(). // 7. Defense-in-depth: Detect spending cap that slipped through detectApiError().
// When spending cap is hit, Claude returns a short message with $0 cost. // Uses consolidated billing detection from utils/billing-detection.ts
// Legitimate agent work NEVER costs $0 with only 1-2 turns. if (isSpendingCapBehavior(turnCount, totalCost, result || '')) {
if (turnCount <= 2 && totalCost === 0) { throw new PentestError(
const resultLower = (result || '').toLowerCase(); `Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
const BILLING_KEYWORDS = ['spending', 'cap', 'limit', 'budget', 'resets']; 'billing',
const looksLikeBillingError = BILLING_KEYWORDS.some((kw) => true // Retryable - Temporal will use 5-30 min backoff
resultLower.includes(kw)
); );
if (looksLikeBillingError) {
throw new PentestError(
`Spending cap likely reached (turns=${turnCount}, cost=$0): ${result?.slice(0, 100)}`,
'billing',
true // Retryable - Temporal will use 5-30 min backoff
);
}
} }
// 8. Finalize successful result
const duration = timer.stop(); const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
if (apiErrorDetected) { if (apiErrorDetected) {
console.log(chalk.yellow(` API Error detected in ${description} - will validate deliverables before failing`)); logger.warn(`API Error detected in ${description} - will validate deliverables before failing`);
} }
progress.finish(formatCompletionMessage(execContext, description, turnCount, duration)); progress.finish(formatCompletionMessage(execContext, description, turnCount, duration));
@@ -306,8 +301,8 @@ export async function runClaudePrompt(
}; };
} catch (error) { } catch (error) {
// 9. Handle errors — log, write error file, return failure
const duration = timer.stop(); const duration = timer.stop();
timingResults.agents[execContext.agentKey] = duration;
const err = error as Error & { code?: string; status?: number }; const err = error as Error & { code?: string; status?: number };
@@ -340,9 +335,9 @@ interface MessageLoopResult {
interface MessageLoopDeps { interface MessageLoopDeps {
execContext: ReturnType<typeof detectExecutionContext>; execContext: ReturnType<typeof detectExecutionContext>;
description: string; description: string;
colorFn: ChalkInstance;
progress: ReturnType<typeof createProgressManager>; progress: ReturnType<typeof createProgressManager>;
auditLogger: ReturnType<typeof createAuditLogger>; auditLogger: ReturnType<typeof createAuditLogger>;
logger: ActivityLogger;
} }
async function processMessageStream( async function processMessageStream(
@@ -351,7 +346,7 @@ async function processMessageStream(
deps: MessageLoopDeps, deps: MessageLoopDeps,
timer: Timer timer: Timer
): Promise<MessageLoopResult> { ): Promise<MessageLoopResult> {
const { execContext, description, colorFn, progress, auditLogger } = deps; const { execContext, description, progress, auditLogger, logger } = deps;
const HEARTBEAT_INTERVAL = 30000; const HEARTBEAT_INTERVAL = 30000;
let turnCount = 0; let turnCount = 0;
@@ -365,7 +360,7 @@ async function processMessageStream(
// Heartbeat logging when loader is disabled // Heartbeat logging when loader is disabled
const now = Date.now(); const now = Date.now();
if (global.SHANNON_DISABLE_LOADER && now - lastHeartbeat > HEARTBEAT_INTERVAL) { if (global.SHANNON_DISABLE_LOADER && now - lastHeartbeat > HEARTBEAT_INTERVAL) {
console.log(chalk.blue(` [${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`)); logger.info(`[${Math.floor((now - timer.startTime) / 1000)}s] ${description} running... (Turn ${turnCount})`);
lastHeartbeat = now; lastHeartbeat = now;
} }
@@ -377,7 +372,7 @@ async function processMessageStream(
const dispatchResult = await dispatchMessage( const dispatchResult = await dispatchMessage(
message as { type: string; subtype?: string }, message as { type: string; subtype?: string },
turnCount, turnCount,
{ execContext, description, colorFn, progress, auditLogger } { execContext, description, progress, auditLogger, logger }
); );
if (dispatchResult.type === 'throw') { if (dispatchResult.type === 'throw') {
@@ -403,153 +398,3 @@ async function processMessageStream(
return { turnCount, result, apiErrorDetected, cost, model }; return { turnCount, result, apiErrorDetected, cost, model };
} }
/**
 * Main entry point for agent execution. Handles retries, git checkpoints, and validation.
 *
 * Each attempt (up to 3) creates a git checkpoint, runs the prompt via
 * `runClaudePrompt`, and validates the agent output. A successful, validated
 * attempt is committed and returned. Failed attempts roll the workspace back
 * and, for retryable errors, wait `getRetryDelay(...)` before trying again.
 *
 * @param prompt - Prompt text sent to the agent.
 * @param sourceDir - Working directory used for git checkpoints, rollback and validation.
 * @param _allowedTools - Unused; retained for signature compatibility.
 * @param context - Extra context passed to `runClaudePrompt`; partial results of a failed attempt are appended on retry.
 * @param description - Human-readable label used in log output and error messages.
 * @param agentName - Agent identifier; audit logging is only active when this and `sessionMetadata` are both set.
 * @param colorFn - Chalk color function forwarded to `runClaudePrompt`.
 * @param sessionMetadata - Session metadata used to create the `AuditSession`.
 * @returns The successful, validated result of `runClaudePrompt`.
 * @throws PentestError when output validation still fails on the final attempt.
 * @throws Error the last recorded error once all attempts are exhausted, or a
 *   generic Error when no attempt recorded one (never `undefined`).
 */
export async function runClaudePromptWithRetry(
  prompt: string,
  sourceDir: string,
  _allowedTools: string = 'Read',
  context: string = '',
  description: string = 'Claude analysis',
  agentName: string | null = null,
  colorFn: ChalkInstance = chalk.cyan,
  sessionMetadata: SessionMetadata | null = null
): Promise<ClaudePromptResult> {
  const maxRetries = 3;
  let lastError: Error | undefined;
  let retryContext = context;

  console.log(chalk.cyan(`Starting ${description} with ${maxRetries} max attempts`));

  // Audit logging is optional: it needs both session metadata and an agent name.
  let auditSession: AuditSession | null = null;
  if (sessionMetadata && agentName) {
    auditSession = new AuditSession(sessionMetadata);
    await auditSession.initialize();
  }

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Checkpoint first so a failed attempt can be rolled back cleanly.
    await createGitCheckpoint(sourceDir, description, attempt);

    if (auditSession && agentName) {
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await auditSession.startAgent(agentName, fullPrompt, attempt);
    }

    try {
      const result = await runClaudePrompt(
        prompt, sourceDir, retryContext,
        description, agentName, colorFn, sessionMetadata, auditSession, attempt
      );

      if (result.success) {
        const validationPassed = await validateAgentOutput(result, agentName, sourceDir);

        if (validationPassed) {
          if (result.apiErrorDetected) {
            console.log(chalk.yellow(`Validation: Ready for exploitation despite API error warnings`));
          }

          if (auditSession && agentName) {
            const commitHash = await getGitCommitHash(sourceDir);
            const endResult: {
              attemptNumber: number;
              duration_ms: number;
              cost_usd: number;
              success: true;
              checkpoint?: string;
            } = {
              attemptNumber: attempt,
              duration_ms: result.duration,
              cost_usd: result.cost || 0,
              success: true,
            };
            // Only record a checkpoint when a commit hash is available.
            if (commitHash) {
              endResult.checkpoint = commitHash;
            }
            await auditSession.endAgent(agentName, endResult);
          }

          await commitGitSuccess(sourceDir, description);
          console.log(chalk.green.bold(`${description} completed successfully on attempt ${attempt}/${maxRetries}`));
          return result;
        } else {
          // Validation failure is retryable - agent might succeed on retry with cleaner workspace
          console.log(chalk.yellow(`${description} completed but output validation failed`));

          if (auditSession && agentName) {
            await auditSession.endAgent(agentName, {
              attemptNumber: attempt,
              duration_ms: result.duration,
              cost_usd: result.partialCost || result.cost || 0,
              success: false,
              error: 'Output validation failed',
              isFinalAttempt: attempt === maxRetries
            });
          }

          if (result.apiErrorDetected) {
            console.log(chalk.yellow(`API Error detected with validation failure - treating as retryable`));
            lastError = new Error('API Error: terminated with validation failure');
          } else {
            lastError = new Error('Output validation failed');
          }

          if (attempt < maxRetries) {
            await rollbackGitWorkspace(sourceDir, 'validation failure');
            continue;
          } else {
            // NOTE(review): this throw is inside the try block, so it is handled
            // by the catch below before leaving the function.
            throw new PentestError(
              `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
              'validation',
              false,
              { description, sourceDir, attemptsExhausted: maxRetries }
            );
          }
        }
      }
      // A result with success === false that did not throw falls through to the
      // next attempt without recording an error (see the final throw below).
    } catch (error) {
      const err = error as Error & { duration?: number; cost?: number; partialResults?: unknown };
      lastError = err;

      if (auditSession && agentName) {
        await auditSession.endAgent(agentName, {
          attemptNumber: attempt,
          duration_ms: err.duration || 0,
          cost_usd: err.cost || 0,
          success: false,
          error: err.message,
          isFinalAttempt: attempt === maxRetries
        });
      }

      if (!isRetryableError(err)) {
        console.log(chalk.red(`${description} failed with non-retryable error: ${err.message}`));
        await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
        throw err;
      }

      if (attempt < maxRetries) {
        await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');

        const delay = getRetryDelay(err, attempt);
        const delaySeconds = (delay / 1000).toFixed(1);
        console.log(chalk.yellow(`${description} failed (attempt ${attempt}/${maxRetries})`));
        console.log(chalk.gray(` Error: ${err.message}`));
        console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));

        // Carry partial results into the next attempt's context.
        if (err.partialResults) {
          retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(err.partialResults)}`;
        }

        await new Promise(resolve => setTimeout(resolve, delay));
      } else {
        await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
        console.log(chalk.red(`${description} failed after ${maxRetries} attempts`));
        console.log(chalk.red(` Final error: ${err.message}`));
      }
    }
  }

  // lastError stays undefined when every attempt returned an unsuccessful result
  // without throwing; never throw `undefined` to callers.
  throw lastError ?? new Error(`${description} failed after ${maxRetries} attempts without reporting an error`);
}
+30 -43
View File
@@ -4,20 +4,19 @@
// it under the terms of the GNU Affero General Public License version 3 // it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
// Pure functions for processing SDK message types import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { PentestError } from '../error-handling.js'; import { matchesBillingTextPattern } from '../utils/billing-detection.js';
import { filterJsonToolCalls } from '../utils/output-formatter.js'; import { filterJsonToolCalls } from './output-formatters.js';
import { formatTimestamp } from '../utils/formatting.js'; import { formatTimestamp } from '../utils/formatting.js';
import chalk from 'chalk';
import { getActualModelName } from './router-utils.js'; import { getActualModelName } from './router-utils.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { import {
formatAssistantOutput, formatAssistantOutput,
formatResultOutput, formatResultOutput,
formatToolUseOutput, formatToolUseOutput,
formatToolResultOutput, formatToolResultOutput,
} from './output-formatters.js'; } from './output-formatters.js';
import { costResults } from '../utils/metrics.js';
import type { AuditLogger } from './audit-logger.js'; import type { AuditLogger } from './audit-logger.js';
import type { ProgressManager } from './progress-manager.js'; import type { ProgressManager } from './progress-manager.js';
import type { import type {
@@ -35,10 +34,9 @@ import type {
SystemInitMessage, SystemInitMessage,
ExecutionContext, ExecutionContext,
} from './types.js'; } from './types.js';
import type { ChalkInstance } from 'chalk';
// Handles both array and string content formats from SDK // Handles both array and string content formats from SDK
export function extractMessageContent(message: AssistantMessage): string { function extractMessageContent(message: AssistantMessage): string {
const messageContent = message.message; const messageContent = message.message;
if (Array.isArray(messageContent.content)) { if (Array.isArray(messageContent.content)) {
@@ -51,7 +49,7 @@ export function extractMessageContent(message: AssistantMessage): string {
} }
// Extracts only text content (no tool_use JSON) to avoid false positives in error detection // Extracts only text content (no tool_use JSON) to avoid false positives in error detection
export function extractTextOnlyContent(message: AssistantMessage): string { function extractTextOnlyContent(message: AssistantMessage): string {
const messageContent = message.message; const messageContent = message.message;
if (Array.isArray(messageContent.content)) { if (Array.isArray(messageContent.content)) {
@@ -64,7 +62,7 @@ export function extractTextOnlyContent(message: AssistantMessage): string {
return String(messageContent.content); return String(messageContent.content);
} }
export function detectApiError(content: string): ApiErrorDetection { function detectApiError(content: string): ApiErrorDetection {
if (!content || typeof content !== 'string') { if (!content || typeof content !== 'string') {
return { detected: false }; return { detected: false };
} }
@@ -75,25 +73,15 @@ export function detectApiError(content: string): ApiErrorDetection {
// When Claude Code hits its spending cap, it returns a short message like // When Claude Code hits its spending cap, it returns a short message like
// "Spending cap reached resets 8am" instead of throwing an error. // "Spending cap reached resets 8am" instead of throwing an error.
// These should retry with 5-30 min backoff so workflows can recover when cap resets. // These should retry with 5-30 min backoff so workflows can recover when cap resets.
const BILLING_PATTERNS = [ if (matchesBillingTextPattern(content)) {
'spending cap',
'spending limit',
'cap reached',
'budget exceeded',
'usage limit',
];
const isBillingError = BILLING_PATTERNS.some((pattern) =>
lowerContent.includes(pattern)
);
if (isBillingError) {
return { return {
detected: true, detected: true,
shouldThrow: new PentestError( shouldThrow: new PentestError(
`Billing limit reached: ${content.slice(0, 100)}`, `Billing limit reached: ${content.slice(0, 100)}`,
'billing', 'billing',
true // RETRYABLE - Temporal will use 5-30 min backoff true, // RETRYABLE - Temporal will use 5-30 min backoff
{},
ErrorCode.SPENDING_CAP_REACHED
), ),
}; };
} }
@@ -127,7 +115,9 @@ function handleStructuredError(
shouldThrow: new PentestError( shouldThrow: new PentestError(
`Billing error (structured): ${content.slice(0, 100)}`, `Billing error (structured): ${content.slice(0, 100)}`,
'billing', 'billing',
true // Retryable with backoff true, // Retryable with backoff
{},
ErrorCode.INSUFFICIENT_CREDITS
), ),
}; };
case 'rate_limit': case 'rate_limit':
@@ -136,7 +126,9 @@ function handleStructuredError(
shouldThrow: new PentestError( shouldThrow: new PentestError(
`Rate limit hit (structured): ${content.slice(0, 100)}`, `Rate limit hit (structured): ${content.slice(0, 100)}`,
'network', 'network',
true // Retryable with backoff true, // Retryable with backoff
{},
ErrorCode.API_RATE_LIMITED
), ),
}; };
case 'authentication_failed': case 'authentication_failed':
@@ -181,7 +173,7 @@ function handleStructuredError(
} }
} }
export function handleAssistantMessage( function handleAssistantMessage(
message: AssistantMessage, message: AssistantMessage,
turnCount: number turnCount: number
): AssistantResult { ): AssistantResult {
@@ -219,7 +211,7 @@ export function handleAssistantMessage(
} }
// Final message of a query with cost/duration info // Final message of a query with cost/duration info
export function handleResultMessage(message: ResultMessage): ResultData { function handleResultMessage(message: ResultMessage): ResultData {
const result: ResultData = { const result: ResultData = {
result: message.result || null, result: message.result || null,
cost: message.total_cost_usd || 0, cost: message.total_cost_usd || 0,
@@ -236,14 +228,14 @@ export function handleResultMessage(message: ResultMessage): ResultData {
if (message.stop_reason !== undefined) { if (message.stop_reason !== undefined) {
result.stop_reason = message.stop_reason; result.stop_reason = message.stop_reason;
if (message.stop_reason && message.stop_reason !== 'end_turn') { if (message.stop_reason && message.stop_reason !== 'end_turn') {
console.log(chalk.yellow(` Stop reason: ${message.stop_reason}`)); console.log(` Stop reason: ${message.stop_reason}`);
} }
} }
return result; return result;
} }
export function handleToolUseMessage(message: ToolUseMessage): ToolUseData { function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
return { return {
toolName: message.name, toolName: message.name,
parameters: message.input || {}, parameters: message.input || {},
@@ -252,7 +244,7 @@ export function handleToolUseMessage(message: ToolUseMessage): ToolUseData {
} }
// Truncates long results for display (500 char limit), preserves full content for logging // Truncates long results for display (500 char limit), preserves full content for logging
export function handleToolResultMessage(message: ToolResultMessage): ToolResultData { function handleToolResultMessage(message: ToolResultMessage): ToolResultData {
const content = message.content; const content = message.content;
const contentStr = const contentStr =
typeof content === 'string' ? content : JSON.stringify(content, null, 2); typeof content === 'string' ? content : JSON.stringify(content, null, 2);
@@ -269,14 +261,12 @@ export function handleToolResultMessage(message: ToolResultMessage): ToolResultD
}; };
} }
// Output helper for console logging
function outputLines(lines: string[]): void { function outputLines(lines: string[]): void {
for (const line of lines) { for (const line of lines) {
console.log(line); console.log(line);
} }
} }
// Message dispatch result types
export type MessageDispatchAction = export type MessageDispatchAction =
| { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined } | { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
| { type: 'complete'; result: string | null; cost: number } | { type: 'complete'; result: string | null; cost: number }
@@ -285,9 +275,9 @@ export type MessageDispatchAction =
export interface MessageDispatchDeps { export interface MessageDispatchDeps {
execContext: ExecutionContext; execContext: ExecutionContext;
description: string; description: string;
colorFn: ChalkInstance;
progress: ProgressManager; progress: ProgressManager;
auditLogger: AuditLogger; auditLogger: AuditLogger;
logger: ActivityLogger;
} }
// Dispatches SDK messages to appropriate handlers and formatters // Dispatches SDK messages to appropriate handlers and formatters
@@ -296,7 +286,7 @@ export async function dispatchMessage(
turnCount: number, turnCount: number,
deps: MessageDispatchDeps deps: MessageDispatchDeps
): Promise<MessageDispatchAction> { ): Promise<MessageDispatchAction> {
const { execContext, description, colorFn, progress, auditLogger } = deps; const { execContext, description, progress, auditLogger, logger } = deps;
switch (message.type) { switch (message.type) {
case 'assistant': { case 'assistant': {
@@ -312,8 +302,7 @@ export async function dispatchMessage(
assistantResult.cleanedContent, assistantResult.cleanedContent,
execContext, execContext,
turnCount, turnCount,
description, description
colorFn
)); ));
progress.start(); progress.start();
} }
@@ -321,7 +310,7 @@ export async function dispatchMessage(
await auditLogger.logLlmResponse(turnCount, assistantResult.content); await auditLogger.logLlmResponse(turnCount, assistantResult.content);
if (assistantResult.apiErrorDetected) { if (assistantResult.apiErrorDetected) {
console.log(chalk.red(` API Error detected in assistant response`)); logger.warn('API Error detected in assistant response');
return { type: 'continue', apiErrorDetected: true }; return { type: 'continue', apiErrorDetected: true };
} }
@@ -333,10 +322,10 @@ export async function dispatchMessage(
const initMsg = message as SystemInitMessage; const initMsg = message as SystemInitMessage;
const actualModel = getActualModelName(initMsg.model); const actualModel = getActualModelName(initMsg.model);
if (!execContext.useCleanOutput) { if (!execContext.useCleanOutput) {
console.log(chalk.blue(` Model: ${actualModel}, Permission: ${initMsg.permissionMode}`)); logger.info(`Model: ${actualModel}, Permission: ${initMsg.permissionMode}`);
if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) { if (initMsg.mcp_servers && initMsg.mcp_servers.length > 0) {
const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', '); const mcpStatus = initMsg.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
console.log(chalk.blue(` MCP: ${mcpStatus}`)); logger.info(`MCP: ${mcpStatus}`);
} }
} }
// Return actual model for tracking in audit logs // Return actual model for tracking in audit logs
@@ -368,13 +357,11 @@ export async function dispatchMessage(
case 'result': { case 'result': {
const resultData = handleResultMessage(message as ResultMessage); const resultData = handleResultMessage(message as ResultMessage);
outputLines(formatResultOutput(resultData, !execContext.useCleanOutput)); outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
costResults.agents[execContext.agentKey] = resultData.cost;
costResults.total += resultData.cost;
return { type: 'complete', result: resultData.result, cost: resultData.cost }; return { type: 'complete', result: resultData.result, cost: resultData.cost };
} }
default: default:
console.log(chalk.gray(` ${message.type}: ${JSON.stringify(message, null, 2)}`)); logger.info(`Unhandled message type: ${message.type}`);
return { type: 'continue' }; return { type: 'continue' };
} }
} }
+286 -41
View File
@@ -4,13 +4,267 @@
// it under the terms of the GNU Affero General Public License version 3 // it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
// Pure functions for formatting console output
import chalk from 'chalk';
import { extractAgentType, formatDuration } from '../utils/formatting.js'; import { extractAgentType, formatDuration } from '../utils/formatting.js';
import { getAgentPrefix } from '../utils/output-formatter.js'; import { AGENTS } from '../session-manager.js';
import type { ExecutionContext, ResultData } from './types.js'; import type { ExecutionContext, ResultData } from './types.js';
// Input payload of a parsed tool_use call. Only the fields read by the
// formatters in this file are named; any other key is accepted as unknown.
interface ToolCallInput {
// Target URL (read for browser_navigate).
url?: string;
// Element description (read for browser click / hover / type).
element?: string;
// Key name (read for browser_press_key).
key?: string;
// Form fields (only the count is displayed, for browser_fill_form).
fields?: unknown[];
// Text being waited for (read for browser_wait_for).
text?: string;
// Tab action (read for browser_tabs).
action?: string;
// Description of the launched agent (read for Task calls).
description?: string;
// Todo entries (read for TodoWrite calls).
todos?: Array<{
status: string;
content: string;
}>;
// Any additional tool-specific keys.
[key: string]: unknown;
}
// Shape a JSON tool_use line is cast to after parsing: the tool name plus its optional input payload.
interface ToolCall {
name: string;
input?: ToolCallInput;
}
/**
 * Resolve the bracketed log prefix (e.g. `[XSS]`) for an agent description.
 *
 * Known agents are matched by their display name first; if none matches, the
 * description is searched for lowercase keywords. Returns `[Agent]` when
 * nothing matches.
 */
export function getAgentPrefix(description: string): string {
  // Agent key -> prefix, in lookup order.
  const prefixByAgent: ReadonlyArray<readonly [string, string]> = [
    ['injection-vuln', '[Injection]'],
    ['xss-vuln', '[XSS]'],
    ['auth-vuln', '[Auth]'],
    ['authz-vuln', '[Authz]'],
    ['ssrf-vuln', '[SSRF]'],
    ['injection-exploit', '[Injection]'],
    ['xss-exploit', '[XSS]'],
    ['auth-exploit', '[Auth]'],
    ['authz-exploit', '[Authz]'],
    ['ssrf-exploit', '[SSRF]'],
  ];

  // Pass 1: the description contains a registered agent's display name.
  const byDisplayName = prefixByAgent.find(([agentKey]) => {
    const agent = AGENTS[agentKey as keyof typeof AGENTS];
    return agent ? description.includes(agent.displayName) : false;
  });
  if (byDisplayName) {
    return byDisplayName[1];
  }

  // Pass 2: keyword fallback for backwards compatibility.
  // 'authz' must be tested before 'auth', which is a substring of it.
  const keywordPrefixes: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  for (const [keyword, prefix] of keywordPrefixes) {
    if (description.includes(keyword)) {
      return prefix;
    }
  }

  return '[Agent]';
}
/**
 * Produce a short display label for a URL: its hostname when the URL parses
 * and has one, otherwise the first 30 characters of the raw input.
 */
function extractDomain(url: string): string {
  const truncated = url.slice(0, 30);

  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not a parseable absolute URL - show the raw (truncated) text instead.
    return truncated;
  }

  // Some schemes (e.g. mailto:) parse fine but carry no hostname.
  return hostname ? hostname : truncated;
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * Prefers the last completed todo; otherwise the first in-progress one
 * (prefixed with 🔄). Returns null when there is no todo list or neither
 * kind of entry is present.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const todoList = input?.todos;
  if (!todoList || !Array.isArray(todoList)) {
    return null;
  }

  let lastCompleted: { status: string; content: string } | undefined;
  let firstInProgress: { status: string; content: string } | undefined;

  // Single pass: remember the final completed entry and the earliest in-progress one.
  for (const todo of todoList) {
    if (todo.status === 'completed') {
      lastCompleted = todo;
    } else if (todo.status === 'in_progress' && firstInProgress === undefined) {
      firstInProgress = todo;
    }
  }

  // Completed work takes precedence over the current task.
  if (lastCompleted !== undefined) {
    return `${lastCompleted.content}`;
  }

  if (firstInProgress !== undefined) {
    return `🔄 ${firstInProgress.content}`;
  }

  return null;
}
/**
 * Turn a Playwright MCP browser tool call into a one-line progress message.
 *
 * Each known `mcp__playwright__browser_*` tool has its own message; any other
 * name is reported generically using the last `_`-separated segment of the
 * tool name.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const toolName = toolCall.name;
  const args: ToolCallInput = toolCall.input || {};

  switch (toolName) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction (element labels are truncated to keep lines short)
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(args.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(args.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(args.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting & monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Anything not listed above: show the trailing segment of the tool name.
    default: {
      const actionType = toolName.split('_').pop();
      return `🌐 Browser: ${actionType}`;
    }
  }
}
/**
 * Strip raw JSON tool_use lines from assistant output.
 *
 * Blank lines are dropped. Lines beginning with `{"type":"tool_use"` are
 * parsed: Task, TodoWrite and Playwright browser calls are replaced by a
 * short progress line, every other tool call is hidden. Lines that fail to
 * parse, and all ordinary text lines, are kept unchanged.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (typeof content !== 'string' || content === '') {
    return content || '';
  }

  const kept: string[] = [];

  for (const rawLine of content.split('\n')) {
    const stripped = rawLine.trim();

    // Blank lines are never emitted.
    if (stripped.length === 0) {
      continue;
    }

    // Ordinary assistant text passes through untouched.
    if (!stripped.startsWith('{"type":"tool_use"')) {
      kept.push(rawLine);
      continue;
    }

    try {
      const call = JSON.parse(stripped) as ToolCall;
      let rendered: string | null = null;

      if (call.name === 'Task') {
        rendered = `🚀 Launching ${call.input?.description || 'analysis agent'}`;
      } else if (call.name === 'TodoWrite') {
        rendered = summarizeTodoUpdate(call.input);
      } else if (call.name.startsWith('mcp__playwright__browser_')) {
        rendered = formatBrowserAction(call);
      }
      // Any other tool (Read, Write, Grep, etc.) leaves `rendered` null and is hidden.

      if (rendered) {
        kept.push(rendered);
      }
    } catch {
      // Anything that fails while parsing or rendering is shown as regular text.
      kept.push(rawLine);
    }
  }

  return kept.join('\n');
}
export function detectExecutionContext(description: string): ExecutionContext { export function detectExecutionContext(description: string): ExecutionContext {
const isParallelExecution = const isParallelExecution =
description.includes('vuln agent') || description.includes('exploit agent'); description.includes('vuln agent') || description.includes('exploit agent');
@@ -33,8 +287,7 @@ export function formatAssistantOutput(
cleanedContent: string, cleanedContent: string,
context: ExecutionContext, context: ExecutionContext,
turnCount: number, turnCount: number,
description: string, description: string
colorFn: typeof chalk.cyan = chalk.cyan
): string[] { ): string[] {
if (!cleanedContent.trim()) { if (!cleanedContent.trim()) {
return []; return [];
@@ -45,11 +298,11 @@ export function formatAssistantOutput(
if (context.isParallelExecution) { if (context.isParallelExecution) {
// Compact output for parallel agents with prefixes // Compact output for parallel agents with prefixes
const prefix = getAgentPrefix(description); const prefix = getAgentPrefix(description);
lines.push(colorFn(`${prefix} ${cleanedContent}`)); lines.push(`${prefix} ${cleanedContent}`);
} else { } else {
// Full turn output for sequential agents // Full turn output for sequential agents
lines.push(colorFn(`\n Turn ${turnCount} (${description}):`)); lines.push(`\n Turn ${turnCount} (${description}):`);
lines.push(colorFn(` ${cleanedContent}`)); lines.push(` ${cleanedContent}`);
} }
return lines; return lines;
@@ -58,28 +311,24 @@ export function formatAssistantOutput(
export function formatResultOutput(data: ResultData, showFullResult: boolean): string[] { export function formatResultOutput(data: ResultData, showFullResult: boolean): string[] {
const lines: string[] = []; const lines: string[] = [];
lines.push(chalk.magenta(`\n COMPLETED:`)); lines.push(`\n COMPLETED:`);
lines.push( lines.push(` Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`);
chalk.gray(
` Duration: ${(data.duration_ms / 1000).toFixed(1)}s, Cost: $${data.cost.toFixed(4)}`
)
);
if (data.subtype === 'error_max_turns') { if (data.subtype === 'error_max_turns') {
lines.push(chalk.red(` Stopped: Hit maximum turns limit`)); lines.push(` Stopped: Hit maximum turns limit`);
} else if (data.subtype === 'error_during_execution') { } else if (data.subtype === 'error_during_execution') {
lines.push(chalk.red(` Stopped: Execution error`)); lines.push(` Stopped: Execution error`);
} }
if (data.permissionDenials > 0) { if (data.permissionDenials > 0) {
lines.push(chalk.yellow(` ${data.permissionDenials} permission denials`)); lines.push(` ${data.permissionDenials} permission denials`);
} }
if (showFullResult && data.result && typeof data.result === 'string') { if (showFullResult && data.result && typeof data.result === 'string') {
if (data.result.length > 1000) { if (data.result.length > 1000) {
lines.push(chalk.magenta(` ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`)); lines.push(` ${data.result.slice(0, 1000)}... [${data.result.length} total chars]`);
} else { } else {
lines.push(chalk.magenta(` ${data.result}`)); lines.push(` ${data.result}`);
} }
} }
@@ -98,24 +347,24 @@ export function formatErrorOutput(
if (context.isParallelExecution) { if (context.isParallelExecution) {
const prefix = getAgentPrefix(description); const prefix = getAgentPrefix(description);
lines.push(chalk.red(`${prefix} Failed (${formatDuration(duration)})`)); lines.push(`${prefix} Failed (${formatDuration(duration)})`);
} else if (context.useCleanOutput) { } else if (context.useCleanOutput) {
lines.push(chalk.red(`${context.agentType} failed (${formatDuration(duration)})`)); lines.push(`${context.agentType} failed (${formatDuration(duration)})`);
} else { } else {
lines.push(chalk.red(` Claude Code failed: ${description} (${formatDuration(duration)})`)); lines.push(` Claude Code failed: ${description} (${formatDuration(duration)})`);
} }
lines.push(chalk.red(` Error Type: ${error.constructor.name}`)); lines.push(` Error Type: ${error.constructor.name}`);
lines.push(chalk.red(` Message: ${error.message}`)); lines.push(` Message: ${error.message}`);
lines.push(chalk.gray(` Agent: ${description}`)); lines.push(` Agent: ${description}`);
lines.push(chalk.gray(` Working Directory: ${sourceDir}`)); lines.push(` Working Directory: ${sourceDir}`);
lines.push(chalk.gray(` Retryable: ${isRetryable ? 'Yes' : 'No'}`)); lines.push(` Retryable: ${isRetryable ? 'Yes' : 'No'}`);
if (error.code) { if (error.code) {
lines.push(chalk.gray(` Error Code: ${error.code}`)); lines.push(` Error Code: ${error.code}`);
} }
if (error.status) { if (error.status) {
lines.push(chalk.gray(` HTTP Status: ${error.status}`)); lines.push(` HTTP Status: ${error.status}`);
} }
return lines; return lines;
@@ -129,18 +378,14 @@ export function formatCompletionMessage(
): string { ): string {
if (context.isParallelExecution) { if (context.isParallelExecution) {
const prefix = getAgentPrefix(description); const prefix = getAgentPrefix(description);
return chalk.green(`${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`); return `${prefix} Complete (${turnCount} turns, ${formatDuration(duration)})`;
} }
if (context.useCleanOutput) { if (context.useCleanOutput) {
return chalk.green( return `${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`;
`${context.agentType.charAt(0).toUpperCase() + context.agentType.slice(1)} complete! (${turnCount} turns, ${formatDuration(duration)})`
);
} }
return chalk.green( return ` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`;
` Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`
);
} }
export function formatToolUseOutput( export function formatToolUseOutput(
@@ -149,9 +394,9 @@ export function formatToolUseOutput(
): string[] { ): string[] {
const lines: string[] = []; const lines: string[] = [];
lines.push(chalk.yellow(`\n Using Tool: ${toolName}`)); lines.push(`\n Using Tool: ${toolName}`);
if (input && Object.keys(input).length > 0) { if (input && Object.keys(input).length > 0) {
lines.push(chalk.gray(` Input: ${JSON.stringify(input, null, 2)}`)); lines.push(` Input: ${JSON.stringify(input, null, 2)}`);
} }
return lines; return lines;
@@ -160,9 +405,9 @@ export function formatToolUseOutput(
export function formatToolResultOutput(displayContent: string): string[] { export function formatToolResultOutput(displayContent: string): string[] {
const lines: string[] = []; const lines: string[] = [];
lines.push(chalk.green(` Tool Result:`)); lines.push(` Tool Result:`);
if (displayContent) { if (displayContent) {
lines.push(chalk.gray(` ${displayContent}`)); lines.push(` ${displayContent}`);
} }
return lines; return lines;
-6
View File
@@ -26,9 +26,3 @@ export function getActualModelName(sdkReportedModel?: string): string | undefine
return sdkReportedModel; return sdkReportedModel;
} }
/**
 * Report whether router mode is enabled.
 *
 * Router mode counts as active only when both the `ANTHROPIC_BASE_URL` and
 * `ROUTER_DEFAULT` environment variables hold non-empty values.
 *
 * @returns `true` when both variables are set to non-empty strings.
 */
export function isRouterMode(): boolean {
  const { ANTHROPIC_BASE_URL: baseUrl, ROUTER_DEFAULT: routerDefault } = process.env;
  return Boolean(baseUrl) && Boolean(routerDefault);
}
-38
View File
@@ -13,22 +13,6 @@ export interface ExecutionContext {
agentKey: string; agentKey: string;
} }
/**
 * Mutable bookkeeping record for processing one agent run.
 * NOTE(review): the field descriptions below are inferred from the field names
 * only; the code that reads and writes them is not visible here — confirm.
 */
export interface ProcessingState {
// Presumably the number of turns seen so far.
turnCount: number;
// Final result text, or null while none is available.
result: string | null;
// Presumably set once an API error has been observed.
apiErrorDetected: boolean;
// Presumably the total cost reported for the run (unit not shown here).
totalCost: number;
// Presumably cost accumulated before a final total is known — TODO confirm.
partialCost: number;
// Presumably a timestamp of the last heartbeat — TODO confirm unit/epoch.
lastHeartbeat: number;
}
/**
 * Summary of a finished processing run: result text, turn count, API-error
 * flag, and total cost.
 * NOTE(review): semantics inferred from field names; the producer of this
 * type is not visible here — confirm.
 */
export interface ProcessingResult {
// Final result text, or null when none was produced.
result: string | null;
turnCount: number;
apiErrorDetected: boolean;
totalCost: number;
}
export interface AssistantResult { export interface AssistantResult {
content: string; content: string;
cleanedContent: string; cleanedContent: string;
@@ -110,15 +94,6 @@ export interface ApiErrorDetection {
shouldThrow?: Error; shouldThrow?: Error;
} }
// Message types from SDK stream.
// Union of every message shape listed below; the member types are declared
// separately in this file.
export type SdkMessage =
| AssistantMessage
| ResultMessage
| ToolUseMessage
| ToolResultMessage
| SystemInitMessage
| UserMessage;
export interface SystemInitMessage { export interface SystemInitMessage {
type: 'system'; type: 'system';
subtype: 'init'; subtype: 'init';
@@ -131,16 +106,3 @@ export interface UserMessage {
type: 'user'; type: 'user';
} }
// Dispatch result types for message processing.
// Discriminated union keyed on `action`:
//   'continue' - carries no payload
//   'break'    - carries a result (string or null) and a numeric cost
//   'throw'    - carries the Error to raise
export type MessageDispatchResult =
| { action: 'continue' }
| { action: 'break'; result: string | null; cost: number }
| { action: 'throw'; error: Error };
/**
 * Inputs handed to message dispatch.
 * NOTE(review): the consumer of this type is not visible here; the
 * descriptions below are inferred from names and types — confirm.
 */
export interface MessageDispatchContext {
turnCount: number;
execContext: ExecutionContext;
description: string;
// Maps a text string to a string; presumably applies terminal coloring.
colorFn: (text: string) => string;
useCleanOutput: boolean;
}
+46 -29
View File
@@ -17,21 +17,13 @@ import { MetricsTracker } from './metrics-tracker.js';
import { initializeAuditStructure, type SessionMetadata } from './utils.js'; import { initializeAuditStructure, type SessionMetadata } from './utils.js';
import { formatTimestamp } from '../utils/formatting.js'; import { formatTimestamp } from '../utils/formatting.js';
import { SessionMutex } from '../utils/concurrency.js'; import { SessionMutex } from '../utils/concurrency.js';
import type { AgentEndResult } from '../types/index.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
// Global mutex instance // Global mutex instance
const sessionMutex = new SessionMutex(); const sessionMutex = new SessionMutex();
/**
 * Outcome of a single agent attempt, as passed to `AuditSession.endAgent`,
 * which forwards it to the workflow log and to `MetricsTracker.endAgent`.
 */
interface AgentEndResult {
// Attempt this result belongs to.
attemptNumber: number;
// Attempt duration (milliseconds, per the field name).
duration_ms: number;
// Attempt cost (US dollars, per the field name).
cost_usd: number;
// Whether the attempt succeeded.
success: boolean;
// Optional model identifier for the attempt.
model?: string | undefined;
// Optional error text for a failed attempt.
error?: string | undefined;
// Optional checkpoint identifier — presumably a git checkpoint; confirm.
checkpoint?: string | undefined;
// Optional flag marking the last attempt for this agent.
isFinalAttempt?: boolean | undefined;
}
/** /**
* AuditSession - Main audit system facade * AuditSession - Main audit system facade
*/ */
@@ -50,10 +42,22 @@ export class AuditSession {
// Validate required fields // Validate required fields
if (!this.sessionId) { if (!this.sessionId) {
throw new Error('sessionMetadata.id is required'); throw new PentestError(
'sessionMetadata.id is required',
'config',
false,
{ field: 'sessionMetadata.id' },
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
if (!this.sessionMetadata.webUrl) { if (!this.sessionMetadata.webUrl) {
throw new Error('sessionMetadata.webUrl is required'); throw new PentestError(
'sessionMetadata.webUrl is required',
'config',
false,
{ field: 'sessionMetadata.webUrl' },
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
// Components // Components
@@ -103,29 +107,26 @@ export class AuditSession {
): Promise<void> { ): Promise<void> {
await this.ensureInitialized(); await this.ensureInitialized();
// Save prompt snapshot (only on first attempt) // 1. Save prompt snapshot (only on first attempt)
if (attemptNumber === 1) { if (attemptNumber === 1) {
await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent); await AgentLogger.savePrompt(this.sessionMetadata, agentName, promptContent);
} }
// Track current agent name for workflow logging // 2. Create and initialize the per-agent logger
this.currentAgentName = agentName; this.currentAgentName = agentName;
// Create and initialize logger for this attempt
this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber); this.currentLogger = new AgentLogger(this.sessionMetadata, agentName, attemptNumber);
await this.currentLogger.initialize(); await this.currentLogger.initialize();
// Start metrics tracking // 3. Start metrics timer
this.metricsTracker.startAgent(agentName, attemptNumber); this.metricsTracker.startAgent(agentName, attemptNumber);
// Log start event // 4. Log start event to both agent log and workflow log
await this.currentLogger.logEvent('agent_start', { await this.currentLogger.logEvent('agent_start', {
agentName, agentName,
attemptNumber, attemptNumber,
timestamp: formatTimestamp(), timestamp: formatTimestamp(),
}); });
// Log to unified workflow log
await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber }); await this.workflowLogger.logAgent(agentName, 'start', { attemptNumber });
} }
@@ -134,7 +135,13 @@ export class AuditSession {
*/ */
async logEvent(eventType: string, eventData: unknown): Promise<void> { async logEvent(eventType: string, eventData: unknown): Promise<void> {
if (!this.currentLogger) { if (!this.currentLogger) {
throw new Error('No active logger. Call startAgent() first.'); throw new PentestError(
'No active logger. Call startAgent() first.',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
} }
// Log to agent-specific log file (JSON format) // Log to agent-specific log file (JSON format)
@@ -167,7 +174,7 @@ export class AuditSession {
* End agent execution (mutex-protected) * End agent execution (mutex-protected)
*/ */
async endAgent(agentName: string, result: AgentEndResult): Promise<void> { async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
// Log end event // 1. Finalize agent log and close the stream
if (this.currentLogger) { if (this.currentLogger) {
await this.currentLogger.logEvent('agent_end', { await this.currentLogger.logEvent('agent_end', {
agentName, agentName,
@@ -177,15 +184,13 @@ export class AuditSession {
timestamp: formatTimestamp(), timestamp: formatTimestamp(),
}); });
// Close logger
await this.currentLogger.close(); await this.currentLogger.close();
this.currentLogger = null; this.currentLogger = null;
} }
// Reset current agent name // 2. Log completion to the unified workflow log
this.currentAgentName = null; this.currentAgentName = null;
// Log to unified workflow log
const agentLogDetails: AgentLogDetails = { const agentLogDetails: AgentLogDetails = {
attemptNumber: result.attemptNumber, attemptNumber: result.attemptNumber,
duration_ms: result.duration_ms, duration_ms: result.duration_ms,
@@ -195,13 +200,11 @@ export class AuditSession {
}; };
await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails); await this.workflowLogger.logAgent(agentName, 'end', agentLogDetails);
// Mutex-protected update to session.json // 3. Acquire mutex before touching session.json
const unlock = await sessionMutex.lock(this.sessionId); const unlock = await sessionMutex.lock(this.sessionId);
try { try {
// Reload inside mutex to prevent lost updates during parallel exploitation phase // 4. Reload-then-write inside mutex to prevent lost updates during parallel phases
await this.metricsTracker.reload(); await this.metricsTracker.reload();
// Update metrics
await this.metricsTracker.endAgent(agentName, result); await this.metricsTracker.endAgent(agentName, result);
} finally { } finally {
unlock(); unlock();
@@ -278,4 +281,18 @@ export class AuditSession {
unlock(); unlock();
} }
} }
/**
 * Log resume header to workflow.log.
 * Call this when a workflow is resuming to add a visual separator.
 *
 * Ensures the session is initialized, then delegates to the workflow
 * logger's `logResumeHeader` with the same `resumeInfo` object.
 *
 * @param resumeInfo - Previous and new workflow IDs, the checkpoint hash,
 *   and the names of agents already completed.
 */
async logResumeHeader(resumeInfo: {
previousWorkflowId: string;
newWorkflowId: string;
checkpointHash: string;
completedAgents: string[];
}): Promise<void> {
// Initialize before the first write to the workflow log.
await this.ensureInitialized();
await this.workflowLogger.logResumeHeader(resumeInfo);
}
} }
-4
View File
@@ -17,7 +17,3 @@
*/ */
export { AuditSession } from './audit-session.js'; export { AuditSession } from './audit-session.js';
export { AgentLogger } from './logger.js';
export { WorkflowLogger } from './workflow-logger.js';
export { MetricsTracker } from './metrics-tracker.js';
export * as AuditUtils from './utils.js';
+127
View File
@@ -0,0 +1,127 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* LogStream - Stream composition utility for append-only logging
*
* Encapsulates the common stream management pattern used by AgentLogger
* and WorkflowLogger: opening streams in append mode, handling backpressure,
* and proper cleanup.
*/
import fs from 'fs';
import path from 'path';
import { ensureDirectory } from '../utils/file-io.js';
/**
 * LogStream - Manages a single append-only log file stream.
 *
 * Lifecycle: `open()` creates the parent directory and an append-mode write
 * stream, `write()` appends text and resolves once that chunk has been handed
 * to the file, and `close()` flushes and releases the stream. A stream error
 * is logged to stderr and marks the instance closed rather than crashing the
 * process; later writes then reject with "LogStream not open".
 */
export class LogStream {
  private readonly filePath: string;
  private stream: fs.WriteStream | null = null;
  private _isOpen = false;

  /**
   * @param filePath - Destination log file; it is not touched until `open()`.
   */
  constructor(filePath: string) {
    this.filePath = filePath;
  }

  /**
   * Open the stream for writing (creates parent directories, opens in append mode).
   * Calling it on an already-open stream is a no-op.
   */
  async open(): Promise<void> {
    if (this._isOpen) {
      return;
    }

    // Ensure parent directory exists
    await ensureDirectory(path.dirname(this.filePath));

    // Create write stream in append mode
    const stream = fs.createWriteStream(this.filePath, {
      flags: 'a',
      encoding: 'utf8',
      autoClose: true,
    });

    // Handle stream errors to prevent crashes (log and mark closed)
    stream.on('error', (err) => {
      console.error(`LogStream error for ${this.filePath}:`, err.message);
      // Only reset state if this is still the active stream.
      if (this.stream === stream) {
        this._isOpen = false;
        this.stream = null;
      }
    });

    this.stream = stream;
    this._isOpen = true;
  }

  /**
   * Write text to the stream with backpressure handling.
   *
   * The promise settles from the per-chunk write callback. That callback runs
   * only after this chunk has been flushed, so awaiting it throttles the
   * caller whenever the internal buffer is backed up. Waiting on a shared
   * 'drain' event instead is unsafe with overlapping writes: 'drain' is only
   * emitted when the whole buffer empties, so a listener tied to one chunk
   * can be removed before it ever fires, leaving the promise pending forever.
   *
   * @param text - Text to append.
   * @throws Error with message "LogStream not open" when the stream is not
   *   open; the underlying stream error when the write itself fails.
   */
  async write(text: string): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      throw new Error('LogStream not open');
    }

    await new Promise<void>((resolve, reject) => {
      stream.write(text, 'utf8', (error) => {
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      });
    });
  }

  /**
   * Close the stream (flush and close).
   * A no-op when the stream is not open.
   */
  async close(): Promise<void> {
    const stream = this.stream;
    if (!this._isOpen || !stream) {
      return;
    }

    await new Promise<void>((resolve) => {
      stream.end(() => {
        this._isOpen = false;
        this.stream = null;
        resolve();
      });
    });
  }

  /**
   * Check if the stream is currently open
   */
  get isOpen(): boolean {
    return this._isOpen;
  }

  /**
   * Get the file path this stream writes to
   */
  get path(): string {
    return this.filePath;
  }
}
+14 -54
View File
@@ -8,10 +8,9 @@
* Append-Only Agent Logger * Append-Only Agent Logger
* *
* Provides crash-safe, append-only logging for agent execution. * Provides crash-safe, append-only logging for agent execution.
* Uses file streams with immediate flush to prevent data loss. * Uses LogStream for stream management with backpressure handling.
*/ */
import fs from 'fs';
import { import {
generateLogPath, generateLogPath,
generatePromptPath, generatePromptPath,
@@ -19,6 +18,7 @@ import {
} from './utils.js'; } from './utils.js';
import { atomicWrite } from '../utils/file-io.js'; import { atomicWrite } from '../utils/file-io.js';
import { formatTimestamp } from '../utils/formatting.js'; import { formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
interface LogEvent { interface LogEvent {
type: string; type: string;
@@ -30,13 +30,11 @@ interface LogEvent {
* AgentLogger - Manages append-only logging for a single agent execution * AgentLogger - Manages append-only logging for a single agent execution
*/ */
export class AgentLogger { export class AgentLogger {
private sessionMetadata: SessionMetadata; private readonly sessionMetadata: SessionMetadata;
private agentName: string; private readonly agentName: string;
private attemptNumber: number; private readonly attemptNumber: number;
private timestamp: number; private readonly timestamp: number;
private logPath: string; private readonly logStream: LogStream;
private stream: fs.WriteStream | null = null;
private isOpen: boolean = false;
constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) { constructor(sessionMetadata: SessionMetadata, agentName: string, attemptNumber: number) {
this.sessionMetadata = sessionMetadata; this.sessionMetadata = sessionMetadata;
@@ -44,26 +42,19 @@ export class AgentLogger {
this.attemptNumber = attemptNumber; this.attemptNumber = attemptNumber;
this.timestamp = Date.now(); this.timestamp = Date.now();
// Generate log file path const logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber);
this.logPath = generateLogPath(sessionMetadata, agentName, this.timestamp, attemptNumber); this.logStream = new LogStream(logPath);
} }
/** /**
* Initialize the log stream (creates file and opens stream) * Initialize the log stream (creates file and opens stream)
*/ */
async initialize(): Promise<void> { async initialize(): Promise<void> {
if (this.isOpen) { if (this.logStream.isOpen) {
return; // Already initialized return; // Already initialized
} }
// Create write stream with append mode and auto-flush await this.logStream.open();
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a', // Append mode
encoding: 'utf8',
autoClose: true,
});
this.isOpen = true;
// Write header // Write header
await this.writeHeader(); await this.writeHeader();
@@ -83,29 +74,7 @@ export class AgentLogger {
`========================================\n`, `========================================\n`,
].join('\n'); ].join('\n');
return this.writeRaw(header); return this.logStream.write(header);
}
/**
* Write raw text to log file with immediate flush
*/
private writeRaw(text: string): Promise<void> {
return new Promise((resolve, reject) => {
if (!this.isOpen || !this.stream) {
reject(new Error('Logger not initialized'));
return;
}
const needsDrain = !this.stream.write(text, 'utf8', (error) => {
if (error) reject(error);
});
if (needsDrain) {
this.stream.once('drain', resolve);
} else {
resolve();
}
});
} }
/** /**
@@ -120,23 +89,14 @@ export class AgentLogger {
}; };
const eventLine = `${JSON.stringify(event)}\n`; const eventLine = `${JSON.stringify(event)}\n`;
return this.writeRaw(eventLine); return this.logStream.write(eventLine);
} }
/** /**
* Close the log stream * Close the log stream
*/ */
async close(): Promise<void> { async close(): Promise<void> {
if (!this.isOpen || !this.stream) { return this.logStream.close();
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.isOpen = false;
resolve();
});
});
} }
/** /**
+30 -26
View File
@@ -18,7 +18,9 @@ import {
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js'; import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js'; import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js'; import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import type { AgentName } from '../types/index.js'; import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentName, AgentEndResult } from '../types/index.js';
interface AttemptData { interface AttemptData {
attempt_number: number; attempt_number: number;
@@ -30,7 +32,7 @@ interface AttemptData {
error?: string | undefined; error?: string | undefined;
} }
interface AgentMetrics { interface AgentAuditMetrics {
status: 'in-progress' | 'success' | 'failed'; status: 'in-progress' | 'success' | 'failed';
attempts: AttemptData[]; attempts: AttemptData[];
final_duration_ms: number; final_duration_ms: number;
@@ -68,21 +70,10 @@ interface SessionData {
total_duration_ms: number; total_duration_ms: number;
total_cost_usd: number; total_cost_usd: number;
phases: Record<string, PhaseMetrics>; phases: Record<string, PhaseMetrics>;
agents: Record<string, AgentMetrics>; agents: Record<string, AgentAuditMetrics>;
}; };
} }
/**
 * Outcome of a single agent attempt, consumed by `MetricsTracker.endAgent`
 * to append an attempt record and update the agent's status.
 */
interface AgentEndResult {
// Recorded as the attempt's `attempt_number`.
attemptNumber: number;
// Attempt duration; becomes the agent's final duration on success.
duration_ms: number;
// Attempt cost; summed across all attempts, including failed ones.
cost_usd: number;
// true marks the agent 'success'; false may mark it 'failed' (see isFinalAttempt).
success: boolean;
// Optional model identifier; stored on the agent when the attempt succeeds.
model?: string | undefined;
// Optional error text; copied onto the attempt record when present.
error?: string | undefined;
// Optional checkpoint identifier; stored on the agent when the attempt succeeds.
checkpoint?: string | undefined;
// When set on an unsuccessful attempt, the agent is marked 'failed'.
isFinalAttempt?: boolean | undefined;
}
interface ActiveTimer { interface ActiveTimer {
startTime: number; startTime: number;
attemptNumber: number; attemptNumber: number;
@@ -170,10 +161,16 @@ export class MetricsTracker {
*/ */
async endAgent(agentName: string, result: AgentEndResult): Promise<void> { async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
if (!this.data) { if (!this.data) {
throw new Error('MetricsTracker not initialized'); throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
} }
// Initialize agent metrics if not exists // 1. Initialize agent metrics if first time seeing this agent
const existingAgent = this.data.metrics.agents[agentName]; const existingAgent = this.data.metrics.agents[agentName];
const agent = existingAgent ?? { const agent = existingAgent ?? {
status: 'in-progress' as const, status: 'in-progress' as const,
@@ -183,7 +180,7 @@ export class MetricsTracker {
}; };
this.data.metrics.agents[agentName] = agent; this.data.metrics.agents[agentName] = agent;
// Add attempt to array // 2. Build attempt record with optional model/error fields
const attempt: AttemptData = { const attempt: AttemptData = {
attempt_number: result.attemptNumber, attempt_number: result.attemptNumber,
duration_ms: result.duration_ms, duration_ms: result.duration_ms,
@@ -200,16 +197,18 @@ export class MetricsTracker {
attempt.error = result.error; attempt.error = result.error;
} }
// 3. Append attempt to history
agent.attempts.push(attempt); agent.attempts.push(attempt);
// Update total cost (includes failed attempts) // 4. Recalculate total cost across all attempts (includes failures)
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0); agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
// If successful, update final metrics and status // 5. Update agent status based on outcome
if (result.success) { if (result.success) {
agent.status = 'success'; agent.status = 'success';
agent.final_duration_ms = result.duration_ms; agent.final_duration_ms = result.duration_ms;
// 6. Attach model and checkpoint metadata on success
if (result.model) { if (result.model) {
agent.model = result.model; agent.model = result.model;
} }
@@ -218,19 +217,18 @@ export class MetricsTracker {
agent.checkpoint = result.checkpoint; agent.checkpoint = result.checkpoint;
} }
} else { } else {
// If this was the last attempt, mark as failed
if (result.isFinalAttempt) { if (result.isFinalAttempt) {
agent.status = 'failed'; agent.status = 'failed';
} }
} }
// Clear active timer // 7. Clear active timer
this.activeTimers.delete(agentName); this.activeTimers.delete(agentName);
// Recalculate aggregations // 8. Recalculate phase and session-level aggregations
this.recalculateAggregations(); this.recalculateAggregations();
// Save to disk // 9. Persist to session.json
await this.save(); await this.save();
} }
@@ -262,7 +260,13 @@ export class MetricsTracker {
checkpointHash?: string checkpointHash?: string
): Promise<void> { ): Promise<void> {
if (!this.data) { if (!this.data) {
throw new Error('MetricsTracker not initialized'); throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
} }
// Ensure originalWorkflowId is set (backfill if missing from old sessions) // Ensure originalWorkflowId is set (backfill if missing from old sessions)
@@ -326,9 +330,9 @@ export class MetricsTracker {
* Calculate phase-level metrics * Calculate phase-level metrics
*/ */
private calculatePhaseMetrics( private calculatePhaseMetrics(
successfulAgents: Array<[string, AgentMetrics]> successfulAgents: Array<[string, AgentAuditMetrics]>
): Record<string, PhaseMetrics> { ): Record<string, PhaseMetrics> {
const phases: Record<PhaseName, AgentMetrics[]> = { const phases: Record<PhaseName, AgentAuditMetrics[]> = {
'pre-recon': [], 'pre-recon': [],
'recon': [], 'recon': [],
'vulnerability-analysis': [], 'vulnerability-analysis': [],
+7 -102
View File
@@ -15,20 +15,17 @@ import fs from 'fs/promises';
import path from 'path'; import path from 'path';
import { fileURLToPath } from 'url'; import { fileURLToPath } from 'url';
import { ensureDirectory } from '../utils/file-io.js';
export type { SessionMetadata } from '../types/audit.js';
import type { SessionMetadata } from '../types/audit.js';
const __filename = fileURLToPath(import.meta.url); const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename); const __dirname = path.dirname(__filename);
// Get Shannon repository root // Get Shannon repository root
export const SHANNON_ROOT = path.resolve(__dirname, '..', '..'); const SHANNON_ROOT = path.resolve(__dirname, '..', '..');
export const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs'); const AUDIT_LOGS_DIR = path.join(SHANNON_ROOT, 'audit-logs');
/**
 * Metadata describing one audit session.
 */
export interface SessionMetadata {
// Session identifier (required).
id: string;
// Target web URL for the session (required).
webUrl: string;
// Optional repository path — NOTE(review): exact meaning not shown here; confirm.
repoPath?: string;
// Optional output path — NOTE(review): exact meaning not shown here; confirm.
outputPath?: string;
// Any additional keys are permitted, with values of unknown type.
[key: string]: unknown;
}
/** /**
* Extract and sanitize hostname from URL for use in identifiers * Extract and sanitize hostname from URL for use in identifiers
@@ -93,98 +90,6 @@ export function generateWorkflowLogPath(sessionMetadata: SessionMetadata): strin
return path.join(auditPath, 'workflow.log'); return path.join(auditPath, 'workflow.log');
} }
/**
 * Create a directory, including any missing parents, if it is not already there.
 *
 * Safe to call repeatedly and from concurrent callers: an `EEXIST` failure is
 * treated as success. Any other failure is rethrown unchanged.
 *
 * @param dirPath - Directory to create.
 */
export async function ensureDirectory(dirPath: string): Promise<void> {
  try {
    await fs.mkdir(dirPath, { recursive: true });
  } catch (err) {
    const { code } = err as NodeJS.ErrnoException;
    if (code === 'EEXIST') {
      // Another caller created it first; nothing left to do.
      return;
    }
    throw err;
  }
}
/**
 * Write a file by staging the content in `<filePath>.tmp` and renaming it
 * over the destination, so readers never observe a partially written file.
 *
 * @param filePath - Final destination path.
 * @param data - A string is written verbatim; any other value is serialized
 *   as JSON with 2-space indentation.
 * @throws The underlying write/rename error, after a best-effort removal of
 *   the staging file.
 */
export async function atomicWrite(filePath: string, data: object | string): Promise<void> {
  const stagingPath = `${filePath}.tmp`;

  let serialized: string;
  if (typeof data === 'string') {
    serialized = data;
  } else {
    serialized = JSON.stringify(data, null, 2);
  }

  try {
    await fs.writeFile(stagingPath, serialized, 'utf8');
    // rename() replaces the destination in one step on the same filesystem
    await fs.rename(stagingPath, filePath);
  } catch (err) {
    // Best-effort cleanup; a failure to remove the staging file is ignored
    await fs.unlink(stagingPath).catch(() => undefined);
    throw err;
  }
}
/**
 * Render a millisecond duration as a short human-readable string.
 *
 * - under one second: `"<ms>ms"`
 * - under one minute: seconds with one decimal, e.g. `"1.5s"`
 * - otherwise: whole minutes and whole seconds, e.g. `"2m 5s"`
 *
 * @param ms - Duration in milliseconds.
 */
export function formatDuration(ms: number): string {
  if (ms < 1000) {
    return `${ms}ms`;
  }

  const totalSeconds = ms / 1000;
  if (totalSeconds < 60) {
    return `${totalSeconds.toFixed(1)}s`;
  }

  const wholeMinutes = Math.floor(totalSeconds / 60);
  const leftoverSeconds = Math.floor(totalSeconds % 60);
  return [`${wholeMinutes}m`, `${leftoverSeconds}s`].join(' ');
}
/**
 * Convert an epoch-millisecond timestamp to its ISO 8601 string.
 *
 * @param timestamp - Milliseconds since the Unix epoch; defaults to the current time.
 * @returns The value of `Date.prototype.toISOString()` for that instant.
 */
export function formatTimestamp(timestamp: number = Date.now()): string {
  const instant = new Date(timestamp);
  return instant.toISOString();
}
/**
 * Express `part` as a percentage of `total`.
 *
 * @param part - Numerator.
 * @param total - Denominator; a total of exactly 0 yields 0 instead of dividing.
 * @returns `(part / total) * 100`, or 0 when `total` is 0.
 */
export function calculatePercentage(part: number, total: number): number {
  return total === 0 ? 0 : (part / total) * 100;
}
/**
 * Load a UTF-8 file and parse its contents as JSON.
 *
 * The result is cast to `T` without runtime validation, so callers are
 * responsible for the shape they request.
 *
 * @param filePath - File to read.
 * @throws The read error when the file cannot be read, or a `SyntaxError`
 *   when its content is not valid JSON.
 */
export async function readJson<T = unknown>(filePath: string): Promise<T> {
  const raw = await fs.readFile(filePath, 'utf8');
  const parsed: unknown = JSON.parse(raw);
  return parsed as T;
}
/**
 * Report whether a path is accessible.
 *
 * Any failure from `fs.access` (missing path, permission problem, ...) is
 * reported as `false`; this function never rejects.
 *
 * @param filePath - Path to probe.
 */
export async function fileExists(filePath: string): Promise<boolean> {
  return fs.access(filePath).then(
    () => true,
    () => false
  );
}
/** /**
* Initialize audit directory structure for a session * Initialize audit directory structure for a session
* Creates: audit-logs/{sessionId}/, agents/, prompts/, deliverables/ * Creates: audit-logs/{sessionId}/, agents/, prompts/, deliverables/
+57 -71
View File
@@ -11,10 +11,10 @@
* Optimized for `tail -f` viewing during concurrent workflow execution. * Optimized for `tail -f` viewing during concurrent workflow execution.
*/ */
import fs from 'fs'; import fs from 'fs/promises';
import path from 'path'; import { generateWorkflowLogPath, type SessionMetadata } from './utils.js';
import { generateWorkflowLogPath, ensureDirectory, type SessionMetadata } from './utils.js';
import { formatDuration, formatTimestamp } from '../utils/formatting.js'; import { formatDuration, formatTimestamp } from '../utils/formatting.js';
import { LogStream } from './log-stream.js';
export interface AgentLogDetails { export interface AgentLogDetails {
attemptNumber?: number; attemptNumber?: number;
@@ -42,38 +42,27 @@ export interface WorkflowSummary {
* WorkflowLogger - Manages the unified workflow log file * WorkflowLogger - Manages the unified workflow log file
*/ */
export class WorkflowLogger { export class WorkflowLogger {
private sessionMetadata: SessionMetadata; private readonly sessionMetadata: SessionMetadata;
private logPath: string; private readonly logStream: LogStream;
private stream: fs.WriteStream | null = null;
private initialized: boolean = false;
constructor(sessionMetadata: SessionMetadata) { constructor(sessionMetadata: SessionMetadata) {
this.sessionMetadata = sessionMetadata; this.sessionMetadata = sessionMetadata;
this.logPath = generateWorkflowLogPath(sessionMetadata); const logPath = generateWorkflowLogPath(sessionMetadata);
this.logStream = new LogStream(logPath);
} }
/** /**
* Initialize the log stream (creates file and writes header) * Initialize the log stream (creates file and writes header)
*/ */
async initialize(): Promise<void> { async initialize(): Promise<void> {
if (this.initialized) { if (this.logStream.isOpen) {
return; return;
} }
// Ensure directory exists await this.logStream.open();
await ensureDirectory(path.dirname(this.logPath));
// Create write stream with append mode
this.stream = fs.createWriteStream(this.logPath, {
flags: 'a',
encoding: 'utf8',
autoClose: true,
});
this.initialized = true;
// Write header only if file is new (empty) // Write header only if file is new (empty)
const stats = await fs.promises.stat(this.logPath).catch(() => null); const stats = await fs.stat(this.logStream.path).catch(() => null);
if (!stats || stats.size === 0) { if (!stats || stats.size === 0) {
await this.writeHeader(); await this.writeHeader();
} }
@@ -94,29 +83,35 @@ export class WorkflowLogger {
``, ``,
].join('\n'); ].join('\n');
return this.writeRaw(header); return this.logStream.write(header);
} }
/** /**
* Write raw text to log file with immediate flush * Write resume header to log file when workflow is resumed
*/ */
private writeRaw(text: string): Promise<void> { async logResumeHeader(resumeInfo: {
return new Promise((resolve, reject) => { previousWorkflowId: string;
if (!this.initialized || !this.stream) { newWorkflowId: string;
reject(new Error('WorkflowLogger not initialized')); checkpointHash: string;
return; completedAgents: string[];
} }): Promise<void> {
await this.ensureInitialized();
const needsDrain = !this.stream.write(text, 'utf8', (error) => { const header = [
if (error) reject(error); ``,
}); `================================================================================`,
`RESUMED`,
`================================================================================`,
`Previous Workflow ID: ${resumeInfo.previousWorkflowId}`,
`New Workflow ID: ${resumeInfo.newWorkflowId}`,
`Resumed At: ${formatTimestamp()}`,
`Checkpoint: ${resumeInfo.checkpointHash}`,
`Completed: ${resumeInfo.completedAgents.length} agents (${resumeInfo.completedAgents.join(', ')})`,
`================================================================================`,
``,
].join('\n');
if (needsDrain) { return this.logStream.write(header);
this.stream.once('drain', resolve);
} else {
resolve();
}
});
} }
/** /**
@@ -138,10 +133,10 @@ export class WorkflowLogger {
// Add blank line before phase start for readability // Add blank line before phase start for readability
if (event === 'start') { if (event === 'start') {
await this.writeRaw('\n'); await this.logStream.write('\n');
} }
await this.writeRaw(line); await this.logStream.write(line);
} }
/** /**
@@ -184,7 +179,7 @@ export class WorkflowLogger {
} }
const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`; const line = `[${this.formatLogTime()}] [AGENT] ${message}\n`;
await this.writeRaw(line); await this.logStream.write(line);
} }
/** /**
@@ -194,7 +189,7 @@ export class WorkflowLogger {
await this.ensureInitialized(); await this.ensureInitialized();
const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`; const line = `[${this.formatLogTime()}] [${eventType.toUpperCase()}] ${message}\n`;
await this.writeRaw(line); await this.logStream.write(line);
} }
/** /**
@@ -205,7 +200,7 @@ export class WorkflowLogger {
const contextStr = context ? ` (${context})` : ''; const contextStr = context ? ` (${context})` : '';
const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`; const line = `[${this.formatLogTime()}] [ERROR] ${error.message}${contextStr}\n`;
await this.writeRaw(line); await this.logStream.write(line);
} }
/** /**
@@ -301,7 +296,7 @@ export class WorkflowLogger {
const params = this.formatToolParams(toolName, parameters); const params = this.formatToolParams(toolName, parameters);
const paramStr = params ? `: ${params}` : ''; const paramStr = params ? `: ${params}` : '';
const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`; const line = `[${this.formatLogTime()}] [${agentName}] [TOOL] ${toolName}${paramStr}\n`;
await this.writeRaw(line); await this.logStream.write(line);
} }
/** /**
@@ -313,7 +308,7 @@ export class WorkflowLogger {
// Show full content, replacing newlines with escaped version for single-line output // Show full content, replacing newlines with escaped version for single-line output
const escaped = content.replace(/\n/g, '\\n'); const escaped = content.replace(/\n/g, '\\n');
const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`; const line = `[${this.formatLogTime()}] [${agentName}] [LLM] Turn ${turn}: ${escaped}\n`;
await this.writeRaw(line); await this.logStream.write(line);
} }
/** /**
@@ -324,42 +319,42 @@ export class WorkflowLogger {
const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED'; const status = summary.status === 'completed' ? 'COMPLETED' : 'FAILED';
await this.writeRaw('\n'); await this.logStream.write('\n');
await this.writeRaw(`================================================================================\n`); await this.logStream.write(`================================================================================\n`);
await this.writeRaw(`Workflow ${status}\n`); await this.logStream.write(`Workflow ${status}\n`);
await this.writeRaw(`────────────────────────────────────────\n`); await this.logStream.write(`────────────────────────────────────────\n`);
await this.writeRaw(`Workflow ID: ${this.sessionMetadata.id}\n`); await this.logStream.write(`Workflow ID: ${this.sessionMetadata.id}\n`);
await this.writeRaw(`Status: ${summary.status}\n`); await this.logStream.write(`Status: ${summary.status}\n`);
await this.writeRaw(`Duration: ${formatDuration(summary.totalDurationMs)}\n`); await this.logStream.write(`Duration: ${formatDuration(summary.totalDurationMs)}\n`);
await this.writeRaw(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`); await this.logStream.write(`Total Cost: $${summary.totalCostUsd.toFixed(4)}\n`);
await this.writeRaw(`Agents: ${summary.completedAgents.length} completed\n`); await this.logStream.write(`Agents: ${summary.completedAgents.length} completed\n`);
if (summary.error) { if (summary.error) {
await this.writeRaw(`Error: ${summary.error}\n`); await this.logStream.write(`Error: ${summary.error}\n`);
} }
await this.writeRaw(`\n`); await this.logStream.write(`\n`);
await this.writeRaw(`Agent Breakdown:\n`); await this.logStream.write(`Agent Breakdown:\n`);
for (const agentName of summary.completedAgents) { for (const agentName of summary.completedAgents) {
const metrics = summary.agentMetrics[agentName]; const metrics = summary.agentMetrics[agentName];
if (metrics) { if (metrics) {
const duration = formatDuration(metrics.durationMs); const duration = formatDuration(metrics.durationMs);
const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A'; const cost = metrics.costUsd !== null ? `$${metrics.costUsd.toFixed(4)}` : 'N/A';
await this.writeRaw(` - ${agentName} (${duration}, ${cost})\n`); await this.logStream.write(` - ${agentName} (${duration}, ${cost})\n`);
} else { } else {
await this.writeRaw(` - ${agentName}\n`); await this.logStream.write(` - ${agentName}\n`);
} }
} }
await this.writeRaw(`================================================================================\n`); await this.logStream.write(`================================================================================\n`);
} }
/** /**
* Ensure initialized (helper for lazy initialization) * Ensure initialized (helper for lazy initialization)
*/ */
private async ensureInitialized(): Promise<void> { private async ensureInitialized(): Promise<void> {
if (!this.initialized) { if (!this.logStream.isOpen) {
await this.initialize(); await this.initialize();
} }
} }
@@ -368,15 +363,6 @@ export class WorkflowLogger {
* Close the log stream * Close the log stream
*/ */
async close(): Promise<void> { async close(): Promise<void> {
if (!this.initialized || !this.stream) { return this.logStream.close();
return;
}
return new Promise((resolve) => {
this.stream!.end(() => {
this.initialized = false;
resolve();
});
});
} }
} }
-59
View File
@@ -1,59 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
/**
 * Outcome of a CLI input validation.
 *
 * `error` carries the failure message when `valid` is false; `path` is the
 * absolute path that `validateRepoPath` reports on success.
 */
interface ValidationResult {
  valid: boolean;
  error?: string;
  path?: string;
}

/**
 * Validate that `url` is an absolute HTTP(S) URL with a hostname.
 *
 * @param url - Raw URL string to validate.
 * @returns `{ valid: true }` on success, otherwise `{ valid: false, error }`
 *   describing the first failed check. Never throws.
 */
export function validateWebUrl(url: string): ValidationResult {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    // The URL constructor throws on anything it cannot parse.
    return { valid: false, error: 'Invalid web URL format' };
  }

  const scheme = target.protocol;
  if (scheme !== 'http:' && scheme !== 'https:') {
    return { valid: false, error: 'Web URL must use HTTP or HTTPS protocol' };
  }

  if (!target.hostname) {
    return { valid: false, error: 'Web URL must have a valid hostname' };
  }

  return { valid: true };
}
/**
 * Validate that `repoPath` points at an existing, readable local directory.
 *
 * Checks run in order: existence, directory-ness, read permission. The first
 * failing check determines the returned error message.
 *
 * @param repoPath - Path to the repository, relative or absolute.
 * @returns `{ valid: true, path }` with the resolved absolute path on
 *   success, otherwise `{ valid: false, error }`. Never rejects: unexpected
 *   filesystem errors are reported as `Invalid repository path: …`.
 */
export async function validateRepoPath(repoPath: string): Promise<ValidationResult> {
  const fail = (error: string): ValidationResult => ({ valid: false, error });

  try {
    const exists = await fs.pathExists(repoPath);
    if (!exists) {
      return fail('Repository path does not exist');
    }

    const info = await fs.stat(repoPath);
    if (!info.isDirectory()) {
      return fail('Repository path must be a directory');
    }

    // A failed access check gets its own message instead of the generic one.
    try {
      await fs.access(repoPath, fs.constants.R_OK);
    } catch {
      return fail('Repository path is not readable');
    }

    return { valid: true, path: path.resolve(repoPath) };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return fail(`Invalid repository path: ${reason}`);
  }
}
-49
View File
@@ -1,49 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import chalk from 'chalk';
import { displaySplashScreen } from '../splash-screen.js';
/**
 * Print the CLI help text (usage, options, examples, requirements and
 * environment variables) to stdout, one `console.log` call per entry.
 */
export function showHelp(): void {
  const heading = (title: string): string => chalk.yellow.bold(title);

  // Entries are printed in order; a trailing '\n' yields a blank separator line.
  const lines: string[] = [
    chalk.cyan.bold('AI Penetration Testing Agent'),
    chalk.gray('Automated security assessment tool\n'),

    heading('USAGE:'),
    ' shannon <WEB_URL> <REPO_PATH> [--config config.yaml] [--output /path/to/reports]\n',

    heading('OPTIONS:'),
    ' --config <file> YAML configuration file for authentication and testing parameters',
    ' --output <path> Custom output directory for session folder (default: ./audit-logs/)',
    ' --pipeline-testing Use minimal prompts for fast pipeline testing (creates minimal deliverables)',
    ' --disable-loader Disable the animated progress loader (useful when logs interfere with spinner)',
    ' --help Show this help message\n',

    heading('EXAMPLES:'),
    ' shannon "https://example.com" "/path/to/local/repo"',
    ' shannon "https://example.com" "/path/to/local/repo" --config auth.yaml',
    ' shannon "https://example.com" "/path/to/local/repo" --output /path/to/reports',
    ' shannon "https://example.com" "/path/to/local/repo" --pipeline-testing\n',

    heading('REQUIREMENTS:'),
    ' • WEB_URL must start with http:// or https://',
    ' • REPO_PATH must be an accessible local directory',
    ' • Only test systems you own or have permission to test\n',

    heading('ENVIRONMENT VARIABLES:'),
    ' PENTEST_MAX_RETRIES Number of retries for AI agents (default: 3)',
  ];

  for (const line of lines) {
    console.log(line);
  }
}
// Export the splash screen function for use in main
export { displaySplashScreen };
+311 -106
View File
@@ -7,13 +7,13 @@
import { createRequire } from 'module'; import { createRequire } from 'module';
import { fs } from 'zx'; import { fs } from 'zx';
import yaml from 'js-yaml'; import yaml from 'js-yaml';
import { Ajv, type ValidateFunction } from 'ajv'; import { Ajv, type ValidateFunction, type ErrorObject } from 'ajv';
import type { FormatsPlugin } from 'ajv-formats'; import type { FormatsPlugin } from 'ajv-formats';
import { PentestError } from './error-handling.js'; import { PentestError } from './services/error-handling.js';
import { ErrorCode } from './types/errors.js';
import type { import type {
Config, Config,
Rule, Rule,
Rules,
Authentication, Authentication,
DistributedConfig, DistributedConfig,
} from './types/config.js'; } from './types/config.js';
@@ -22,11 +22,9 @@ import type {
const require = createRequire(import.meta.url); const require = createRequire(import.meta.url);
const addFormats: FormatsPlugin = require('ajv-formats'); const addFormats: FormatsPlugin = require('ajv-formats');
// Initialize AJV with formats
const ajv = new Ajv({ allErrors: true, verbose: true }); const ajv = new Ajv({ allErrors: true, verbose: true });
addFormats(ajv); addFormats(ajv);
// Load JSON Schema
let configSchema: object; let configSchema: object;
let validateSchema: ValidateFunction; let validateSchema: ValidateFunction;
@@ -45,7 +43,6 @@ try {
); );
} }
// Security patterns to block
const DANGEROUS_PATTERNS: RegExp[] = [ const DANGEROUS_PATTERNS: RegExp[] = [
/\.\.\//, // Path traversal /\.\.\//, // Path traversal
/[<>]/, // HTML/XML injection /[<>]/, // HTML/XML injection
@@ -54,32 +51,171 @@ const DANGEROUS_PATTERNS: RegExp[] = [
/file:/i, // File URLs /file:/i, // File URLs
]; ];
// Parse and load YAML configuration file with enhanced safety /**
export const parseConfig = async (configPath: string): Promise<Config> => { * Format a single AJV error into a human-readable message.
try { * Translates AJV error keywords into plain English descriptions.
// File existence check */
if (!(await fs.pathExists(configPath))) { function formatAjvError(error: ErrorObject): string {
throw new Error(`Configuration file not found: ${configPath}`); const path = error.instancePath || 'root';
const params = error.params as Record<string, unknown>;
switch (error.keyword) {
case 'required': {
const missingProperty = params.missingProperty as string;
return `Missing required field: "${missingProperty}" at ${path || 'root'}`;
} }
// File size check (prevent extremely large files) case 'type': {
const stats = await fs.stat(configPath); const expectedType = params.type as string;
const maxFileSize = 1024 * 1024; // 1MB return `Invalid type at ${path}: expected ${expectedType}`;
if (stats.size > maxFileSize) { }
throw new Error(
`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)` case 'enum': {
const allowedValues = params.allowedValues as unknown[];
const formattedValues = allowedValues.map((v) => `"${v}"`).join(', ');
return `Invalid value at ${path}: must be one of [${formattedValues}]`;
}
case 'additionalProperties': {
const additionalProperty = params.additionalProperty as string;
return `Unknown field at ${path}: "${additionalProperty}" is not allowed`;
}
case 'minLength': {
const limit = params.limit as number;
return `Value at ${path} is too short: must have at least ${limit} character(s)`;
}
case 'maxLength': {
const limit = params.limit as number;
return `Value at ${path} is too long: must have at most ${limit} character(s)`;
}
case 'minimum': {
const limit = params.limit as number;
return `Value at ${path} is too small: must be >= ${limit}`;
}
case 'maximum': {
const limit = params.limit as number;
return `Value at ${path} is too large: must be <= ${limit}`;
}
case 'minItems': {
const limit = params.limit as number;
return `Array at ${path} has too few items: must have at least ${limit} item(s)`;
}
case 'maxItems': {
const limit = params.limit as number;
return `Array at ${path} has too many items: must have at most ${limit} item(s)`;
}
case 'pattern': {
const pattern = params.pattern as string;
return `Value at ${path} does not match required pattern: ${pattern}`;
}
case 'format': {
const format = params.format as string;
return `Value at ${path} must be a valid ${format}`;
}
case 'const': {
const allowedValue = params.allowedValue as unknown;
return `Value at ${path} must be exactly "${allowedValue}"`;
}
case 'oneOf': {
return `Value at ${path} must match exactly one schema (matched ${params.passingSchemas ?? 0})`;
}
case 'anyOf': {
return `Value at ${path} must match at least one of the allowed schemas`;
}
case 'not': {
return `Value at ${path} matches a schema it should not match`;
}
case 'if': {
return `Value at ${path} does not satisfy conditional schema requirements`;
}
case 'uniqueItems': {
const i = params.i as number;
const j = params.j as number;
return `Array at ${path} contains duplicate items at positions ${j} and ${i}`;
}
case 'propertyNames': {
const propertyName = params.propertyName as string;
return `Invalid property name at ${path}: "${propertyName}" does not match naming requirements`;
}
case 'dependencies':
case 'dependentRequired': {
const property = params.property as string;
const missingProperty = params.missingProperty as string;
return `Missing dependent field at ${path}: "${missingProperty}" is required when "${property}" is present`;
}
default: {
// Fallback for any unhandled keywords - use AJV's message if available
const message = error.message || `validation failed for keyword "${error.keyword}"`;
return `${path}: ${message}`;
}
}
}
/**
* Format all AJV errors into a list of human-readable messages.
* Returns an array of formatted error strings.
*/
function formatAjvErrors(errors: ErrorObject[]): string[] {
return errors.map(formatAjvError);
}
export const parseConfig = async (configPath: string): Promise<Config> => {
try {
// 1. Verify file exists
if (!(await fs.pathExists(configPath))) {
throw new PentestError(
`Configuration file not found: ${configPath}`,
'config',
false,
{ configPath },
ErrorCode.CONFIG_NOT_FOUND
); );
} }
// Read file content // 2. Check file size
const configContent = await fs.readFile(configPath, 'utf8'); const stats = await fs.stat(configPath);
const maxFileSize = 1024 * 1024; // 1MB
// Basic content validation if (stats.size > maxFileSize) {
if (!configContent.trim()) { throw new PentestError(
throw new Error('Configuration file is empty'); `Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`,
'config',
false,
{ configPath, fileSize: stats.size, maxFileSize },
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
// Parse YAML with safety options // 3. Read and check for empty content
const configContent = await fs.readFile(configPath, 'utf8');
if (!configContent.trim()) {
throw new PentestError(
'Configuration file is empty',
'config',
false,
{ configPath },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
// 4. Parse YAML with safe schema
let config: unknown; let config: unknown;
try { try {
config = yaml.load(configContent, { config = yaml.load(configContent, {
@@ -89,67 +225,82 @@ export const parseConfig = async (configPath: string): Promise<Config> => {
}); });
} catch (yamlError) { } catch (yamlError) {
const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError); const errMsg = yamlError instanceof Error ? yamlError.message : String(yamlError);
throw new Error(`YAML parsing failed: ${errMsg}`); throw new PentestError(
`YAML parsing failed: ${errMsg}`,
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
);
} }
// Additional safety check // 5. Guard against null/undefined parse result
if (config === null || config === undefined) { if (config === null || config === undefined) {
throw new Error('Configuration file resulted in null/undefined after parsing'); throw new PentestError(
'Configuration file resulted in null/undefined after parsing',
'config',
false,
{ configPath },
ErrorCode.CONFIG_PARSE_ERROR
);
} }
// Validate the configuration structure and content // 6. Validate schema, security rules, and return
validateConfig(config as Config); validateConfig(config as Config);
return config as Config; return config as Config;
} catch (error) { } catch (error) {
const errMsg = error instanceof Error ? error.message : String(error); // PentestError instances are already well-formatted, re-throw as-is
// Enhance error message with context if (error instanceof PentestError) {
if (
errMsg.startsWith('Configuration file not found') ||
errMsg.startsWith('YAML parsing failed') ||
errMsg.includes('must be') ||
errMsg.includes('exceeds maximum')
) {
// These are already well-formatted errors, re-throw as-is
throw error; throw error;
} else {
// Wrap other errors with context
throw new Error(`Failed to parse configuration file '${configPath}': ${errMsg}`);
} }
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Failed to parse configuration file '${configPath}': ${errMsg}`,
'config',
false,
{ configPath, originalError: errMsg },
ErrorCode.CONFIG_PARSE_ERROR
);
} }
}; };
// Validate overall configuration structure using JSON Schema
const validateConfig = (config: Config): void => { const validateConfig = (config: Config): void => {
// Basic structure validation
if (!config || typeof config !== 'object') { if (!config || typeof config !== 'object') {
throw new Error('Configuration must be a valid object'); throw new PentestError(
'Configuration must be a valid object',
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
if (Array.isArray(config)) { if (Array.isArray(config)) {
throw new Error('Configuration must be an object, not an array'); throw new PentestError(
'Configuration must be an object, not an array',
'config',
false,
{},
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
// JSON Schema validation
const isValid = validateSchema(config); const isValid = validateSchema(config);
if (!isValid) { if (!isValid) {
const errors = validateSchema.errors || []; const errors = validateSchema.errors || [];
const errorMessages = errors.map((err) => { const errorMessages = formatAjvErrors(errors);
const path = err.instancePath || 'root'; throw new PentestError(
return `${path}: ${err.message}`; `Configuration validation failed:\n - ${errorMessages.join('\n - ')}`,
}); 'config',
throw new Error(`Configuration validation failed:\n - ${errorMessages.join('\n - ')}`); false,
{ validationErrors: errorMessages },
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
// Additional security validation
performSecurityValidation(config); performSecurityValidation(config);
// Warn if deprecated fields are used
if (config.login) {
console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.');
}
// Ensure at least some configuration is provided
if (!config.rules && !config.authentication) { if (!config.rules && !config.authentication) {
console.warn( console.warn(
'⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.' '⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.'
@@ -161,35 +312,58 @@ const validateConfig = (config: Config): void => {
} }
}; };
// Perform additional security validation beyond JSON Schema
const performSecurityValidation = (config: Config): void => { const performSecurityValidation = (config: Config): void => {
// Validate authentication section for security issues
if (config.authentication) { if (config.authentication) {
const auth = config.authentication; const auth = config.authentication;
// Check for dangerous patterns in credentials // Check login_url for dangerous patterns (AJV's "uri" format allows javascript: per RFC 3986)
if (auth.credentials) { if (auth.login_url) {
for (const pattern of DANGEROUS_PATTERNS) { for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(auth.credentials.username)) { if (pattern.test(auth.login_url)) {
throw new Error( throw new PentestError(
'authentication.credentials.username contains potentially dangerous pattern' `authentication.login_url contains potentially dangerous pattern: ${pattern.source}`,
); 'config',
} false,
if (pattern.test(auth.credentials.password)) { { field: 'login_url', pattern: pattern.source },
throw new Error( ErrorCode.CONFIG_VALIDATION_FAILED
'authentication.credentials.password contains potentially dangerous pattern' );
}
}
}
if (auth.credentials) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(auth.credentials.username)) {
throw new PentestError(
`authentication.credentials.username contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.username', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
);
}
if (pattern.test(auth.credentials.password)) {
throw new PentestError(
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
} }
} }
// Check login flow for dangerous patterns
if (auth.login_flow) { if (auth.login_flow) {
auth.login_flow.forEach((step, index) => { auth.login_flow.forEach((step, index) => {
for (const pattern of DANGEROUS_PATTERNS) { for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(step)) { if (pattern.test(step)) {
throw new Error( throw new PentestError(
`authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}` `authentication.login_flow[${index}] contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `login_flow[${index}]`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
} }
@@ -197,48 +371,58 @@ const performSecurityValidation = (config: Config): void => {
} }
} }
// Validate rules section for security issues
if (config.rules) { if (config.rules) {
validateRulesSecurity(config.rules.avoid, 'avoid'); validateRulesSecurity(config.rules.avoid, 'avoid');
validateRulesSecurity(config.rules.focus, 'focus'); validateRulesSecurity(config.rules.focus, 'focus');
// Check for duplicate and conflicting rules
checkForDuplicates(config.rules.avoid || [], 'avoid'); checkForDuplicates(config.rules.avoid || [], 'avoid');
checkForDuplicates(config.rules.focus || [], 'focus'); checkForDuplicates(config.rules.focus || [], 'focus');
checkForConflicts(config.rules.avoid, config.rules.focus); checkForConflicts(config.rules.avoid, config.rules.focus);
} }
}; };
// Validate rules for security issues
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => { const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
if (!rules) return; if (!rules) return;
rules.forEach((rule, index) => { rules.forEach((rule, index) => {
// Security validation
for (const pattern of DANGEROUS_PATTERNS) { for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(rule.url_path)) { if (pattern.test(rule.url_path)) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}` `rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
if (pattern.test(rule.description)) { if (pattern.test(rule.description)) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}` `rules.${ruleType}[${index}].description contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].description`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
} }
// Type-specific validation
validateRuleTypeSpecific(rule, ruleType, index); validateRuleTypeSpecific(rule, ruleType, index);
}); });
}; };
// Validate rule based on its specific type
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => { const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
const field = `rules.${ruleType}[${index}].url_path`;
switch (rule.type) { switch (rule.type) {
case 'path': case 'path':
if (!rule.url_path.startsWith('/')) { if (!rule.url_path.startsWith('/')) {
throw new Error(`rules.${ruleType}[${index}].url_path for type 'path' must start with '/'`); throw new PentestError(
`${field} for type 'path' must start with '/'`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
);
} }
break; break;
@@ -246,14 +430,22 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'domain': case 'domain':
// Basic domain validation - no slashes allowed // Basic domain validation - no slashes allowed
if (rule.url_path.includes('/')) { if (rule.url_path.includes('/')) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].url_path for type '${rule.type}' cannot contain '/' characters` `${field} for type '${rule.type}' cannot contain '/' characters`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
// Must contain at least one dot for domains // Must contain at least one dot for domains
if (rule.type === 'domain' && !rule.url_path.includes('.')) { if (rule.type === 'domain' && !rule.url_path.includes('.')) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].url_path for type 'domain' must be a valid domain name` `${field} for type 'domain' must be a valid domain name`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
break; break;
@@ -261,62 +453,77 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'method': { case 'method': {
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']; const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
if (!allowedMethods.includes(rule.url_path.toUpperCase())) { if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].url_path for type 'method' must be one of: ${allowedMethods.join(', ')}` `${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
'config',
false,
{ field, ruleType: rule.type, allowedMethods },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
break; break;
} }
case 'header': case 'header':
// Header name validation (basic)
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].url_path for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)` `${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
break; break;
case 'parameter': case 'parameter':
// Parameter name validation (basic)
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) { if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new Error( throw new PentestError(
`rules.${ruleType}[${index}].url_path for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)` `${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
break; break;
} }
}; };
// Check for duplicate rules
const checkForDuplicates = (rules: Rule[], ruleType: string): void => { const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
const seen = new Set<string>(); const seen = new Set<string>();
rules.forEach((rule, index) => { rules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`; const key = `${rule.type}:${rule.url_path}`;
if (seen.has(key)) { if (seen.has(key)) {
throw new Error( throw new PentestError(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'` `Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
'config',
false,
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
seen.add(key); seen.add(key);
}); });
}; };
// Check for conflicting rules between avoid and focus
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => { const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`)); const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
focusRules.forEach((rule, index) => { focusRules.forEach((rule, index) => {
const key = `${rule.type}:${rule.url_path}`; const key = `${rule.type}:${rule.url_path}`;
if (avoidSet.has(key)) { if (avoidSet.has(key)) {
throw new Error( throw new PentestError(
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid` `Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
'config',
false,
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED
); );
} }
}); });
}; };
// Sanitize and normalize rule values
const sanitizeRule = (rule: Rule): Rule => { const sanitizeRule = (rule: Rule): Rule => {
return { return {
description: rule.description.trim(), description: rule.description.trim(),
@@ -325,7 +532,6 @@ const sanitizeRule = (rule: Rule): Rule => {
}; };
}; };
// Distribute configuration sections to different agents with sanitization
export const distributeConfig = (config: Config | null): DistributedConfig => { export const distributeConfig = (config: Config | null): DistributedConfig => {
const avoid = config?.rules?.avoid || []; const avoid = config?.rules?.avoid || [];
const focus = config?.rules?.focus || []; const focus = config?.rules?.focus || [];
@@ -338,7 +544,6 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
}; };
}; };
// Sanitize and normalize authentication values
const sanitizeAuthentication = (auth: Authentication): Authentication => { const sanitizeAuthentication = (auth: Authentication): Authentication => {
return { return {
login_type: auth.login_type.toLowerCase().trim() as Authentication['login_type'], login_type: auth.login_type.toLowerCase().trim() as Authentication['login_type'],
@@ -348,7 +553,7 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
password: auth.credentials.password, password: auth.credentials.password,
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }), ...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
}, },
login_flow: auth.login_flow.map((step) => step.trim()), ...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
success_condition: { success_condition: {
type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'], type: auth.success_condition.type.toLowerCase().trim() as Authentication['success_condition']['type'],
value: auth.success_condition.value.trim(), value: auth.success_condition.value.trim(),
-110
View File
@@ -1,110 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { path, fs } from 'zx';
import chalk from 'chalk';
import { validateQueueAndDeliverable, type VulnType } from './queue-validation.js';
import type { AgentName, PromptName, PlaywrightAgent, AgentValidator } from './types/agents.js';
/**
 * Builds the validator used by a vulnerability-analysis agent.
 *
 * The returned validator resolves to `true` when `validateQueueAndDeliverable`
 * completes for `vulnType` in the given source directory. If that call throws,
 * the failure is reported as a yellow console message and the validator
 * resolves to `false` rather than propagating the error.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  const validate = async (sourceDir: string): Promise<boolean> => {
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (caught) {
      let reason: string;
      if (caught instanceof Error) {
        reason = caught.message;
      } else {
        reason = String(caught);
      }
      console.log(chalk.yellow(` Queue validation failed for ${vulnType}: ${reason}`));
      return false;
    }
    return true;
  };
  return validate;
}
/**
 * Builds the validator used by an exploitation agent.
 *
 * The returned validator resolves to whether
 * `<sourceDir>/deliverables/<vulnType>_exploitation_evidence.md` exists.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  const evidenceName = `${vulnType}_exploitation_evidence.md`;
  return async (sourceDir: string): Promise<boolean> => {
    const exists = await fs.pathExists(path.join(sourceDir, 'deliverables', evidenceName));
    return exists;
  };
}
// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts.
// Keys are prompt names (PromptName), not agent names; values are Playwright MCP instance ids.
// The object is frozen, so the mapping cannot be mutated at runtime.
// Each vuln-* prompt and its exploit-* counterpart share one instance; the pre-recon, recon and
// report prompts reuse instances 1, 2 and 3 respectively.
export const MCP_AGENT_MAPPING: Record<PromptName, PlaywrightAgent> = Object.freeze({
// Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
// NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
// but assigning MCP server anyway for consistency and future extensibility
'pre-recon-code': 'playwright-agent1',
// Phase 2: Reconnaissance (actual prompt name is 'recon')
recon: 'playwright-agent2',
// Phase 3: Vulnerability Analysis (5 parallel agents, one instance each)
'vuln-injection': 'playwright-agent1',
'vuln-xss': 'playwright-agent2',
'vuln-auth': 'playwright-agent3',
'vuln-ssrf': 'playwright-agent4',
'vuln-authz': 'playwright-agent5',
// Phase 4: Exploitation (5 parallel agents - same instance as the matching vuln-* prompt)
'exploit-injection': 'playwright-agent1',
'exploit-xss': 'playwright-agent2',
'exploit-auth': 'playwright-agent3',
'exploit-ssrf': 'playwright-agent4',
'exploit-authz': 'playwright-agent5',
// Phase 5: Reporting (actual prompt name is 'report-executive')
// NOTE: Report generation is typically text-based and doesn't use browser automation,
// but assigning MCP server anyway for potential screenshot inclusion or future needs
'report-executive': 'playwright-agent3',
});
/**
 * Builds a validator that passes when `<sourceDir>/deliverables/<filename>` exists.
 */
const deliverableExists = (filename: string): AgentValidator => {
  return async (sourceDir: string): Promise<boolean> => {
    const target = path.join(sourceDir, 'deliverables', filename);
    return await fs.pathExists(target);
  };
};

/**
 * Direct agent-name to validator lookup (no pattern matching involved).
 *
 * - `pre-recon` / `recon`: pass when their markdown deliverable exists.
 * - `*-vuln`: delegate to the queue validators from `createVulnValidator`.
 * - `*-exploit`: pass when the evidence file from `createExploitValidator` exists.
 * - `report`: passes when the final report exists; logs a red message otherwise.
 */
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-reconnaissance agent - code analysis deliverable written by the agent
  'pre-recon': deliverableExists('code_analysis_deliverable.md'),

  // Reconnaissance agent
  recon: deliverableExists('recon_deliverable.md'),

  // Vulnerability analysis agents
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation agents
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report agent - additionally reports a missing deliverable
  report: async (sourceDir: string): Promise<boolean> => {
    const present = await deliverableExists('comprehensive_security_assessment_report.md')(sourceDir);
    if (present) {
      return true;
    }
    console.log(
      chalk.red(` ❌ Missing required deliverable: comprehensive_security_assessment_report.md`)
    );
    return false;
  },
});
-381
View File
@@ -1,381 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { Timer } from '../utils/metrics.js';
import { formatDuration } from '../utils/formatting.js';
import { handleToolError, PentestError } from '../error-handling.js';
import { AGENTS } from '../session-manager.js';
import { runClaudePromptWithRetry } from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import type { ToolAvailability } from '../tool-checker.js';
import type { DistributedConfig } from '../types/config.js';
// Outcome of a Claude agent run as consumed by the pre-recon phase.
interface AgentResult {
success: boolean;
// NOTE(review): unit is not established in this file - presumably milliseconds; confirm.
duration: number;
cost?: number | undefined;
error?: string | undefined;
retryable?: boolean | undefined;
}
// External tools that runTerminalScan knows how to invoke.
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
// 'skipped' is used when a tool had nothing to run against (e.g. no API schemas) or was disabled.
type ToolStatus = 'success' | 'skipped' | 'error';
// Result of a single external tool invocation.
interface TerminalScanResult {
tool: ToolName;
// Captured stdout on success, or a short explanatory message when skipped.
output: string;
status: ToolStatus;
// Value returned by Timer.stop() for the scan; 0 for scans that were never started.
duration: number;
// `success` and `error` are never set by runTerminalScan's own return statements;
// presumably populated by handleToolError - TODO confirm.
success?: boolean;
error?: Error;
}
// Variables forwarded to loadPrompt for the pre-recon prompt.
interface PromptVariables {
webUrl: string;
repoPath: string;
}
// Discriminated union for Wave1 tool results - clearer than loose union types
type Wave1ToolResult =
| { kind: 'scan'; result: TerminalScanResult }
| { kind: 'skipped'; message: string }
| { kind: 'agent'; result: AgentResult };
// Results of Wave 1: three terminal scans plus the code-analysis agent.
interface Wave1Results {
nmap: Wave1ToolResult;
subfinder: Wave1ToolResult;
whatweb: Wave1ToolResult;
// Optional: runPreReconWave1 never sets this field, so the report shows naabu as skipped.
naabu?: Wave1ToolResult;
codeAnalysis: AgentResult;
}
// Results of Wave 2: currently only the schemathesis scan.
interface Wave2Results {
schemathesis: TerminalScanResult;
}
// Return value of executePreReconPhase: total phase duration and the stitched report text.
interface PreReconResult {
duration: number;
report: string;
}
/**
 * Runs one external security tool against `target` and captures its stdout.
 *
 * - `nmap` and `subfinder` are given only the hostname parsed from `target`.
 * - `whatweb` is given the full target URL.
 * - `schemathesis` runs once per `.json`/`.yml`/`.yaml` file found in
 *   `<sourceDir>/outputs/schemas`; a failure on one schema is recorded in the
 *   output and does not stop the remaining schemas. With no directory or no
 *   schema files the scan is reported as `skipped`.
 *
 * Any other error is logged and converted into a result by `handleToolError`.
 *
 * @param tool - Which tool to run.
 * @param target - Target URL.
 * @param sourceDir - Repository directory; only used by `schemathesis` (defaults to `.` when null).
 * @returns The scan result for the tool.
 */
async function runTerminalScan(tool: ToolName, target: string, sourceDir: string | null = null): Promise<TerminalScanResult> {
  const timer = new Timer(`command-${tool}`);

  try {
    if (tool === 'nmap') {
      console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
      const host = new URL(target).hostname;
      const scan = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`nmap -sV -sC ${host}`;
      const elapsed = timer.stop();
      console.log(chalk.green(`${tool} completed in ${formatDuration(elapsed)}`));
      return { tool: 'nmap', output: scan.stdout, status: 'success', duration: elapsed };
    }

    if (tool === 'subfinder') {
      console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
      const host = new URL(target).hostname;
      const scan = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`subfinder -d ${host}`;
      const elapsed = timer.stop();
      console.log(chalk.green(`${tool} completed in ${formatDuration(elapsed)}`));
      return { tool: 'subfinder', output: scan.stdout, status: 'success', duration: elapsed };
    }

    if (tool === 'whatweb') {
      console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
      // Logged for visibility only; the actual invocation below lets zx quote the target.
      const commandPreview = `whatweb --open-timeout 30 --read-timeout 60 ${target}`;
      console.log(chalk.gray(` Command: ${commandPreview}`));
      const scan = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`whatweb --open-timeout 30 --read-timeout 60 ${target}`;
      const elapsed = timer.stop();
      console.log(chalk.green(`${tool} completed in ${formatDuration(elapsed)}`));
      return { tool: 'whatweb', output: scan.stdout, status: 'success', duration: elapsed };
    }

    if (tool === 'schemathesis') {
      // Schemathesis depends on code analysis output - skip if no schemas found
      const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas');
      const dirPresent = await fs.pathExists(schemasDir);
      if (!dirPresent) {
        console.log(chalk.gray(` ⏭️ ${tool} - schemas directory not found`));
        return { tool: 'schemathesis', output: 'Schemas directory not found', status: 'skipped', duration: timer.stop() };
      }

      const entries = await fs.readdir(schemasDir) as string[];
      const apiSchemas = entries.filter(
        (name: string) => name.endsWith('.json') || name.endsWith('.yml') || name.endsWith('.yaml')
      );
      if (apiSchemas.length === 0) {
        console.log(chalk.gray(` ⏭️ ${tool} - no API schemas found`));
        return { tool: 'schemathesis', output: 'No API schemas found', status: 'skipped', duration: timer.stop() };
      }

      console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
      const sections: string[] = [];
      // One schemathesis run per schema file; a failing schema is recorded, not fatal.
      for (const schemaFile of apiSchemas) {
        const schemaPath = path.join(schemasDir, schemaFile);
        try {
          const run = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`schemathesis run ${schemaPath} -u ${target} --max-failures=5`;
          sections.push(`Schema: ${schemaFile}\n${run.stdout}`);
        } catch (schemaError) {
          const failure = schemaError as { stdout?: string; message?: string };
          sections.push(`Schema: ${schemaFile}\nError: ${failure.stdout || failure.message}`);
        }
      }
      const elapsed = timer.stop();
      console.log(chalk.green(`${tool} completed in ${formatDuration(elapsed)}`));
      return { tool: 'schemathesis', output: sections.join('\n\n'), status: 'success', duration: elapsed };
    }

    throw new Error(`Unknown tool: ${tool}`);
  } catch (error) {
    const elapsed = timer.stop();
    console.log(chalk.red(`${tool} failed in ${formatDuration(elapsed)}`));
    return handleToolError(tool, error as Error & { code?: string }) as TerminalScanResult;
  }
}
/**
 * Wave 1: initial footprinting.
 *
 * Normal mode starts the nmap, subfinder and whatweb scans, then loads the
 * `pre-recon-code` prompt and starts the code-analysis agent, and awaits all
 * four together. Pipeline testing mode skips the external tools and runs only
 * the code-analysis agent, reporting the three scans as skipped.
 *
 * @returns Scan results (or skipped markers) plus the code-analysis agent result.
 */
async function runPreReconWave1(
  webUrl: string,
  sourceDir: string,
  variables: PromptVariables,
  config: DistributedConfig | null,
  pipelineTestingMode: boolean = false,
  sessionId: string | null = null,
  outputPath: string | null = null
): Promise<Wave1Results> {
  console.log(chalk.blue(' → Launching Wave 1 operations in parallel...'));

  const pending: Promise<TerminalScanResult | AgentResult>[] = [];

  // Starts the code-analysis agent for an already-loaded prompt (identical in both modes).
  const startCodeAnalysis = (prompt: Awaited<ReturnType<typeof loadPrompt>>) =>
    runClaudePromptWithRetry(
      prompt,
      sourceDir,
      '*',
      '',
      AGENTS['pre-recon'].displayName,
      'pre-recon', // Agent name for snapshot creation
      chalk.cyan,
      // Session metadata for audit logging (STANDARD: use 'id' field)
      { id: sessionId!, webUrl, repoPath: sourceDir, ...(outputPath && { outputPath }) }
    );

  if (pipelineTestingMode) {
    // Skip external commands in pipeline testing mode
    console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)'));
    const testPrompt = await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode);
    pending.push(startCodeAnalysis(testPrompt));

    const [analysis] = await Promise.all(pending);
    const note = 'Skipped (pipeline testing mode)';
    return {
      nmap: { kind: 'skipped', message: note },
      subfinder: { kind: 'skipped', message: note },
      whatweb: { kind: 'skipped', message: note },
      codeAnalysis: analysis as AgentResult
    };
  }

  // The scans are started before the prompt is loaded, so they are already running while it loads.
  pending.push(
    runTerminalScan('nmap', webUrl),
    runTerminalScan('subfinder', webUrl),
    runTerminalScan('whatweb', webUrl)
  );
  const fullPrompt = await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode);
  pending.push(startCodeAnalysis(fullPrompt));

  // Check if authentication config is provided for login instructions injection
  console.log(chalk.gray(` → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`));

  const [nmapScan, subfinderScan, whatwebScan, analysis] = await Promise.all(pending);
  return {
    nmap: { kind: 'scan', result: nmapScan as TerminalScanResult },
    subfinder: { kind: 'scan', result: subfinderScan as TerminalScanResult },
    whatweb: { kind: 'scan', result: whatwebScan as TerminalScanResult },
    codeAnalysis: analysis as AgentResult
  };
}
/**
 * Wave 2: additional scanning (currently only schemathesis).
 *
 * Returns a `skipped` schemathesis result without running anything when
 * pipeline testing mode is on or when the tool is not available; otherwise
 * runs the schemathesis scan against `webUrl` using schemas under `sourceDir`.
 *
 * The previous implementation collected promises in an array and mapped them
 * back through an index, ending in an `else` branch that could never execute
 * (an unavailable tool had already returned early). With one tool the scan is
 * awaited directly, which also removes the non-null assertion on the array.
 *
 * @param webUrl - Target URL passed to the scan.
 * @param sourceDir - Repository directory containing `outputs/schemas`.
 * @param toolAvailability - Which external tools are installed.
 * @param pipelineTestingMode - When true, external tools are skipped.
 * @returns The schemathesis scan result (possibly skipped).
 */
async function runPreReconWave2(
  webUrl: string,
  sourceDir: string,
  toolAvailability: ToolAvailability,
  pipelineTestingMode: boolean = false
): Promise<Wave2Results> {
  console.log(chalk.blue(' → Running Wave 2 additional scans in parallel...'));

  // Skip external commands in pipeline testing mode
  if (pipelineTestingMode) {
    console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)'));
    return {
      schemathesis: { tool: 'schemathesis', output: 'Skipped (pipeline testing mode)', status: 'skipped', duration: 0 }
    };
  }

  // Only run the scan when the tool is available
  if (!toolAvailability.schemathesis) {
    console.log(chalk.gray(' ⏭️ No Wave 2 tools available'));
    return {
      schemathesis: { tool: 'schemathesis', output: 'Tool not available', status: 'skipped', duration: 0 }
    };
  }

  const schemathesis = await runTerminalScan('schemathesis', webUrl, sourceDir);
  return { schemathesis };
}
/**
 * Reduces a Wave 1 tool result to the `{ status, output }` pair used in the report.
 *
 * - missing result: `Skipped` / `No output`
 * - `scan`: the scan's own status and output, with the same fallbacks when empty
 * - `skipped`: `Skipped` plus the skip message
 * - `agent`: `success` or `error` depending on the agent result; output is a fixed pointer
 */
function extractResult(r: Wave1ToolResult | undefined): { status: string; output: string } {
  if (!r) {
    return { status: 'Skipped', output: 'No output' };
  }

  if (r.kind === 'scan') {
    const { status, output } = r.result;
    return { status: status || 'Skipped', output: output || 'No output' };
  }

  if (r.kind === 'skipped') {
    return { status: 'Skipped', output: r.message };
  }

  // Remaining variant: 'agent'
  const agentStatus = r.result.success ? 'success' : 'error';
  return { status: agentStatus, output: 'See agent output' };
}
// Combines tool outputs into a single markdown report, writes it to
// <sourceDir>/deliverables/pre_recon_deliverable.md, and returns the report text.
//
// - wave1: Wave 1 tool results; each is reduced to status/output via extractResult.
// - additionalScans: extra scan results rendered under an "Authenticated Scans" heading
//   (the section is omitted entirely when the array is empty).
// - sourceDir: repository root that contains (or will contain) the deliverables directory.
//
// If the code analysis deliverable cannot be read, the report contains a pointer to its
// expected location instead of the content. A failure to write the report is rethrown as
// a PentestError in the 'filesystem' category.
async function stitchPreReconOutputs(wave1: Wave1Results, additionalScans: TerminalScanResult[], sourceDir: string): Promise<string> {
// Try to read the code analysis deliverable file
let codeAnalysisContent = 'No analysis available';
try {
const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8');
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${err.message}`));
// Best-effort: keep building the report and point the reader at the expected file instead
codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md';
}
// Build additional scans section
let additionalSection = '';
if (additionalScans.length > 0) {
additionalSection = '\n## Authenticated Scans\n';
for (const scan of additionalScans) {
additionalSection += `
### ${scan.tool.toUpperCase()}
Status: ${scan.status}
${scan.output}
`;
}
}
const nmap = extractResult(wave1.nmap);
const subfinder = extractResult(wave1.subfinder);
const whatweb = extractResult(wave1.whatweb);
// wave1.naabu is optional; extractResult maps a missing result to Skipped / No output
const naabu = extractResult(wave1.naabu);
const report = `
# Pre-Reconnaissance Report
## Port Discovery (naabu)
Status: ${naabu.status}
${naabu.output}
## Network Scanning (nmap)
Status: ${nmap.status}
${nmap.output}
## Subdomain Discovery (subfinder)
Status: ${subfinder.status}
${subfinder.output}
## Technology Detection (whatweb)
Status: ${whatweb.status}
${whatweb.output}
## Code Analysis
${codeAnalysisContent}
${additionalSection}
---
Report generated at: ${new Date().toISOString()}
`.trim();
// Ensure deliverables directory exists in the cloned repo
try {
const deliverablePath = path.join(sourceDir, 'deliverables', 'pre_recon_deliverable.md');
await fs.ensureDir(path.join(sourceDir, 'deliverables'));
// Write to file in the cloned repository
await fs.writeFile(deliverablePath, report);
} catch (error) {
const err = error as Error;
// NOTE(review): the third argument (false) is presumably the "retryable" flag - confirm against PentestError
throw new PentestError(
`Failed to write pre-recon report: ${err.message}`,
'filesystem',
false,
{ sourceDir, originalError: err.message }
);
}
return report;
}
/**
 * Entry point for Phase 1 (pre-reconnaissance).
 *
 * Runs Wave 1, then Wave 2, stitches their outputs into the pre-recon
 * deliverable, and reports progress on the console along the way.
 *
 * @returns The phase duration and the stitched report text.
 */
export async function executePreReconPhase(
  webUrl: string,
  sourceDir: string,
  variables: PromptVariables,
  config: DistributedConfig | null,
  toolAvailability: ToolAvailability,
  pipelineTestingMode: boolean,
  sessionId: string | null = null,
  outputPath: string | null = null
): Promise<PreReconResult> {
  console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE'));
  const phaseTimer = new Timer('phase-1-pre-recon');

  // Wave 1
  console.log(chalk.yellow('Wave 1: Initial footprinting...'));
  const firstWave = await runPreReconWave1(
    webUrl,
    sourceDir,
    variables,
    config,
    pipelineTestingMode,
    sessionId,
    outputPath
  );
  console.log(chalk.green(' ✅ Wave 1 operations completed'));

  // Wave 2
  console.log(chalk.yellow('Wave 2: Additional scanning...'));
  const secondWave = await runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode);
  console.log(chalk.green(' ✅ Wave 2 operations completed'));

  // Stitch everything into the deliverable
  console.log(chalk.blue('📝 Stitching pre-recon outputs...'));
  const extraScans: TerminalScanResult[] = [];
  if (secondWave.schemathesis) {
    extraScans.push(secondWave.schemathesis);
  }
  const report = await stitchPreReconOutputs(firstWave, extraScans, sourceDir);

  const elapsed = phaseTimer.stop();
  console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(elapsed)}`));
  console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`));

  return { duration: elapsed, report };
}
+2 -6
View File
@@ -4,8 +4,6 @@
// it under the terms of the GNU Affero General Public License version 3 // it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import chalk from 'chalk';
export class ProgressIndicator { export class ProgressIndicator {
private message: string; private message: string;
private frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; private frames: string[] = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
@@ -25,9 +23,7 @@ export class ProgressIndicator {
this.interval = setInterval(() => { this.interval = setInterval(() => {
// Clear the line and write the spinner // Clear the line and write the spinner
process.stdout.write( process.stdout.write(`\r${this.frames[this.frameIndex]} ${this.message}`);
`\r${chalk.cyan(this.frames[this.frameIndex])} ${chalk.dim(this.message)}`
);
this.frameIndex = (this.frameIndex + 1) % this.frames.length; this.frameIndex = (this.frameIndex + 1) % this.frames.length;
}, 100); }, 100);
} }
@@ -47,6 +43,6 @@ export class ProgressIndicator {
finish(successMessage: string = 'Complete'): void { finish(successMessage: string = 'Complete'): void {
this.stop(); this.stop();
console.log(chalk.green(`${successMessage}`)); console.log(`${successMessage}`);
} }
} }
+291
View File
@@ -0,0 +1,291 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Agent Execution Service
*
* Handles the full agent lifecycle:
* - Load config via ConfigLoaderService
* - Load prompt template using AGENTS[agentName].promptTemplate
* - Create git checkpoint
* - Start audit logging
* - Invoke Claude SDK via runClaudePrompt
* - Spending cap check using isSpendingCapBehavior
* - Handle failure (rollback, audit)
* - Validate output using AGENTS[agentName].deliverableFilename
* - Commit on success, log metrics
*
* No Temporal dependencies - pure domain logic.
*/
import type { ActivityLogger } from '../types/activity-logger.js';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode, type PentestErrorType } from '../types/errors.js';
import { PentestError } from './error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from './prompt-manager.js';
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from './git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { AgentName } from '../types/agents.js';
import type { ConfigLoaderService } from './config-loader.js';
import type { AgentMetrics } from '../types/metrics.js';
/**
* Input for agent execution.
*
* Consumed by AgentExecutionService.execute / executeOrThrow.
*/
export interface AgentExecutionInput {
// Target URL; passed to loadPrompt as a prompt variable.
webUrl: string;
// Repository path; used as a prompt variable, as the agent's working directory, and for git operations.
repoPath: string;
// Optional config file path; when absent, the agent runs with a null config.
configPath?: string | undefined;
// Forwarded to loadPrompt; treated as false when omitted.
pipelineTestingMode?: boolean | undefined;
// Attempt counter; recorded in the git checkpoint, the audit log, and the end result.
attemptNumber: number;
}
/**
* Options describing a failed agent run, consumed by AgentExecutionService.failAgent.
*/
interface FailAgentOpts {
// Attempt counter copied into the audited end result.
attemptNumber: number;
// Raw agent result; supplies duration, cost and model for the audited end result.
result: ClaudePromptResult;
// Reason string passed to rollbackGitWorkspace.
rollbackReason: string;
// Message used both for the audited end result and for the returned PentestError.
errorMessage: string;
// The remaining fields are forwarded unchanged to the PentestError constructor.
errorCode: ErrorCode;
category: PentestErrorType;
retryable: boolean;
context: Record<string, unknown>;
}
/**
* Service for executing agents with full lifecycle management.
*
* NOTE: AuditSession is passed per-execution, NOT stored on the service.
* This is critical for parallel agent execution - each agent needs its own
* AuditSession instance because AuditSession uses instance state (currentAgentName)
* to track which agent is currently logging.
*/
export class AgentExecutionService {
private readonly configLoader: ConfigLoaderService;
constructor(configLoader: ConfigLoaderService) {
this.configLoader = configLoader;
}
/**
* Execute an agent with full lifecycle management.
*
* Failures in steps 1-3 (config, prompt, git checkpoint) return an error before
* auditSession.startAgent is called, so no agent start/end is recorded for them.
* Failures after the agent has run go through failAgent, which rolls back the
* workspace and records a failed end result.
*
* Calls from step 4 onwards (startAgent, runClaudePrompt, validateAgentOutput,
* commitGitSuccess, getGitCommitHash, endAgent) are not wrapped in try/catch here,
* so if one of them throws, the returned promise rejects instead of resolving to
* an error Result.
*
* @param agentName - Name of the agent to execute
* @param input - Execution input parameters
* @param auditSession - Audit session for this specific agent execution
* @param logger - Activity logger forwarded to prompt loading, git operations, agent execution and validation
* @returns Result containing AgentEndResult on success, PentestError on failure
*/
async execute(
agentName: AgentName,
input: AgentExecutionInput,
auditSession: AuditSession,
logger: ActivityLogger
): Promise<Result<AgentEndResult, PentestError>> {
const { webUrl, repoPath, configPath, pipelineTestingMode = false, attemptNumber } = input;
// 1. Load config (if provided)
const configResult = await this.configLoader.loadOptional(configPath);
if (isErr(configResult)) {
// Propagate the config error unchanged
return configResult;
}
// null when no configPath was provided
const distributedConfig = configResult.value;
// 2. Load prompt
const promptTemplate = AGENTS[agentName].promptTemplate;
let prompt: string;
try {
prompt = await loadPrompt(
promptTemplate,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode,
logger
);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
// Prompt load failures are reported as non-retryable
return err(
new PentestError(
`Failed to load prompt for ${agentName}: ${errorMessage}`,
'prompt',
false,
{ agentName, promptTemplate, originalError: errorMessage },
ErrorCode.PROMPT_LOAD_FAILED
)
);
}
// 3. Create git checkpoint before execution
try {
await createGitCheckpoint(repoPath, agentName, attemptNumber, logger);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
// Checkpoint failures are reported as non-retryable
return err(
new PentestError(
`Failed to create git checkpoint for ${agentName}: ${errorMessage}`,
'filesystem',
false,
{ agentName, repoPath, originalError: errorMessage },
ErrorCode.GIT_CHECKPOINT_FAILED
)
);
}
// 4. Start audit logging
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 5. Execute agent
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
'', // context
agentName, // description
agentName,
auditSession,
logger
);
// 6. Spending cap check - defense-in-depth
// Only inspected for nominally successful runs with at most 2 turns and zero cost
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
if (isSpendingCapBehavior(result.turns ?? 0, result.cost || 0, resultText)) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
rollbackReason: 'spending cap detected',
// Only the first 100 characters of the agent output are included in the message
errorMessage: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
errorCode: ErrorCode.SPENDING_CAP_REACHED,
category: 'billing',
retryable: true,
context: { agentName, turns: result.turns, cost: result.cost },
});
}
}
// 7. Handle execution failure
if (!result.success) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
rollbackReason: 'execution failure',
errorMessage: result.error || 'Agent execution failed',
errorCode: ErrorCode.AGENT_EXECUTION_FAILED,
category: 'validation',
// Treated as retryable unless the agent result says otherwise
retryable: result.retryable ?? true,
context: { agentName, originalError: result.error },
});
}
// 8. Validate output
const validationPassed = await validateAgentOutput(result, agentName, repoPath, logger);
if (!validationPassed) {
return this.failAgent(agentName, repoPath, auditSession, logger, {
attemptNumber, result,
rollbackReason: 'validation failure',
errorMessage: `Agent ${agentName} failed output validation`,
errorCode: ErrorCode.OUTPUT_VALIDATION_FAILED,
category: 'validation',
retryable: true,
context: { agentName, deliverableFilename: AGENTS[agentName].deliverableFilename },
});
}
// 9. Success - commit deliverables, then capture checkpoint hash
await commitGitSuccess(repoPath, agentName, logger);
const commitHash = await getGitCommitHash(repoPath);
const endResult: AgentEndResult = {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
// checkpoint is only included when a commit hash was obtained
...(commitHash && { checkpoint: commitHash }),
};
await auditSession.endAgent(agentName, endResult);
return ok(endResult);
}
/**
* Shared failure path for a run that has already been started in the audit session.
*
* Rolls back the git workspace, records a failed AgentEndResult in the audit
* session, and returns an error Result wrapping a PentestError built from opts.
*
* @param agentName - Name of the agent that failed
* @param repoPath - Repository whose workspace is rolled back
* @param auditSession - Audit session that receives the failed end result
* @param logger - Activity logger forwarded to the rollback
* @param opts - Failure details (see FailAgentOpts)
* @returns An error Result; this method never returns a success Result
*/
private async failAgent(
agentName: AgentName,
repoPath: string,
auditSession: AuditSession,
logger: ActivityLogger,
opts: FailAgentOpts
): Promise<Result<AgentEndResult, PentestError>> {
await rollbackGitWorkspace(repoPath, opts.rollbackReason, logger);
const endResult: AgentEndResult = {
attemptNumber: opts.attemptNumber,
duration_ms: opts.result.duration,
cost_usd: opts.result.cost || 0,
success: false,
model: opts.result.model,
error: opts.errorMessage,
};
await auditSession.endAgent(agentName, endResult);
return err(
new PentestError(
opts.errorMessage,
opts.category,
opts.retryable,
opts.context,
opts.errorCode
)
);
}
/**
* Execute an agent, throwing PentestError on failure.
*
* This is the preferred method for Temporal activities, which need to
* catch errors and classify them into ApplicationFailure. Avoids requiring
* activities to import Result utilities, keeping the boundary clean.
*
* @param agentName - Name of the agent to execute
* @param input - Execution input parameters
* @param auditSession - Audit session for this specific agent execution
* @param logger - Activity logger forwarded to execute
* @returns AgentEndResult on success
* @throws PentestError on failure
*/
async executeOrThrow(
agentName: AgentName,
input: AgentExecutionInput,
auditSession: AuditSession,
logger: ActivityLogger
): Promise<AgentEndResult> {
const result = await this.execute(agentName, input, auditSession, logger);
if (isErr(result)) {
throw result.error;
}
return result.value;
}
/**
* Convert AgentEndResult to AgentMetrics for workflow state.
*
* Duration and cost come from endResult; turn count and model come from the raw
* agent result. Token counts are always null.
*
* @param endResult - End result produced by execute
* @param result - Raw agent result from the same run
* @returns Metrics object for workflow state
*/
static toMetrics(endResult: AgentEndResult, result: ClaudePromptResult): AgentMetrics {
return {
durationMs: endResult.duration_ms,
inputTokens: null, // Not currently exposed by SDK wrapper
outputTokens: null,
costUsd: endResult.cost_usd,
numTurns: result.turns ?? null,
model: result.model,
};
}
}
+75
View File
@@ -0,0 +1,75 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Config Loader Service
*
* Wraps parseConfig + distributeConfig with Result type for explicit error handling.
* Pure service with no Temporal dependencies.
*/
import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from './error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import type { DistributedConfig } from '../types/config.js';
/**
 * Loads a configuration file and distributes it into per-agent sections.
 *
 * Failures are reported through the Result type instead of being thrown,
 * so callers choose how to react.
 */
export class ConfigLoaderService {
  /**
   * Parse and distribute the configuration at `configPath`.
   *
   * Any thrown error is converted into a non-retryable `config` PentestError.
   * The error code is chosen from the error message text:
   * "not found" / "ENOENT" means CONFIG_NOT_FOUND, "validation failed" means
   * CONFIG_VALIDATION_FAILED, anything else CONFIG_PARSE_ERROR.
   *
   * @param configPath - Path to the YAML configuration file
   * @returns Result containing DistributedConfig on success, PentestError on failure
   */
  async load(configPath: string): Promise<Result<DistributedConfig, PentestError>> {
    try {
      const parsed = await parseConfig(configPath);
      return ok(distributeConfig(parsed));
    } catch (caught) {
      const reason = caught instanceof Error ? caught.message : String(caught);

      // Classify by message text; the "not found" check takes precedence.
      const isMissing = reason.includes('not found') || reason.includes('ENOENT');
      let code = ErrorCode.CONFIG_PARSE_ERROR;
      if (isMissing) {
        code = ErrorCode.CONFIG_NOT_FOUND;
      } else if (reason.includes('validation failed')) {
        code = ErrorCode.CONFIG_VALIDATION_FAILED;
      }

      const failure = new PentestError(
        `Failed to load config ${configPath}: ${reason}`,
        'config',
        false,
        { configPath, originalError: reason },
        code
      );
      return err(failure);
    }
  }

  /**
   * Like `load`, but an absent (or empty) path yields a successful `null` config.
   *
   * @param configPath - Optional path to the YAML configuration file
   * @returns Result containing DistributedConfig (or null) on success, PentestError on failure
   */
  async loadOptional(
    configPath: string | undefined
  ): Promise<Result<DistributedConfig | null, PentestError>> {
    if (configPath) {
      return this.load(configPath);
    }
    return ok(null);
  }
}
+117
View File
@@ -0,0 +1,117 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Dependency Injection Container
*
* Provides a per-workflow container for service instances.
* Services are wired with explicit constructor injection.
*
* Usage:
* const container = getOrCreateContainer(workflowId, sessionMetadata);
* const auditSession = new AuditSession(sessionMetadata); // Per-agent
* await auditSession.initialize(workflowId);
* const result = await container.agentExecution.executeOrThrow(agentName, input, auditSession, logger);
*/
import type { SessionMetadata } from '../audit/utils.js';
import { AgentExecutionService } from './agent-execution.js';
import { ConfigLoaderService } from './config-loader.js';
import { ExploitationCheckerService } from './exploitation-checker.js';
/**
* Dependencies required to create a Container.
*
* NOTE: AuditSession is NOT stored in the container.
* Each agent execution receives its own AuditSession instance
* because AuditSession uses instance state (currentAgentName) that
* cannot be shared across parallel agents.
*/
export interface ContainerDependencies {
// Session metadata for the workflow; stored on the Container as-is.
readonly sessionMetadata: SessionMetadata;
}
/**
 * Per-workflow dependency-injection container.
 *
 * Creates each service once, at construction time, and exposes them as
 * read-only properties so they are reused across agent executions.
 *
 * NOTE: AuditSession is deliberately not held here. It is supplied per agent
 * execution so that parallel agents each get their own logging context.
 */
export class Container {
  readonly sessionMetadata: SessionMetadata;
  readonly agentExecution: AgentExecutionService;
  readonly configLoader: ConfigLoaderService;
  readonly exploitationChecker: ExploitationCheckerService;

  constructor(deps: ContainerDependencies) {
    const { sessionMetadata } = deps;
    this.sessionMetadata = sessionMetadata;

    // Explicit constructor injection: the agent execution service shares this container's config loader.
    const loader = new ConfigLoaderService();
    this.configLoader = loader;
    this.exploitationChecker = new ExploitationCheckerService();
    this.agentExecution = new AgentExecutionService(loader);
  }
}
/**
* Map of workflowId to Container instance.
* Each workflow gets its own container scoped to its lifecycle.
*
* Module-level state: within this file, entries are added only by
* getOrCreateContainer and removed only by removeContainer, so a workflow
* that never calls removeContainer leaves its container in the map.
*/
const containers = new Map<string, Container>();
/**
 * Look up the Container for a workflow, creating and registering it on first use.
 *
 * When a container is already registered for `workflowId` it is returned as-is
 * and the `sessionMetadata` argument is ignored.
 *
 * @param workflowId - Unique workflow identifier
 * @param sessionMetadata - Session metadata for audit paths (used only when creating)
 * @returns Container instance for the workflow
 */
export function getOrCreateContainer(
  workflowId: string,
  sessionMetadata: SessionMetadata
): Container {
  const existing = containers.get(workflowId);
  if (existing) {
    return existing;
  }

  const created = new Container({ sessionMetadata });
  containers.set(workflowId, created);
  return created;
}
/**
 * Remove a Container when a workflow completes.
 *
 * Deletes the registry entry for `workflowId`. If no container is
 * registered under that id the call does nothing.
 *
 * Should be called in logWorkflowComplete to clean up resources.
 *
 * @param workflowId - Unique workflow identifier
 */
export function removeContainer(workflowId: string): void {
// Map.delete returns a boolean that is intentionally discarded here.
containers.delete(workflowId);
}
/**
 * Get an existing Container for a workflow, if one exists.
 *
 * Unlike getOrCreateContainer, this does NOT create a new container.
 * Returns undefined if no container exists for the workflowId.
 *
 * Useful for lightweight activities that can benefit from an existing
 * container but don't need to create one.
 *
 * @param workflowId - Unique workflow identifier
 * @returns Container instance or undefined
 */
export function getContainer(workflowId: string): Container | undefined {
// Pure lookup: the registry is never modified by this function.
return containers.get(workflowId);
}
@@ -4,116 +4,44 @@
// it under the terms of the GNU Affero General Public License version 3 // it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import chalk from 'chalk'; import {
import { fs, path } from 'zx'; ErrorCode,
import type { type PentestErrorType,
PentestErrorType, type PentestErrorContext,
PentestErrorContext, type PromptErrorResult,
LogEntry, } from '../types/errors.js';
ToolErrorResult, import {
PromptErrorResult, matchesBillingApiPattern,
} from './types/errors.js'; matchesBillingTextPattern,
} from '../utils/billing-detection.js';
// Temporal error classification for ApplicationFailure wrapping
export interface TemporalErrorClassification {
type: string;
retryable: boolean;
}
// Custom error class for pentest operations
export class PentestError extends Error { export class PentestError extends Error {
name = 'PentestError' as const; override name = 'PentestError' as const;
type: PentestErrorType; type: PentestErrorType;
retryable: boolean; retryable: boolean;
context: PentestErrorContext; context: PentestErrorContext;
timestamp: string; timestamp: string;
/** Optional specific error code for reliable classification */
code?: ErrorCode;
constructor( constructor(
message: string, message: string,
type: PentestErrorType, type: PentestErrorType,
retryable: boolean = false, retryable: boolean = false,
context: PentestErrorContext = {} context: PentestErrorContext = {},
code?: ErrorCode
) { ) {
super(message); super(message);
this.type = type; this.type = type;
this.retryable = retryable; this.retryable = retryable;
this.context = context; this.context = context;
this.timestamp = new Date().toISOString(); this.timestamp = new Date().toISOString();
} if (code !== undefined) {
} this.code = code;
// Centralized error logging function
export async function logError(
error: Error & { type?: PentestErrorType; retryable?: boolean; context?: PentestErrorContext },
contextMsg: string,
sourceDir: string | null = null
): Promise<LogEntry> {
const timestamp = new Date().toISOString();
const logEntry: LogEntry = {
timestamp,
context: contextMsg,
error: {
name: error.name || error.constructor.name,
message: error.message,
type: error.type || 'unknown',
retryable: error.retryable || false,
},
};
// Only add stack if it exists
if (error.stack) {
logEntry.error.stack = error.stack;
}
// Console logging with color
const prefix = error.retryable ? '⚠️' : '❌';
const color = error.retryable ? chalk.yellow : chalk.red;
console.log(color(`${prefix} ${contextMsg}:`));
console.log(color(` ${error.message}`));
if (error.context && Object.keys(error.context).length > 0) {
console.log(chalk.gray(` Context: ${JSON.stringify(error.context)}`));
}
// File logging (if source directory available)
if (sourceDir) {
try {
const logPath = path.join(sourceDir, 'error.log');
await fs.appendFile(logPath, JSON.stringify(logEntry) + '\n');
} catch (logErr) {
const errMsg = logErr instanceof Error ? logErr.message : String(logErr);
console.log(chalk.gray(` (Failed to write error log: ${errMsg})`));
} }
} }
return logEntry;
} }
// Handle tool execution errors
export function handleToolError(
toolName: string,
error: Error & { code?: string }
): ToolErrorResult {
const isRetryable =
error.code === 'ECONNRESET' ||
error.code === 'ETIMEDOUT' ||
error.code === 'ENOTFOUND';
return {
tool: toolName,
output: `Error: ${error.message}`,
status: 'error',
duration: 0,
success: false,
error: new PentestError(
`${toolName} execution failed: ${error.message}`,
'tool',
isRetryable,
{ toolName, originalError: error.message, errorCode: error.code }
),
};
}
// Handle prompt loading errors
export function handlePromptError( export function handlePromptError(
promptName: string, promptName: string,
error: Error error: Error
@@ -129,7 +57,6 @@ export function handlePromptError(
}; };
} }
// Patterns that indicate retryable errors
const RETRYABLE_PATTERNS = [ const RETRYABLE_PATTERNS = [
// Network and connection errors // Network and connection errors
'network', 'network',
@@ -173,28 +100,58 @@ const NON_RETRYABLE_PATTERNS = [
export function isRetryableError(error: Error): boolean { export function isRetryableError(error: Error): boolean {
const message = error.message.toLowerCase(); const message = error.message.toLowerCase();
// Check for explicit non-retryable patterns first
if (NON_RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern))) { if (NON_RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern))) {
return false; return false;
} }
// Check for retryable patterns
return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern)); return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern));
} }
// Rate limit errors get longer base delay (30s) vs standard exponential backoff (2s) /**
export function getRetryDelay(error: Error, attempt: number): number { * Classifies errors by ErrorCode for reliable, code-based classification.
const message = error.message.toLowerCase(); * Used when error is a PentestError with a specific ErrorCode.
*/
function classifyByErrorCode(
code: ErrorCode,
retryableFromError: boolean
): { type: string; retryable: boolean } {
switch (code) {
// Billing errors - retryable (wait for cap reset or credits added)
case ErrorCode.SPENDING_CAP_REACHED:
case ErrorCode.INSUFFICIENT_CREDITS:
return { type: 'BillingError', retryable: true };
// Rate limiting gets longer delays case ErrorCode.API_RATE_LIMITED:
if (message.includes('rate limit') || message.includes('429')) { return { type: 'RateLimitError', retryable: true };
return Math.min(30000 + attempt * 10000, 120000); // 30s, 40s, 50s, max 2min
// Config errors - non-retryable (need manual fix)
case ErrorCode.CONFIG_NOT_FOUND:
case ErrorCode.CONFIG_VALIDATION_FAILED:
case ErrorCode.CONFIG_PARSE_ERROR:
return { type: 'ConfigurationError', retryable: false };
// Prompt errors - non-retryable (need manual fix)
case ErrorCode.PROMPT_LOAD_FAILED:
return { type: 'ConfigurationError', retryable: false };
// Git errors - non-retryable (indicates workspace corruption)
case ErrorCode.GIT_CHECKPOINT_FAILED:
case ErrorCode.GIT_ROLLBACK_FAILED:
return { type: 'GitError', retryable: false };
// Validation errors - retryable (agent may succeed on retry)
case ErrorCode.OUTPUT_VALIDATION_FAILED:
case ErrorCode.DELIVERABLE_NOT_FOUND:
return { type: 'OutputValidationError', retryable: true };
// Agent execution - use the retryable flag from the error
case ErrorCode.AGENT_EXECUTION_FAILED:
return { type: 'AgentExecutionError', retryable: retryableFromError };
default:
// Unknown code - not re-classified by string matching; keep the error's own retryable flag
return { type: 'UnknownError', retryable: retryableFromError };
} }
// Exponential backoff with jitter for other retryable errors
const baseDelay = Math.pow(2, attempt) * 1000; // 2s, 4s, 8s
const jitter = Math.random() * 1000; // 0-1s random
return Math.min(baseDelay + jitter, 30000); // Max 30s
} }
/** /**
@@ -204,31 +161,25 @@ export function getRetryDelay(error: Error, attempt: number): number {
* Used by activities to wrap errors in ApplicationFailure: * Used by activities to wrap errors in ApplicationFailure:
* - Retryable errors: Temporal retries with configured backoff * - Retryable errors: Temporal retries with configured backoff
* - Non-retryable errors: Temporal fails immediately * - Non-retryable errors: Temporal fails immediately
*
* Classification priority:
* 1. If error is PentestError with ErrorCode, classify by code (reliable)
* 2. Fall through to string matching for external errors (SDK, network, etc.)
*/ */
export function classifyErrorForTemporal(error: unknown): TemporalErrorClassification { export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
// === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
if (error instanceof PentestError && error.code !== undefined) {
return classifyByErrorCode(error.code, error.retryable);
}
// === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
const message = (error instanceof Error ? error.message : String(error)).toLowerCase(); const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
// === BILLING ERRORS (Retryable with long backoff) === // === BILLING ERRORS (Retryable with long backoff) ===
// Anthropic returns billing as 400 invalid_request_error // Anthropic returns billing as 400 invalid_request_error
// Human can add credits OR wait for spending cap to reset (5-30 min backoff) // Human can add credits OR wait for spending cap to reset (5-30 min backoff)
if ( // Check both API patterns and text patterns for comprehensive detection
message.includes('billing_error') || if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
message.includes('credit balance is too low') ||
message.includes('insufficient credits') ||
message.includes('usage is blocked due to insufficient credits') ||
message.includes('please visit plans & billing') ||
message.includes('please visit plans and billing') ||
message.includes('usage limit reached') ||
message.includes('quota exceeded') ||
message.includes('daily rate limit') ||
message.includes('limit will reset') ||
// Claude Code spending cap patterns (returns short message instead of error)
message.includes('spending cap') ||
message.includes('spending limit') ||
message.includes('cap reached') ||
message.includes('budget exceeded') ||
message.includes('billing limit reached')
) {
return { type: 'BillingError', retryable: true }; return { type: 'BillingError', retryable: true };
} }
+71
View File
@@ -0,0 +1,71 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Exploitation Checker Service
*
* Pure domain logic for determining whether exploitation should run.
* Reads queue file, parses JSON, returns decision.
*
* No Temporal dependencies - this is pure business logic.
*/
import {
validateQueueSafe,
type VulnType,
type ExploitationDecision,
} from './queue-validation.js';
import { isOk } from '../types/result.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
 * Decides whether an exploit agent should run for a vulnerability type.
 *
 * The decision is derived from the result of validating that type's
 * queue (via validateQueueSafe); this class adds logging and the policy
 * for what to do when validation fails.
 */
export class ExploitationCheckerService {
  /**
   * Validate the queue for `vulnType` and turn the outcome into an
   * exploitation decision.
   *
   * Outcomes:
   * - validation succeeded: the validated decision is logged and returned as-is
   * - validation failed, error flagged retryable: a warning is logged and the
   *   error is rethrown so the caller can retry
   * - validation failed, not retryable: a warning is logged and a
   *   "do not exploit" decision is returned instead of throwing
   *
   * @param vulnType - Type of vulnerability (injection, xss, auth, ssrf, authz)
   * @param repoPath - Path to the repository containing deliverables
   * @param logger - ActivityLogger for structured logging
   * @returns ExploitationDecision indicating whether to exploit
   * @throws The validation error (documented as a PentestError) when it is flagged retryable
   */
  async checkQueue(vulnType: VulnType, repoPath: string, logger: ActivityLogger): Promise<ExploitationDecision> {
    const validation = await validateQueueSafe(vulnType, repoPath);

    if (!isOk(validation)) {
      const failure = validation.error;

      // Retryable failures are surfaced to the caller, which owns retry handling.
      if (failure.retryable) {
        logger.warn(`${vulnType}: ${failure.message} (retryable)`);
        throw failure;
      }

      // Anything else degrades gracefully: skip exploitation for this type.
      logger.warn(`${vulnType}: ${failure.message}, skipping exploitation`);
      return {
        shouldExploit: false,
        shouldRetry: false,
        vulnerabilityCount: 0,
        vulnType,
      };
    }

    const decision = validation.value;
    const outcome = decision.shouldExploit
      ? `${decision.vulnerabilityCount} vulnerabilities found`
      : 'no vulnerabilities, skipping exploitation';
    logger.info(`${vulnType}: ${outcome}`);
    return decision;
  }
}
@@ -5,7 +5,9 @@
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import { $ } from 'zx'; import { $ } from 'zx';
import chalk from 'chalk'; import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/** /**
* Check if a directory is a git repository. * Check if a directory is a git repository.
@@ -51,17 +53,19 @@ function logChangeSummary(
changes: string[], changes: string[],
messageWithChanges: string, messageWithChanges: string,
messageWithoutChanges: string, messageWithoutChanges: string,
color: typeof chalk.green, logger: ActivityLogger,
level: 'info' | 'warn' = 'info',
maxToShow: number = 5 maxToShow: number = 5
): void { ): void {
if (changes.length > 0) { if (changes.length > 0) {
console.log(color(messageWithChanges.replace('{count}', String(changes.length)))); const msg = messageWithChanges.replace('{count}', String(changes.length));
changes.slice(0, maxToShow).forEach((change) => console.log(chalk.gray(` ${change}`))); const fileList = changes.slice(0, maxToShow).map((c) => ` ${c}`).join(', ');
if (changes.length > maxToShow) { const suffix = changes.length > maxToShow
console.log(chalk.gray(` ... and ${changes.length - maxToShow} more files`)); ? ` ... and ${changes.length - maxToShow} more files`
} : '';
logger[level](`${msg} ${fileList}${suffix}`);
} else { } else {
console.log(color(messageWithoutChanges)); logger[level](messageWithoutChanges);
} }
} }
@@ -136,10 +140,10 @@ export async function executeGitCommandWithRetry(
if (isGitLockError(errMsg) && attempt < maxRetries) { if (isGitLockError(errMsg) && attempt < maxRetries) {
const delay = Math.pow(2, attempt - 1) * 1000; const delay = Math.pow(2, attempt - 1) * 1000;
console.log( // executeGitCommandWithRetry is also called outside activity context
chalk.yellow( // (e.g., from resume logic), so we use console.warn as a fallback here
` ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...` console.warn(
) `Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
); );
await new Promise((resolve) => setTimeout(resolve, delay)); await new Promise((resolve) => setTimeout(resolve, delay));
continue; continue;
@@ -148,7 +152,13 @@ export async function executeGitCommandWithRetry(
throw error; throw error;
} }
} }
throw new Error(`Git command failed after ${maxRetries} retries`); throw new PentestError(
`Git command failed after ${maxRetries} retries`,
'filesystem',
true, // Retryable - transient git lock issues
{ maxRetries, description },
ErrorCode.GIT_CHECKPOINT_FAILED
);
} finally { } finally {
gitSemaphore.release(); gitSemaphore.release();
} }
@@ -157,15 +167,16 @@ export async function executeGitCommandWithRetry(
// Two-phase reset: hard reset (tracked files) + clean (untracked files) // Two-phase reset: hard reset (tracked files) + clean (untracked files)
export async function rollbackGitWorkspace( export async function rollbackGitWorkspace(
sourceDir: string, sourceDir: string,
reason: string = 'retry preparation' reason: string = 'retry preparation',
logger: ActivityLogger
): Promise<GitOperationResult> { ): Promise<GitOperationResult> {
// Skip git operations if not a git repository // Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) { if (!(await isGitRepository(sourceDir))) {
console.log(chalk.gray(` ⏭️ Skipping git rollback (not a git repository)`)); logger.info('Skipping git rollback (not a git repository)');
return { success: true }; return { success: true };
} }
console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`)); logger.info(`Rolling back workspace for ${reason}`);
try { try {
const changes = await getChangedFiles(sourceDir, 'status check for rollback'); const changes = await getChangedFiles(sourceDir, 'status check for rollback');
@@ -182,16 +193,26 @@ export async function rollbackGitWorkspace(
logChangeSummary( logChangeSummary(
changes, changes,
'Rollback completed - removed {count} contaminated changes:', 'Rollback completed - removed {count} contaminated changes:',
'Rollback completed - no changes to remove', 'Rollback completed - no changes to remove',
chalk.yellow, logger,
'info',
3 3
); );
return { success: true }; return { success: true };
} catch (error) { } catch (error) {
const result = toErrorResult(error); const errMsg = error instanceof Error ? error.message : String(error);
console.log(chalk.red(`Rollback failed after retries: ${result.error?.message}`)); logger.error(`Rollback failed after retries: ${errMsg}`);
return result; return {
success: false,
error: new PentestError(
`Git rollback failed: ${errMsg}`,
'filesystem',
false, // Non-retryable - rollback is best-effort cleanup
{ sourceDir, reason },
ErrorCode.GIT_ROLLBACK_FAILED
),
};
} }
} }
@@ -199,29 +220,30 @@ export async function rollbackGitWorkspace(
export async function createGitCheckpoint( export async function createGitCheckpoint(
sourceDir: string, sourceDir: string,
description: string, description: string,
attempt: number attempt: number,
logger: ActivityLogger
): Promise<GitOperationResult> { ): Promise<GitOperationResult> {
// Skip git operations if not a git repository // Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) { if (!(await isGitRepository(sourceDir))) {
console.log(chalk.gray(` ⏭️ Skipping git checkpoint (not a git repository)`)); logger.info('Skipping git checkpoint (not a git repository)');
return { success: true }; return { success: true };
} }
console.log(chalk.blue(` 📍 Creating checkpoint for ${description} (attempt ${attempt})`)); logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
try { try {
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution // 1. On retries, clean workspace to prevent pollution from previous attempt
if (attempt > 1) { if (attempt > 1) {
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`); const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
if (!cleanResult.success) { if (!cleanResult.success) {
console.log( logger.warn(`Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`);
chalk.yellow(` ⚠️ Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`)
);
} }
} }
// 2. Detect existing changes
const changes = await getChangedFiles(sourceDir, 'status check'); const changes = await getChangedFiles(sourceDir, 'status check');
const hasChanges = changes.length > 0; const hasChanges = changes.length > 0;
// 3. Stage and commit checkpoint
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes'); await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
await executeGitCommandWithRetry( await executeGitCommandWithRetry(
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'], ['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
@@ -229,30 +251,32 @@ export async function createGitCheckpoint(
'creating commit' 'creating commit'
); );
// 4. Log result
if (hasChanges) { if (hasChanges) {
console.log(chalk.blue(`Checkpoint created with uncommitted changes staged`)); logger.info('Checkpoint created with uncommitted changes staged');
} else { } else {
console.log(chalk.blue(`Empty checkpoint created (no workspace changes)`)); logger.info('Empty checkpoint created (no workspace changes)');
} }
return { success: true }; return { success: true };
} catch (error) { } catch (error) {
const result = toErrorResult(error); const result = toErrorResult(error);
console.log(chalk.yellow(` ⚠️ Checkpoint creation failed after retries: ${result.error?.message}`)); logger.warn(`Checkpoint creation failed after retries: ${result.error?.message}`);
return result; return result;
} }
} }
export async function commitGitSuccess( export async function commitGitSuccess(
sourceDir: string, sourceDir: string,
description: string description: string,
logger: ActivityLogger
): Promise<GitOperationResult> { ): Promise<GitOperationResult> {
// Skip git operations if not a git repository // Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) { if (!(await isGitRepository(sourceDir))) {
console.log(chalk.gray(` ⏭️ Skipping git commit (not a git repository)`)); logger.info('Skipping git commit (not a git repository)');
return { success: true }; return { success: true };
} }
console.log(chalk.green(` 💾 Committing successful results for ${description}`)); logger.info(`Committing successful results for ${description}`);
try { try {
const changes = await getChangedFiles(sourceDir, 'status check for success commit'); const changes = await getChangedFiles(sourceDir, 'status check for success commit');
@@ -269,15 +293,14 @@ export async function commitGitSuccess(
logChangeSummary( logChangeSummary(
changes, changes,
'Success commit created with {count} file changes:', 'Success commit created with {count} file changes:',
'Empty success commit created (agent made no file changes)', 'Empty success commit created (agent made no file changes)',
chalk.green, logger
5
); );
return { success: true }; return { success: true };
} catch (error) { } catch (error) {
const result = toErrorResult(error); const result = toErrorResult(error);
console.log(chalk.yellow(` ⚠️ Success commit failed after retries: ${result.error?.message}`)); logger.warn(`Success commit failed after retries: ${result.error?.message}`);
return result; return result;
} }
} }
+23
View File
@@ -0,0 +1,23 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Services Module
*
* Exports DI container and service classes for Shannon agent execution.
* Services are pure domain logic with no Temporal dependencies.
*/
export { Container, getOrCreateContainer, removeContainer } from './container.js';
export type { ContainerDependencies } from './container.js';
export { ConfigLoaderService } from './config-loader.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { AgentExecutionService } from './agent-execution.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
export { loadPrompt } from './prompt-manager.js';
@@ -5,10 +5,10 @@
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import { fs, path } from 'zx'; import { fs, path } from 'zx';
import chalk from 'chalk'; import { PentestError, handlePromptError } from './error-handling.js';
import { PentestError, handlePromptError } from '../error-handling.js'; import { MCP_AGENT_MAPPING } from '../session-manager.js';
import { MCP_AGENT_MAPPING } from '../constants.js';
import type { Authentication, DistributedConfig } from '../types/config.js'; import type { Authentication, DistributedConfig } from '../types/config.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface PromptVariables { interface PromptVariables {
webUrl: string; webUrl: string;
@@ -22,9 +22,9 @@ interface IncludeReplacement {
} }
// Pure function: Build complete login instructions from config // Pure function: Build complete login instructions from config
async function buildLoginInstructions(authentication: Authentication): Promise<string> { async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
try { try {
// Load the login instructions template // 1. Load the login instructions template
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt'); const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
if (!await fs.pathExists(loginInstructionsPath)) { if (!await fs.pathExists(loginInstructionsPath)) {
@@ -38,37 +38,33 @@ async function buildLoginInstructions(authentication: Authentication): Promise<s
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8'); const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
// Helper function to extract sections based on markers
const getSection = (content: string, sectionName: string): string => { const getSection = (content: string, sectionName: string): string => {
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g'); const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
const match = regex.exec(content); const match = regex.exec(content);
return match ? match[1]!.trim() : ''; return match ? match[1]!.trim() : '';
}; };
// Extract sections based on login type // 2. Extract sections based on login type
const loginType = authentication.login_type?.toUpperCase(); const loginType = authentication.login_type?.toUpperCase();
let loginInstructions = ''; let loginInstructions = '';
// Build instructions with only relevant sections
const commonSection = getSection(fullTemplate, 'COMMON'); const commonSection = getSection(fullTemplate, 'COMMON');
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
const verificationSection = getSection(fullTemplate, 'VERIFICATION'); const verificationSection = getSection(fullTemplate, 'VERIFICATION');
// Fallback to full template if markers are missing (backward compatibility) // 3. Assemble instructions from sections (fallback to full template if markers missing)
if (!commonSection && !authSection && !verificationSection) { if (!commonSection && !authSection && !verificationSection) {
console.log(chalk.yellow('⚠️ Section markers not found, using full login instructions template')); logger.warn('Section markers not found, using full login instructions template');
loginInstructions = fullTemplate; loginInstructions = fullTemplate;
} else { } else {
// Combine relevant sections
loginInstructions = [commonSection, authSection, verificationSection] loginInstructions = [commonSection, authSection, verificationSection]
.filter(section => section) // Remove empty sections .filter(section => section)
.join('\n\n'); .join('\n\n');
} }
// Replace the user instructions placeholder with the login flow from config // 4. Interpolate login flow and credential placeholders
let userInstructions = (authentication.login_flow ?? []).join('\n'); let userInstructions = (authentication.login_flow ?? []).join('\n');
// Replace credential placeholders within the user instructions
if (authentication.credentials) { if (authentication.credentials) {
if (authentication.credentials.username) { if (authentication.credentials.username) {
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username); userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
@@ -83,7 +79,7 @@ async function buildLoginInstructions(authentication: Authentication): Promise<s
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions); loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
// Replace TOTP secret placeholder if present in template // 5. Replace TOTP secret placeholder if present in template
if (authentication.credentials?.totp_secret) { if (authentication.credentials?.totp_secret) {
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret); loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
} }
@@ -128,7 +124,8 @@ async function processIncludes(content: string, baseDir: string): Promise<string
async function interpolateVariables( async function interpolateVariables(
template: string, template: string,
variables: PromptVariables, variables: PromptVariables,
config: DistributedConfig | null = null config: DistributedConfig | null = null,
logger: ActivityLogger
): Promise<string> { ): Promise<string> {
try { try {
if (!template || typeof template !== 'string') { if (!template || typeof template !== 'string') {
@@ -174,7 +171,7 @@ async function interpolateVariables(
// Extract and inject login instructions from config // Extract and inject login instructions from config
if (config.authentication?.login_flow) { if (config.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication); const loginInstructions = await buildLoginInstructions(config.authentication, logger);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions); result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
} else { } else {
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, ''); result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
@@ -189,7 +186,7 @@ async function interpolateVariables(
// Validate that all placeholders have been replaced (excluding instructional text) // Validate that all placeholders have been replaced (excluding instructional text)
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g); const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
if (remainingPlaceholders) { if (remainingPlaceholders) {
console.log(chalk.yellow(`⚠️ Warning: Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`)); logger.warn(`Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`);
} }
return result; return result;
@@ -212,20 +209,19 @@ export async function loadPrompt(
promptName: string, promptName: string,
variables: PromptVariables, variables: PromptVariables,
config: DistributedConfig | null = null, config: DistributedConfig | null = null,
pipelineTestingMode: boolean = false pipelineTestingMode: boolean = false,
logger: ActivityLogger
): Promise<string> { ): Promise<string> {
try { try {
// Use pipeline testing prompts if pipeline testing mode is enabled // 1. Resolve prompt file path
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts'; const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir); const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
const promptPath = path.join(promptsDir, `${promptName}.txt`); const promptPath = path.join(promptsDir, `${promptName}.txt`);
// Debug message for pipeline testing mode
if (pipelineTestingMode) { if (pipelineTestingMode) {
console.log(chalk.yellow(`Using pipeline testing prompt: ${promptPath}`)); logger.info(`Using pipeline testing prompt: ${promptPath}`);
} }
// Check if file exists first
if (!await fs.pathExists(promptPath)) { if (!await fs.pathExists(promptPath)) {
throw new PentestError( throw new PentestError(
`Prompt file not found: ${promptPath}`, `Prompt file not found: ${promptPath}`,
@@ -235,26 +231,26 @@ export async function loadPrompt(
); );
} }
// Add MCP server assignment to variables // 2. Assign MCP server based on agent name
const enhancedVariables: PromptVariables = { ...variables }; const enhancedVariables: PromptVariables = { ...variables };
// Assign MCP server based on prompt name (agent name)
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING]; const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
if (mcpServer) { if (mcpServer) {
enhancedVariables.MCP_SERVER = mcpServer; enhancedVariables.MCP_SERVER = mcpServer;
console.log(chalk.gray(` 🎭 Assigned ${promptName} ${enhancedVariables.MCP_SERVER}`)); logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
} else { } else {
// Fallback for unknown agents
enhancedVariables.MCP_SERVER = 'playwright-agent1'; enhancedVariables.MCP_SERVER = 'playwright-agent1';
console.log(chalk.yellow(` 🎭 Unknown agent ${promptName}, using fallback ${enhancedVariables.MCP_SERVER}`)); logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
} }
// 3. Read template file
let template = await fs.readFile(promptPath, 'utf8'); let template = await fs.readFile(promptPath, 'utf8');
// Pre-process the template to handle @include directives // 4. Process @include directives
template = await processIncludes(template, promptsDir); template = await processIncludes(template, promptsDir);
return await interpolateVariables(template, enhancedVariables, config); // 5. Interpolate variables and return final prompt
return await interpolateVariables(template, enhancedVariables, config, logger);
} catch (error) { } catch (error) {
if (error instanceof PentestError) { if (error instanceof PentestError) {
throw error; throw error;
@@ -6,9 +6,12 @@
import { fs, path } from 'zx'; import { fs, path } from 'zx';
import { PentestError } from './error-handling.js'; import { PentestError } from './error-handling.js';
import { asyncPipe } from './utils/functional.js'; import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { asyncPipe } from '../utils/functional.js';
import type { VulnType, ExploitationDecision } from '../types/agents.js';
export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz'; export type { VulnType, ExploitationDecision } from '../types/agents.js';
interface VulnTypeConfigItem { interface VulnTypeConfigItem {
deliverable: string; deliverable: string;
@@ -60,18 +63,11 @@ interface QueueValidationResult {
error: string | null; error: string | null;
} }
/**
 * Decision record for a single vulnerability type.
 *
 * This is the value that `validateQueueAndDeliverable` resolves with and that
 * the safe-validation wrapper hands back to callers on success.
 */
export interface ExploitationDecision {
// NOTE(review): presumably whether the exploitation agent should run for this type — confirm in validateQueueAndDeliverable.
shouldExploit: boolean;
// NOTE(review): presumably whether the producing agent should be re-run — confirm in validateQueueAndDeliverable.
shouldRetry: boolean;
// NOTE(review): looks like the number of entries found in the exploitation queue — confirm.
vulnerabilityCount: number;
// Vulnerability category this decision applies to.
vulnType: VulnType;
}
export interface SafeValidationResult { /**
success: boolean; * Result type for safe validation - explicit error handling.
data?: ExploitationDecision; */
error?: PentestError; export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
}
// Vulnerability type configuration as immutable data // Vulnerability type configuration as immutable data
const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({ const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
@@ -196,7 +192,8 @@ const validateExistenceRules = (
deliverablePath: pathsWithExistence.deliverable, deliverablePath: pathsWithExistence.deliverable,
queuePath: pathsWithExistence.queue, queuePath: pathsWithExistence.queue,
existence, existence,
} },
ErrorCode.DELIVERABLE_NOT_FOUND
), ),
}; };
} }
@@ -311,15 +308,18 @@ export async function validateQueueAndDeliverable(
); );
} }
// Pure function to safely validate (returns result instead of throwing) /**
export const safeValidateQueueAndDeliverable = async ( * Safely validate queue and deliverable files.
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
*/
export async function validateQueueSafe(
vulnType: VulnType, vulnType: VulnType,
sourceDir: string sourceDir: string
): Promise<SafeValidationResult> => { ): Promise<SafeValidationResult> {
try { try {
const result = await validateQueueAndDeliverable(vulnType, sourceDir); const result = await validateQueueAndDeliverable(vulnType, sourceDir);
return { success: true, data: result }; return ok(result);
} catch (error) { } catch (error) {
return { success: false, error: error as PentestError }; return err(error as PentestError);
} }
}; }
@@ -5,8 +5,9 @@
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import { fs, path } from 'zx'; import { fs, path } from 'zx';
import chalk from 'chalk'; import { PentestError } from './error-handling.js';
import { PentestError } from '../error-handling.js'; import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface DeliverableFile { interface DeliverableFile {
name: string; name: string;
@@ -15,7 +16,7 @@ interface DeliverableFile {
} }
// Pure function: Assemble final report from specialist deliverables // Pure function: Assemble final report from specialist deliverables
export async function assembleFinalReport(sourceDir: string): Promise<string> { export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise<string> {
const deliverableFiles: DeliverableFile[] = [ const deliverableFiles: DeliverableFile[] = [
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false }, { name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false }, { name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
@@ -32,18 +33,24 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
if (await fs.pathExists(filePath)) { if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8'); const content = await fs.readFile(filePath, 'utf8');
sections.push(content); sections.push(content);
console.log(chalk.green(`Added ${file.name} findings`)); logger.info(`Added ${file.name} findings`);
} else if (file.required) { } else if (file.required) {
throw new Error(`Required file ${file.path} not found`); throw new PentestError(
`Required deliverable file not found: ${file.path}`,
'filesystem',
false,
{ deliverableFile: file.path, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND
);
} else { } else {
console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`)); logger.info(`No ${file.name} deliverable found`);
} }
} catch (error) { } catch (error) {
if (file.required) { if (file.required) {
throw error; throw error;
} }
const err = error as Error; const err = error as Error;
console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${err.message}`)); logger.warn(`Could not read ${file.path}: ${err.message}`);
} }
} }
@@ -55,7 +62,7 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
// Ensure deliverables directory exists // Ensure deliverables directory exists
await fs.ensureDir(deliverablesDir); await fs.ensureDir(deliverablesDir);
await fs.writeFile(finalReportPath, finalContent); await fs.writeFile(finalReportPath, finalContent);
console.log(chalk.green(`Final report assembled at ${finalReportPath}`)); logger.info(`Final report assembled at ${finalReportPath}`);
} catch (error) { } catch (error) {
const err = error as Error; const err = error as Error;
throw new PentestError( throw new PentestError(
@@ -76,13 +83,14 @@ export async function assembleFinalReport(sourceDir: string): Promise<string> {
*/ */
export async function injectModelIntoReport( export async function injectModelIntoReport(
repoPath: string, repoPath: string,
outputPath: string outputPath: string,
logger: ActivityLogger
): Promise<void> { ): Promise<void> {
// 1. Read session.json to get model information // 1. Read session.json to get model information
const sessionJsonPath = path.join(outputPath, 'session.json'); const sessionJsonPath = path.join(outputPath, 'session.json');
if (!(await fs.pathExists(sessionJsonPath))) { if (!(await fs.pathExists(sessionJsonPath))) {
console.log(chalk.yellow('⚠️ session.json not found, skipping model injection')); logger.warn('session.json not found, skipping model injection');
return; return;
} }
@@ -103,18 +111,18 @@ export async function injectModelIntoReport(
} }
if (models.size === 0) { if (models.size === 0) {
console.log(chalk.yellow('⚠️ No model information found in session.json')); logger.warn('No model information found in session.json');
return; return;
} }
const modelStr = Array.from(models).join(', '); const modelStr = Array.from(models).join(', ');
console.log(chalk.blue(`📝 Injecting model info into report: ${modelStr}`)); logger.info(`Injecting model info into report: ${modelStr}`);
// 3. Read the final report // 3. Read the final report
const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md'); const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
if (!(await fs.pathExists(reportPath))) { if (!(await fs.pathExists(reportPath))) {
console.log(chalk.yellow('⚠️ Final report not found, skipping model injection')); logger.warn('Final report not found, skipping model injection');
return; return;
} }
@@ -132,7 +140,7 @@ export async function injectModelIntoReport(
assessmentDatePattern, assessmentDatePattern,
`$1\n${modelLine}` `$1\n${modelLine}`
); );
console.log(chalk.green('Model info injected into Executive Summary')); logger.info('Model info injected into Executive Summary');
} else { } else {
// If no Assessment Date line found, try to add after Executive Summary header // If no Assessment Date line found, try to add after Executive Summary header
const execSummaryPattern = /^## Executive Summary$/m; const execSummaryPattern = /^## Executive Summary$/m;
@@ -142,9 +150,9 @@ export async function injectModelIntoReport(
execSummaryPattern, execSummaryPattern,
`## Executive Summary\n- Model: ${modelStr}` `## Executive Summary\n- Model: ${modelStr}`
); );
console.log(chalk.green('Model info added to Executive Summary header')); logger.info('Model info added to Executive Summary header');
} else { } else {
console.log(chalk.yellow('⚠️ Could not find Executive Summary section')); logger.warn('Could not find Executive Summary section');
return; return;
} }
} }
+142 -46
View File
@@ -4,106 +4,105 @@
// it under the terms of the GNU Affero General Public License version 3 // it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import { path } from 'zx'; import { path, fs } from 'zx';
import type { AgentName } from './types/index.js'; import { validateQueueAndDeliverable } from './services/queue-validation.js';
import type { AgentName, AgentDefinition, PlaywrightAgent, AgentValidator, VulnType } from './types/index.js';
// Agent definition interface import type { ActivityLogger } from './types/activity-logger.js';
export interface AgentDefinition {
name: AgentName;
displayName: string;
prerequisites: AgentName[];
}
// Agent definitions according to PRD // Agent definitions according to PRD
// NOTE: deliverableFilename values must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freeze({
'pre-recon': { 'pre-recon': {
name: 'pre-recon', name: 'pre-recon',
displayName: 'Pre-recon agent', displayName: 'Pre-recon agent',
prerequisites: [] prerequisites: [],
promptTemplate: 'pre-recon-code',
deliverableFilename: 'code_analysis_deliverable.md',
}, },
'recon': { 'recon': {
name: 'recon', name: 'recon',
displayName: 'Recon agent', displayName: 'Recon agent',
prerequisites: ['pre-recon'] prerequisites: ['pre-recon'],
promptTemplate: 'recon',
deliverableFilename: 'recon_deliverable.md',
}, },
'injection-vuln': { 'injection-vuln': {
name: 'injection-vuln', name: 'injection-vuln',
displayName: 'Injection vuln agent', displayName: 'Injection vuln agent',
prerequisites: ['recon'] prerequisites: ['recon'],
promptTemplate: 'vuln-injection',
deliverableFilename: 'injection_analysis_deliverable.md',
}, },
'xss-vuln': { 'xss-vuln': {
name: 'xss-vuln', name: 'xss-vuln',
displayName: 'XSS vuln agent', displayName: 'XSS vuln agent',
prerequisites: ['recon'] prerequisites: ['recon'],
promptTemplate: 'vuln-xss',
deliverableFilename: 'xss_analysis_deliverable.md',
}, },
'auth-vuln': { 'auth-vuln': {
name: 'auth-vuln', name: 'auth-vuln',
displayName: 'Auth vuln agent', displayName: 'Auth vuln agent',
prerequisites: ['recon'] prerequisites: ['recon'],
promptTemplate: 'vuln-auth',
deliverableFilename: 'auth_analysis_deliverable.md',
}, },
'ssrf-vuln': { 'ssrf-vuln': {
name: 'ssrf-vuln', name: 'ssrf-vuln',
displayName: 'SSRF vuln agent', displayName: 'SSRF vuln agent',
prerequisites: ['recon'] prerequisites: ['recon'],
promptTemplate: 'vuln-ssrf',
deliverableFilename: 'ssrf_analysis_deliverable.md',
}, },
'authz-vuln': { 'authz-vuln': {
name: 'authz-vuln', name: 'authz-vuln',
displayName: 'Authz vuln agent', displayName: 'Authz vuln agent',
prerequisites: ['recon'] prerequisites: ['recon'],
promptTemplate: 'vuln-authz',
deliverableFilename: 'authz_analysis_deliverable.md',
}, },
'injection-exploit': { 'injection-exploit': {
name: 'injection-exploit', name: 'injection-exploit',
displayName: 'Injection exploit agent', displayName: 'Injection exploit agent',
prerequisites: ['injection-vuln'] prerequisites: ['injection-vuln'],
promptTemplate: 'exploit-injection',
deliverableFilename: 'injection_exploitation_evidence.md',
}, },
'xss-exploit': { 'xss-exploit': {
name: 'xss-exploit', name: 'xss-exploit',
displayName: 'XSS exploit agent', displayName: 'XSS exploit agent',
prerequisites: ['xss-vuln'] prerequisites: ['xss-vuln'],
promptTemplate: 'exploit-xss',
deliverableFilename: 'xss_exploitation_evidence.md',
}, },
'auth-exploit': { 'auth-exploit': {
name: 'auth-exploit', name: 'auth-exploit',
displayName: 'Auth exploit agent', displayName: 'Auth exploit agent',
prerequisites: ['auth-vuln'] prerequisites: ['auth-vuln'],
promptTemplate: 'exploit-auth',
deliverableFilename: 'auth_exploitation_evidence.md',
}, },
'ssrf-exploit': { 'ssrf-exploit': {
name: 'ssrf-exploit', name: 'ssrf-exploit',
displayName: 'SSRF exploit agent', displayName: 'SSRF exploit agent',
prerequisites: ['ssrf-vuln'] prerequisites: ['ssrf-vuln'],
promptTemplate: 'exploit-ssrf',
deliverableFilename: 'ssrf_exploitation_evidence.md',
}, },
'authz-exploit': { 'authz-exploit': {
name: 'authz-exploit', name: 'authz-exploit',
displayName: 'Authz exploit agent', displayName: 'Authz exploit agent',
prerequisites: ['authz-vuln'] prerequisites: ['authz-vuln'],
promptTemplate: 'exploit-authz',
deliverableFilename: 'authz_exploitation_evidence.md',
}, },
'report': { 'report': {
name: 'report', name: 'report',
displayName: 'Report agent', displayName: 'Report agent',
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'] prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
} promptTemplate: 'report-executive',
}); deliverableFilename: 'comprehensive_security_assessment_report.md',
},
/**
 * Agent execution order, as a frozen list: the two recon agents, then the
 * five vulnerability-analysis agents, then the five exploitation agents, and
 * finally the report agent.
 */
export const AGENT_ORDER: readonly AgentName[] = Object.freeze([
  // Reconnaissance
  'pre-recon', 'recon',
  // Vulnerability analysis
  'injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln',
  // Exploitation
  'injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit',
  // Reporting
  'report',
] as const);
// Parallel execution groups
export const getParallelGroups = (): Readonly<{ vuln: AgentName[]; exploit: AgentName[] }> => Object.freeze({
vuln: ['injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln'],
exploit: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit']
}); });
// Phase names for metrics aggregation // Phase names for metrics aggregation
@@ -126,4 +125,101 @@ export const AGENT_PHASE_MAP: Readonly<Record<AgentName, PhaseName>> = Object.fr
'report': 'reporting', 'report': 'reporting',
}); });
/**
 * Build the output validator for a vulnerability-analysis agent.
 *
 * The returned validator resolves to `true` when `validateQueueAndDeliverable`
 * completes for `vulnType`. If that call throws, the failure is logged as a
 * warning and the validator resolves to `false` instead of propagating it.
 */
function createVulnValidator(vulnType: VulnType): AgentValidator {
  // Normalize whatever was thrown into a printable message.
  const describeFailure = (cause: unknown): string =>
    cause instanceof Error ? cause.message : String(cause);

  return async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    try {
      await validateQueueAndDeliverable(vulnType, sourceDir);
    } catch (cause) {
      logger.warn(`Queue validation failed for ${vulnType}: ${describeFailure(cause)}`);
      return false;
    }
    return true;
  };
}
/**
 * Build the output validator for an exploitation agent.
 *
 * The returned validator reports whether the file
 * `<sourceDir>/deliverables/<vulnType>_exploitation_evidence.md` exists.
 */
function createExploitValidator(vulnType: VulnType): AgentValidator {
  return async (sourceDir: string): Promise<boolean> => {
    const evidencePath = path.join(sourceDir, 'deliverables', `${vulnType}_exploitation_evidence.md`);
    const evidencePresent = await fs.pathExists(evidencePath);
    return evidencePresent;
  };
}
/**
 * Playwright MCP instance assigned to each prompt template.
 *
 * Keys are the `promptTemplate` values from the AGENTS registry. Every agent
 * is pinned to a specific Playwright instance to prevent conflicts. The five
 * agents of a parallel phase are spread over agent1..agent5, and each exploit
 * agent uses the same instance as its vuln counterpart.
 */
export const MCP_AGENT_MAPPING: Record<string, PlaywrightAgent> = Object.freeze({
  // Phase 1 - pre-reconnaissance (prompt name 'pre-recon-code').
  // Pure code analysis with no browser automation; an instance is assigned
  // anyway for consistency and future extensibility.
  'pre-recon-code': 'playwright-agent1',

  // Phase 2 - reconnaissance (prompt name 'recon').
  'recon': 'playwright-agent2',

  // Phase 3 - vulnerability analysis (5 parallel agents).
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',

  // Phase 4 - exploitation (5 parallel agents, same instances as phase 3).
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',

  // Phase 5 - reporting (prompt name 'report-executive').
  // Typically text-based with no browser automation; an instance is assigned
  // anyway for potential screenshot inclusion or future needs.
  'report-executive': 'playwright-agent3',
});
/**
 * Output validator for every agent, keyed directly by agent name.
 *
 * Each validator receives the target repository directory (and a logger) and
 * resolves to `true` when the agent's expected output is present.
 */
export const AGENT_VALIDATORS: Record<AgentName, AgentValidator> = Object.freeze({
  // Pre-reconnaissance: the code analysis deliverable written by the agent must exist.
  'pre-recon': async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md')),

  // Reconnaissance: the recon deliverable must exist.
  'recon': async (sourceDir: string): Promise<boolean> =>
    fs.pathExists(path.join(sourceDir, 'deliverables', 'recon_deliverable.md')),

  // Vulnerability analysis: queue + deliverable validation per vulnerability type.
  'injection-vuln': createVulnValidator('injection'),
  'xss-vuln': createVulnValidator('xss'),
  'auth-vuln': createVulnValidator('auth'),
  'ssrf-vuln': createVulnValidator('ssrf'),
  'authz-vuln': createVulnValidator('authz'),

  // Exploitation: the evidence file per vulnerability type must exist.
  'injection-exploit': createExploitValidator('injection'),
  'xss-exploit': createExploitValidator('xss'),
  'auth-exploit': createExploitValidator('auth'),
  'ssrf-exploit': createExploitValidator('ssrf'),
  'authz-exploit': createExploitValidator('authz'),

  // Executive report: the final report must exist; a missing report is logged as an error.
  'report': async (sourceDir: string, logger: ActivityLogger): Promise<boolean> => {
    const reportPath = path.join(sourceDir, 'deliverables', 'comprehensive_security_assessment_report.md');
    if (await fs.pathExists(reportPath)) {
      return true;
    }
    logger.error('Missing required deliverable: comprehensive_security_assessment_report.md');
    return false;
  },
});
-56
View File
@@ -1,56 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError } from '../error-handling.js';
/**
 * Best-effort git preparation of a local target repository.
 *
 * Runs `git init` when no `.git` entry exists, sets the commit identity used
 * by the pentest agent, and records an initial checkpoint commit
 * (`--allow-empty`). Any failure is printed as a warning and swallowed, so
 * the caller continues without git setup.
 */
async function prepareGitCheckpoint(sourceDir: string): Promise<void> {
  try {
    const alreadyRepo = await fs.pathExists(path.join(sourceDir, '.git'));
    if (!alreadyRepo) {
      await $`cd ${sourceDir} && git init`;
      console.log(chalk.blue('✅ Git repository initialized'));
    }

    // Commit identity for the pentest agent
    await $`cd ${sourceDir} && git config user.name "Pentest Agent"`;
    await $`cd ${sourceDir} && git config user.email "agent@localhost"`;

    // Initial checkpoint
    await $`cd ${sourceDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`;
    console.log(chalk.green('✅ Initial checkpoint created'));
  } catch (gitFailure) {
    // Non-fatal: report and carry on without git setup
    const detail = gitFailure instanceof Error ? gitFailure.message : String(gitFailure);
    console.log(chalk.yellow(`⚠️ Git setup warning: ${detail}`));
  }
}

/**
 * Prepare a local repository for testing and return its resolved path.
 *
 * MCP servers are configured through the executor's `mcpServers` option and
 * the MCP tools are served natively, so nothing is installed or copied into
 * the target repository here; only the git checkpoint is prepared.
 *
 * @param repoPath - Path to the repository; resolved with `path.resolve`.
 * @returns The resolved repository directory.
 * @throws PentestError with category 'filesystem' (non-retryable) when setup
 *   fails for a reason other than the swallowed git errors.
 */
export async function setupLocalRepo(repoPath: string): Promise<string> {
  try {
    const sourceDir = path.resolve(repoPath);
    await prepareGitCheckpoint(sourceDir);
    return sourceDir;
  } catch (error) {
    if (error instanceof PentestError) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    throw new PentestError(`Local repository setup failed: ${reason}`, 'filesystem', false, {
      repoPath,
      originalError: reason,
    });
  }
}
+182 -347
View File
@@ -7,28 +7,58 @@
/** /**
* Temporal activities for Shannon agent execution. * Temporal activities for Shannon agent execution.
* *
* Each activity wraps a single agent execution with: * Each activity wraps service calls with Temporal-specific concerns:
* - Heartbeat loop (2s interval) to signal worker liveness * - Heartbeat loop (2s interval) to signal worker liveness
* - Git checkpoint/rollback/commit per attempt * - Error classification into ApplicationFailure
* - Error classification for Temporal retry behavior * - Container lifecycle management
* - Audit session logging
* *
* Temporal handles retries based on error classification: * Business logic is delegated to services in src/services/.
* - Retryable: BillingError, TransientError (429, 5xx, network)
* - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
*/ */
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity'; import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
import chalk from 'chalk'; import path from 'path';
import fs from 'fs/promises';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { getOrCreateContainer, getContainer, removeContainer } from '../services/container.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import type { VulnType, ExploitationDecision } from '../services/queue-validation.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import { copyDeliverablesToAudit, type SessionMetadata } from '../audit/utils.js';
import { readJson, fileExists } from '../utils/file-io.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { AGENTS } from '../session-manager.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { createActivityLogger } from './activity-logger.js';
// Max lengths to prevent Temporal protobuf buffer overflow // Max lengths to prevent Temporal protobuf buffer overflow
const MAX_ERROR_MESSAGE_LENGTH = 2000; const MAX_ERROR_MESSAGE_LENGTH = 2000;
const MAX_STACK_TRACE_LENGTH = 1000; const MAX_STACK_TRACE_LENGTH = 1000;
// Max retries for output validation errors (agent didn't save deliverables) // Max retries for output validation errors (agent didn't save deliverables)
// Lower than default 50 since this is unlikely to self-heal
const MAX_OUTPUT_VALIDATION_RETRIES = 3; const MAX_OUTPUT_VALIDATION_RETRIES = 3;
const HEARTBEAT_INTERVAL_MS = 2000;
/**
 * Input shared by all agent activities.
 *
 * `buildSessionMetadata` derives the audit session metadata from it, and
 * `runAgentActivity` forwards the execution-related fields to the agent
 * execution service.
 */
export interface ActivityInput {
// Copied into the session metadata and passed on to agent execution.
webUrl: string;
// Copied into the session metadata and passed on to agent execution.
repoPath: string;
// Optional; forwarded to agent execution unchanged.
configPath?: string;
// Optional; only included in the session metadata when set.
outputPath?: string;
// Optional; treated as false when omitted.
pipelineTestingMode?: boolean;
// Used to get or create the service container and to initialize the audit session.
workflowId: string;
// Becomes `id` in the session metadata.
// NOTE(review): looks like the workspace name on resume, or the workflowId for new runs — confirm with callers.
sessionId: string;
}
/** /**
* Truncate error message to prevent buffer overflow in Temporal serialization. * Truncate error message to prevent buffer overflow in Temporal serialization.
*/ */
@@ -48,85 +78,34 @@ function truncateStackTrace(failure: ApplicationFailure): void {
} }
} }
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import { parseConfig, distributeConfig } from '../config-parser.js';
import { classifyErrorForTemporal } from '../error-handling.js';
import {
safeValidateQueueAndDeliverable,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import type { DistributedConfig } from '../types/config.js';
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
import path from 'path';
import fs from 'fs/promises';
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
/** /**
* Input for all agent activities. * Build SessionMetadata from ActivityInput.
* Matches PipelineInput but with required workflowId for audit correlation.
*/ */
export interface ActivityInput { function buildSessionMetadata(input: ActivityInput): SessionMetadata {
webUrl: string; const { webUrl, repoPath, outputPath, sessionId } = input;
repoPath: string; return {
configPath?: string; id: sessionId,
outputPath?: string; webUrl,
pipelineTestingMode?: boolean; repoPath,
workflowId: string; ...(outputPath && { outputPath }),
sessionId: string; // Workspace name (for resume) or workflowId (for new runs) };
} }
/** /**
* Core activity implementation. * Core activity implementation using services.
* *
* Executes a single agent with: * Executes a single agent with:
* 1. Heartbeat loop for worker liveness * 1. Heartbeat loop for worker liveness
* 2. Config loading (if configPath provided) * 2. Container creation/reuse
* 3. Audit session initialization * 3. Service-based agent execution
* 4. Prompt loading * 4. Error classification for Temporal retry
* 5. Git checkpoint before execution
* 6. Agent execution (single attempt)
* 7. Output validation
* 8. Git commit on success, rollback on failure
* 9. Error classification for Temporal retry
*/ */
async function runAgentActivity( async function runAgentActivity(
agentName: AgentName, agentName: AgentName,
input: ActivityInput input: ActivityInput
): Promise<AgentMetrics> { ): Promise<AgentMetrics> {
const { const { repoPath, configPath, pipelineTestingMode = false, workflowId, webUrl } = input;
webUrl,
repoPath,
configPath,
outputPath,
pipelineTestingMode = false,
workflowId,
} = input;
const startTime = Date.now(); const startTime = Date.now();
// Get attempt number from Temporal context (tracks retries automatically)
const attemptNumber = Context.current().info.attempt; const attemptNumber = Context.current().info.attempt;
// Heartbeat loop - signals worker is alive to Temporal server // Heartbeat loop - signals worker is alive to Temporal server
@@ -136,160 +115,66 @@ async function runAgentActivity(
}, HEARTBEAT_INTERVAL_MS); }, HEARTBEAT_INTERVAL_MS);
try { try {
// 1. Load config (if provided) const logger = createActivityLogger();
let distributedConfig: DistributedConfig | null = null;
if (configPath) {
try {
const config = await parseConfig(configPath);
distributedConfig = distributeConfig(config);
} catch (err) {
throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
}
}
// 2. Build session metadata for audit // 1. Build session metadata and get/create container
// Use sessionId (workspace name) for directory, workflowId for tracking const sessionMetadata = buildSessionMetadata(input);
const sessionMetadata: SessionMetadata = { const container = getOrCreateContainer(workflowId, sessionMetadata);
id: input.sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
// 3. Initialize audit session (idempotent, safe across retries) // 2. Create audit session for THIS agent execution
// NOTE: Each agent needs its own AuditSession because AuditSession uses
// instance state (currentAgentName) that cannot be shared across parallel agents
const auditSession = new AuditSession(sessionMetadata); const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId); await auditSession.initialize(workflowId);
// 4. Load prompt // 3. Execute agent via service (throws PentestError on failure)
const promptName = getPromptNameForAgent(agentName); const endResult = await container.agentExecution.executeOrThrow(
const prompt = await loadPrompt(
promptName,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode
);
// 5. Create git checkpoint before execution
await createGitCheckpoint(repoPath, agentName, attemptNumber);
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 6. Execute agent (single attempt - Temporal handles retries)
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
'', // context
agentName, // description
agentName, agentName,
chalk.cyan, {
sessionMetadata, webUrl,
repoPath,
configPath,
pipelineTestingMode,
attemptNumber,
},
auditSession, auditSession,
attemptNumber logger
); );
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers // 4. Return metrics
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
if (looksLikeBillingError) {
await rollbackGitWorkspace(repoPath, 'spending cap detected');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: 0,
success: false,
model: result.model,
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
});
// Throw as billing error so Temporal retries with long backoff
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
}
}
// 7. Handle execution failure
if (!result.success) {
await rollbackGitWorkspace(repoPath, 'execution failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: result.error || 'Execution failed',
});
throw new Error(result.error || 'Agent execution failed');
}
// 8. Validate output
const validationPassed = await validateAgentOutput(result, agentName, repoPath);
if (!validationPassed) {
await rollbackGitWorkspace(repoPath, 'validation failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: 'Output validation failed',
});
// Limit output validation retries (unlikely to self-heal)
if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Let Temporal retry (will be classified as OutputValidationError)
throw new Error(`Agent ${agentName} failed output validation`);
}
// 9. Success - commit deliverables, then capture checkpoint hash
await commitGitSuccess(repoPath, agentName);
const commitHash = await getGitCommitHash(repoPath);
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
...(commitHash && { checkpoint: commitHash }),
});
// 10. Return metrics
return { return {
durationMs: Date.now() - startTime, durationMs: Date.now() - startTime,
inputTokens: null, // Not currently exposed by SDK wrapper inputTokens: null,
outputTokens: null, outputTokens: null,
costUsd: result.cost ?? null, costUsd: endResult.cost_usd,
numTurns: result.turns ?? null, numTurns: null,
model: result.model, model: endResult.model,
}; };
} catch (error) { } catch (error) {
// Rollback git workspace before Temporal retry to ensure clean state // If error is already an ApplicationFailure, re-throw directly
try {
await rollbackGitWorkspace(repoPath, 'error recovery');
} catch (rollbackErr) {
// Log but don't fail - rollback is best-effort
console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
}
// If error is already an ApplicationFailure (e.g., from our retry limit logic),
// re-throw it directly without re-classifying
if (error instanceof ApplicationFailure) { if (error instanceof ApplicationFailure) {
throw error; throw error;
} }
// Check if output validation retry limit reached (PentestError with code)
if (
error instanceof PentestError &&
error.code === ErrorCode.OUTPUT_VALIDATION_FAILED &&
attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES
) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Classify error for Temporal retry behavior // Classify error for Temporal retry behavior
const classified = classifyErrorForTemporal(error); const classified = classifyErrorForTemporal(error);
// Truncate message to prevent protobuf buffer overflow
const rawMessage = error instanceof Error ? error.message : String(error); const rawMessage = error instanceof Error ? error.message : String(error);
const message = truncateErrorMessage(rawMessage); const message = truncateErrorMessage(rawMessage);
if (classified.retryable) { if (classified.retryable) {
// Temporal will retry with configured backoff
const failure = ApplicationFailure.create({ const failure = ApplicationFailure.create({
message, message,
type: classified.type, type: classified.type,
@@ -298,7 +183,6 @@ async function runAgentActivity(
truncateStackTrace(failure); truncateStackTrace(failure);
throw failure; throw failure;
} else { } else {
// Fail immediately - no retry
const failure = ApplicationFailure.nonRetryable(message, classified.type, [ const failure = ApplicationFailure.nonRetryable(message, classified.type, [
{ agentName, attemptNumber, elapsed: Date.now() - startTime }, { agentName, attemptNumber, elapsed: Date.now() - startTime },
]); ]);
@@ -310,9 +194,6 @@ async function runAgentActivity(
} }
} }
// === Individual Agent Activity Exports ===
// Each function is a thin wrapper around runAgentActivity with the agent name.
export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> { export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('pre-recon', input); return runAgentActivity('pre-recon', input);
} }
@@ -367,92 +248,56 @@ export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics
/** /**
* Assemble the final report by concatenating exploitation evidence files. * Assemble the final report by concatenating exploitation evidence files.
* This must be called BEFORE runReportAgent to create the file that the report agent will modify.
*/ */
export async function assembleReportActivity(input: ActivityInput): Promise<void> { export async function assembleReportActivity(input: ActivityInput): Promise<void> {
const { repoPath } = input; const { repoPath } = input;
console.log(chalk.blue('📝 Assembling deliverables from specialist agents...')); const logger = createActivityLogger();
logger.info('Assembling deliverables from specialist agents...');
try { try {
await assembleFinalReport(repoPath); await assembleFinalReport(repoPath, logger);
} catch (error) { } catch (error) {
const err = error as Error; const err = error as Error;
console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`)); logger.warn(`Error assembling final report: ${err.message}`);
// Don't throw - the report agent can still create content even if no exploitation files exist
} }
} }
/** /**
* Inject model metadata into the final report. * Inject model metadata into the final report.
* This must be called AFTER runReportAgent to add the model information to the Executive Summary.
*/ */
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> { export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
const { repoPath, sessionId, outputPath } = input; const { repoPath, sessionId, outputPath } = input;
const logger = createActivityLogger();
const effectiveOutputPath = outputPath const effectiveOutputPath = outputPath
? path.join(outputPath, sessionId) ? path.join(outputPath, sessionId)
: path.join('./audit-logs', sessionId); : path.join('./audit-logs', sessionId);
try { try {
await injectModelIntoReport(repoPath, effectiveOutputPath); await injectModelIntoReport(repoPath, effectiveOutputPath, logger);
} catch (error) { } catch (error) {
const err = error as Error; const err = error as Error;
console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`)); logger.warn(`Error injecting model into report: ${err.message}`);
// Don't throw - this is a non-critical enhancement
} }
} }
/** /**
* Check if exploitation should run for a given vulnerability type. * Check if exploitation should run for a given vulnerability type.
* Reads the vulnerability queue file and returns the decision.
* *
* This activity allows the workflow to skip exploit agents entirely * Uses existing container if available (from prior agent runs),
* when no vulnerabilities were found, saving API calls and time. * otherwise creates service directly (stateless, no dependencies).
*
* Error handling:
* - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
* - Non-retryable errors: skip exploitation gracefully
*/ */
export async function checkExploitationQueue( export async function checkExploitationQueue(
input: ActivityInput, input: ActivityInput,
vulnType: VulnType vulnType: VulnType
): Promise<ExploitationDecision> { ): Promise<ExploitationDecision> {
const { repoPath } = input; const { repoPath, workflowId } = input;
const logger = createActivityLogger();
const result = await safeValidateQueueAndDeliverable(vulnType, repoPath); // Reuse container's service if available (from prior vuln agent runs)
const existingContainer = getContainer(workflowId);
const checker = existingContainer?.exploitationChecker ?? new ExploitationCheckerService();
if (result.success && result.data) { return checker.checkQueue(vulnType, repoPath, logger);
const { shouldExploit, vulnerabilityCount } = result.data;
console.log(
chalk.blue(
`🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
)
);
return result.data;
}
// Validation failed - check if we should retry or skip
const error = result.error;
if (error?.retryable) {
// Re-throw retryable errors so Temporal can retry the vuln agent
console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
throw error;
}
// Non-retryable error - skip exploitation gracefully
console.log(
chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
);
return {
shouldExploit: false,
shouldRetry: false,
vulnerabilityCount: 0,
vulnType,
};
} }
// === Resume Activities ===
/**
* Session.json structure for resume state loading
*/
interface SessionJson { interface SessionJson {
session: { session: {
id: string; id: string;
@@ -462,27 +307,27 @@ interface SessionJson {
resumeAttempts?: ResumeAttempt[]; resumeAttempts?: ResumeAttempt[];
}; };
metrics: { metrics: {
agents: Record<string, { agents: Record<
status: 'in-progress' | 'success' | 'failed'; string,
checkpoint?: string; {
}>; status: 'in-progress' | 'success' | 'failed';
checkpoint?: string;
}
>;
}; };
} }
/** /**
* Load resume state from an existing workspace. * Load resume state from an existing workspace.
* Validates workspace exists, URL matches, and determines which agents to skip.
*
* @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
*/ */
export async function loadResumeState( export async function loadResumeState(
workspaceName: string, workspaceName: string,
expectedUrl: string, expectedUrl: string,
expectedRepoPath: string expectedRepoPath: string
): Promise<ResumeState> { ): Promise<ResumeState> {
// 1. Validate workspace exists
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json'); const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
// Validate workspace exists
const exists = await fileExists(sessionPath); const exists = await fileExists(sessionPath);
if (!exists) { if (!exists) {
throw ApplicationFailure.nonRetryable( throw ApplicationFailure.nonRetryable(
@@ -491,7 +336,7 @@ export async function loadResumeState(
); );
} }
// Load session.json // 2. Parse session.json and validate URL match
let session: SessionJson; let session: SessionJson;
try { try {
session = await readJson<SessionJson>(sessionPath); session = await readJson<SessionJson>(sessionPath);
@@ -503,7 +348,6 @@ export async function loadResumeState(
); );
} }
// Validate URL matches
if (session.session.webUrl !== expectedUrl) { if (session.session.webUrl !== expectedUrl) {
throw ApplicationFailure.nonRetryable( throw ApplicationFailure.nonRetryable(
`URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`, `URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`,
@@ -511,34 +355,30 @@ export async function loadResumeState(
); );
} }
// Find completed agents (status === 'success' AND deliverable exists) // 3. Cross-check agent status with deliverables on disk
const completedAgents: string[] = []; const completedAgents: string[] = [];
const agents = session.metrics.agents; const agents = session.metrics.agents;
for (const agentName of ALL_AGENTS) { for (const agentName of ALL_AGENTS) {
const agentData = agents[agentName]; const agentData = agents[agentName];
// Skip if agent never ran or didn't succeed
if (!agentData || agentData.status !== 'success') { if (!agentData || agentData.status !== 'success') {
continue; continue;
} }
// Validate deliverable exists const deliverableFilename = AGENTS[agentName].deliverableFilename;
const deliverablePath = getDeliverablePath(agentName, expectedRepoPath); const deliverablePath = `${expectedRepoPath}/deliverables/${deliverableFilename}`;
const deliverableExists = await fileExists(deliverablePath); const deliverableExists = await fileExists(deliverablePath);
if (!deliverableExists) { if (!deliverableExists) {
console.log( const logger = createActivityLogger();
chalk.yellow(`Agent ${agentName} shows success but deliverable missing, will re-run`) logger.warn(`Agent ${agentName} shows success but deliverable missing, will re-run`);
);
continue; continue;
} }
// Agent completed successfully and deliverable exists
completedAgents.push(agentName); completedAgents.push(agentName);
} }
// Find latest checkpoint from completed agents // 4. Collect git checkpoints and validate at least one exists
const checkpoints = completedAgents const checkpoints = completedAgents
.map((name) => agents[name]?.checkpoint) .map((name) => agents[name]?.checkpoint)
.filter((hash): hash is string => hash != null); .filter((hash): hash is string => hash != null);
@@ -550,24 +390,26 @@ export async function loadResumeState(
throw ApplicationFailure.nonRetryable( throw ApplicationFailure.nonRetryable(
`Cannot resume workspace ${workspaceName}: ` + `Cannot resume workspace ${workspaceName}: ` +
(successAgents.length > 0 (successAgents.length > 0
? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` + ? `${successAgents.length} agent(s) show success in session.json (${successAgents.join(', ')}) ` +
`but their deliverable files are missing from disk. ` + `but their deliverable files are missing from disk. ` +
`Start a fresh run instead.` `Start a fresh run instead.`
: `No agents completed successfully. Start a fresh run instead.`), : `No agents completed successfully. Start a fresh run instead.`),
'NoCheckpointsError' 'NoCheckpointsError'
); );
} }
// Find most recent commit among checkpoints // 5. Find the most recent checkpoint commit
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints); const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
const originalWorkflowId = session.session.originalWorkflowId || session.session.id; const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
console.log(chalk.cyan(`=== RESUME STATE ===`)); // 6. Log summary and return resume state
console.log(`Workspace: ${workspaceName}`); const logger = createActivityLogger();
console.log(`Completed agents: ${completedAgents.length}`); logger.info('Resume state loaded', {
console.log(`Checkpoint: ${checkpointHash}`); workspace: workspaceName,
completedAgents: completedAgents.length,
checkpoint: checkpointHash,
});
return { return {
workspaceName, workspaceName,
@@ -578,20 +420,21 @@ export async function loadResumeState(
}; };
} }
/**
* Find the most recent commit among a list of commit hashes.
* Uses git rev-list to determine which commit is newest.
*/
async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> { async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
if (commitHashes.length === 1) { if (commitHashes.length === 1) {
const hash = commitHashes[0]; const hash = commitHashes[0];
if (!hash) { if (!hash) {
throw new Error('Empty commit hash in array'); throw new PentestError(
'Empty commit hash in array',
'filesystem',
false, // Non-retryable - corrupt workspace state
{ phase: 'resume' },
ErrorCode.GIT_CHECKPOINT_FAILED
);
} }
return hash; return hash;
} }
// Use git rev-list to find the most recent commit among all hashes
const result = await executeGitCommandWithRetry( const result = await executeGitCommandWithRetry(
['git', 'rev-list', '--max-count=1', ...commitHashes], ['git', 'rev-list', '--max-count=1', ...commitHashes],
repoPath, repoPath,
@@ -603,20 +446,15 @@ async function findLatestCommit(repoPath: string, commitHashes: string[]): Promi
/** /**
* Restore git workspace to a checkpoint and clean up partial deliverables. * Restore git workspace to a checkpoint and clean up partial deliverables.
*
* @param repoPath - Repository path
* @param checkpointHash - Git commit hash to reset to
* @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
*/ */
export async function restoreGitCheckpoint( export async function restoreGitCheckpoint(
repoPath: string, repoPath: string,
checkpointHash: string, checkpointHash: string,
incompleteAgents: AgentName[] incompleteAgents: AgentName[]
): Promise<void> { ): Promise<void> {
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`)); const logger = createActivityLogger();
logger.info(`Restoring git workspace to ${checkpointHash}...`);
// Checkpoint hash points to the success commit (after commitGitSuccess),
// so git reset --hard naturally preserves all completed agent deliverables.
await executeGitCommandWithRetry( await executeGitCommandWithRetry(
['git', 'reset', '--hard', checkpointHash], ['git', 'reset', '--hard', checkpointHash],
repoPath, repoPath,
@@ -628,67 +466,60 @@ export async function restoreGitCheckpoint(
'clean untracked files for resume' 'clean untracked files for resume'
); );
// Clean up any partial deliverables from incomplete agents
for (const agentName of incompleteAgents) { for (const agentName of incompleteAgents) {
const deliverablePath = getDeliverablePath(agentName, repoPath); const deliverableFilename = AGENTS[agentName].deliverableFilename;
const deliverablePath = `${repoPath}/deliverables/${deliverableFilename}`;
try { try {
const exists = await fileExists(deliverablePath); const exists = await fileExists(deliverablePath);
if (exists) { if (exists) {
console.log(chalk.yellow(`Cleaning partial deliverable: ${agentName}`)); logger.warn(`Cleaning partial deliverable: ${agentName}`);
await fs.unlink(deliverablePath); await fs.unlink(deliverablePath);
} }
} catch (error) { } catch (error) {
console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`)); logger.info(`Note: Failed to delete ${deliverablePath}: ${error}`);
} }
} }
console.log(chalk.green('Workspace restored to clean state')); logger.info('Workspace restored to clean state');
} }
/** /**
* Record a resume attempt in session.json. * Record a resume attempt in session.json and write resume header to workflow.log.
* Tracks the new workflow ID, terminated workflows, and checkpoint hash.
*/ */
export async function recordResumeAttempt( export async function recordResumeAttempt(
input: ActivityInput, input: ActivityInput,
terminatedWorkflows: string[], terminatedWorkflows: string[],
checkpointHash: string checkpointHash: string,
previousWorkflowId: string,
completedAgents: string[]
): Promise<void> { ): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; const sessionMetadata = buildSessionMetadata(input);
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata); const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(); await auditSession.initialize();
await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash); // Update session.json with resume attempt
await auditSession.addResumeAttempt(input.workflowId, terminatedWorkflows, checkpointHash);
// Write resume header to workflow.log
await auditSession.logResumeHeader({
previousWorkflowId,
newWorkflowId: input.workflowId,
checkpointHash,
completedAgents,
});
} }
/** /**
* Log phase transition to the unified workflow log. * Log phase transition to the unified workflow log.
* Called at phase boundaries for per-workflow logging.
*/ */
export async function logPhaseTransition( export async function logPhaseTransition(
input: ActivityInput, input: ActivityInput,
phase: string, phase: string,
event: 'start' | 'complete' event: 'start' | 'complete'
): Promise<void> { ): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; const sessionMetadata = buildSessionMetadata(input);
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata); const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId); await auditSession.initialize(input.workflowId);
if (event === 'start') { if (event === 'start') {
await auditSession.logPhaseStart(phase); await auditSession.logPhaseStart(phase);
@@ -698,28 +529,23 @@ export async function logPhaseTransition(
} }
/** /**
* Log workflow completion with full summary to the unified workflow log. * Log workflow completion with full summary.
* Called at the end of the workflow to write a summary breakdown. * Cleans up container when done.
*/ */
export async function logWorkflowComplete( export async function logWorkflowComplete(
input: ActivityInput, input: ActivityInput,
summary: WorkflowSummary summary: WorkflowSummary
): Promise<void> { ): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input; const { repoPath, workflowId } = input;
const sessionMetadata = buildSessionMetadata(input);
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
// 1. Initialize audit session and mark final status
const auditSession = new AuditSession(sessionMetadata); const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId); await auditSession.initialize(workflowId);
await auditSession.updateSessionStatus(summary.status); await auditSession.updateSessionStatus(summary.status);
// Use cumulative metrics from session.json (includes all resume attempts) // 2. Load cumulative metrics from session.json
const sessionData = await auditSession.getMetrics() as { const sessionData = (await auditSession.getMetrics()) as {
metrics: { metrics: {
total_duration_ms: number; total_duration_ms: number;
total_cost_usd: number; total_cost_usd: number;
@@ -727,7 +553,7 @@ export async function logWorkflowComplete(
}; };
}; };
// Fill in metrics for skipped agents (completed in previous runs) // 3. Fill in metrics for skipped agents (resumed from previous run)
const agentMetrics = { ...summary.agentMetrics }; const agentMetrics = { ...summary.agentMetrics };
for (const agentName of summary.completedAgents) { for (const agentName of summary.completedAgents) {
if (!agentMetrics[agentName]) { if (!agentMetrics[agentName]) {
@@ -741,18 +567,27 @@ export async function logWorkflowComplete(
} }
} }
// 4. Build cumulative summary with cross-run totals
const cumulativeSummary: WorkflowSummary = { const cumulativeSummary: WorkflowSummary = {
...summary, ...summary,
totalDurationMs: sessionData.metrics.total_duration_ms, totalDurationMs: sessionData.metrics.total_duration_ms,
totalCostUsd: sessionData.metrics.total_cost_usd, totalCostUsd: sessionData.metrics.total_cost_usd,
agentMetrics, agentMetrics,
}; };
// 5. Write completion entry to workflow.log
await auditSession.logWorkflowComplete(cumulativeSummary); await auditSession.logWorkflowComplete(cumulativeSummary);
// Copy all deliverables to audit-logs once at workflow end (non-fatal) // 6. Copy deliverables to audit-logs
try { try {
await copyDeliverablesToAudit(sessionMetadata, repoPath); await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) { } catch (copyErr) {
console.error('Failed to copy deliverables to audit-logs:', copyErr); const logger = createActivityLogger();
logger.error('Failed to copy deliverables to audit-logs', {
error: copyErr instanceof Error ? copyErr.message : String(copyErr),
});
} }
// 7. Clean up container
removeContainer(workflowId);
} }
+34
View File
@@ -0,0 +1,34 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { Context } from '@temporalio/activity';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
 * {@link ActivityLogger} implementation that forwards every entry to the
 * logger exposed by the currently running Temporal activity.
 *
 * The activity context is looked up via `Context.current()` on every call, so
 * the methods below must be invoked from inside a running Temporal activity;
 * the lookup throws otherwise.
 */
export class TemporalActivityLogger implements ActivityLogger {
  /** Emit an info-level entry; an omitted `attrs` is forwarded as `{}`. */
  info(message: string, attrs?: Record<string, unknown>): void {
    const { log } = Context.current();
    log.info(message, attrs ?? {});
  }

  /** Emit a warn-level entry; an omitted `attrs` is forwarded as `{}`. */
  warn(message: string, attrs?: Record<string, unknown>): void {
    const { log } = Context.current();
    log.warn(message, attrs ?? {});
  }

  /** Emit an error-level entry; an omitted `attrs` is forwarded as `{}`. */
  error(message: string, attrs?: Record<string, unknown>): void {
    const { log } = Context.current();
    log.error(message, attrs ?? {});
  }
}
/**
 * Factory for the {@link ActivityLogger} used by activity code.
 *
 * Returns a new `TemporalActivityLogger`. Constructing it does not touch the
 * Temporal activity context; the returned logger resolves the context on each
 * `info`/`warn`/`error` call, so those calls must happen inside a running
 * Temporal activity.
 */
export function createActivityLogger(): ActivityLogger {
  const activityLogger: ActivityLogger = new TemporalActivityLogger();
  return activityLogger;
}
+241 -175
View File
@@ -26,12 +26,11 @@
* TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233) * TEMPORAL_ADDRESS - Temporal server address (default: localhost:7233)
*/ */
import { Connection, Client, WorkflowNotFoundError } from '@temporalio/client'; import { Connection, Client, WorkflowNotFoundError, type WorkflowHandle } from '@temporalio/client';
import dotenv from 'dotenv'; import dotenv from 'dotenv';
import chalk from 'chalk';
import { displaySplashScreen } from '../splash-screen.js'; import { displaySplashScreen } from '../splash-screen.js';
import { sanitizeHostname } from '../audit/utils.js'; import { sanitizeHostname } from '../audit/utils.js';
import { readJson, fileExists } from '../audit/utils.js'; import { readJson, fileExists } from '../utils/file-io.js';
import path from 'path'; import path from 'path';
// Import types only - these don't pull in workflow runtime code // Import types only - these don't pull in workflow runtime code
import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js'; import type { PipelineInput, PipelineState, PipelineProgress } from './shared.js';
@@ -89,18 +88,18 @@ async function terminateExistingWorkflows(
const description = await handle.describe(); const description = await handle.describe();
if (description.status.name === 'RUNNING') { if (description.status.name === 'RUNNING') {
console.log(chalk.yellow(`Terminating running workflow: ${wfId}`)); console.log(`Terminating running workflow: ${wfId}`);
await handle.terminate('Superseded by resume workflow'); await handle.terminate('Superseded by resume workflow');
terminated.push(wfId); terminated.push(wfId);
console.log(chalk.green(`Terminated: ${wfId}`)); console.log(`Terminated: ${wfId}`);
} else { } else {
console.log(chalk.gray(`Workflow already ${description.status.name}: ${wfId}`)); console.log(`Workflow already ${description.status.name}: ${wfId}`);
} }
} catch (error) { } catch (error) {
if (error instanceof WorkflowNotFoundError) { if (error instanceof WorkflowNotFoundError) {
console.log(chalk.gray(`Workflow not found (already cleaned up): ${wfId}`)); console.log(`Workflow not found (already cleaned up): ${wfId}`);
} else { } else {
console.log(chalk.red(`Failed to terminate ${wfId}: ${error}`)); console.log(`Failed to terminate ${wfId}: ${error}`);
// Continue anyway - don't block resume on termination failure // Continue anyway - don't block resume on termination failure
} }
} }
@@ -118,13 +117,13 @@ function isValidWorkspaceName(name: string): boolean {
} }
function showUsage(): void { function showUsage(): void {
console.log(chalk.cyan.bold('\nShannon Temporal Client')); console.log('\nShannon Temporal Client');
console.log(chalk.gray('Start a pentest pipeline workflow\n')); console.log('Start a pentest pipeline workflow\n');
console.log(chalk.yellow('Usage:')); console.log('Usage:');
console.log( console.log(
' node dist/temporal/client.js <webUrl> <repoPath> [options]\n' ' node dist/temporal/client.js <webUrl> <repoPath> [options]\n'
); );
console.log(chalk.yellow('Options:')); console.log('Options:');
console.log(' --config <path> Configuration file path'); console.log(' --config <path> Configuration file path');
console.log(' --output <path> Output directory for audit logs'); console.log(' --output <path> Output directory for audit logs');
console.log(' --pipeline-testing Use minimal prompts for fast testing'); console.log(' --pipeline-testing Use minimal prompts for fast testing');
@@ -133,54 +132,65 @@ function showUsage(): void {
' --workflow-id <id> Custom workflow ID (default: shannon-<timestamp>)' ' --workflow-id <id> Custom workflow ID (default: shannon-<timestamp>)'
); );
console.log(' --wait Wait for workflow completion with progress polling\n'); console.log(' --wait Wait for workflow completion with progress polling\n');
console.log(chalk.yellow('Examples:')); console.log('Examples:');
console.log(' node dist/temporal/client.js https://example.com /path/to/repo'); console.log(' node dist/temporal/client.js https://example.com /path/to/repo');
console.log( console.log(
' node dist/temporal/client.js https://example.com /path/to/repo --config config.yaml\n' ' node dist/temporal/client.js https://example.com /path/to/repo --config config.yaml\n'
); );
} }
async function startPipeline(): Promise<void> { // === CLI Argument Parsing ===
const args = process.argv.slice(2);
if (args.includes('--help') || args.includes('-h') || args.length === 0) { interface CliArgs {
webUrl: string;
repoPath: string;
configPath?: string;
outputPath?: string;
displayOutputPath?: string;
pipelineTestingMode: boolean;
customWorkflowId?: string;
waitForCompletion: boolean;
resumeFromWorkspace?: string;
}
function parseCliArgs(argv: string[]): CliArgs {
if (argv.includes('--help') || argv.includes('-h') || argv.length === 0) {
showUsage(); showUsage();
process.exit(0); process.exit(0);
} }
// Parse arguments
let webUrl: string | undefined; let webUrl: string | undefined;
let repoPath: string | undefined; let repoPath: string | undefined;
let configPath: string | undefined; let configPath: string | undefined;
let outputPath: string | undefined; let outputPath: string | undefined;
let displayOutputPath: string | undefined; // Host path for display purposes let displayOutputPath: string | undefined;
let pipelineTestingMode = false; let pipelineTestingMode = false;
let customWorkflowId: string | undefined; let customWorkflowId: string | undefined;
let waitForCompletion = false; let waitForCompletion = false;
let resumeFromWorkspace: string | undefined; let resumeFromWorkspace: string | undefined;
for (let i = 0; i < args.length; i++) { for (let i = 0; i < argv.length; i++) {
const arg = args[i]; const arg = argv[i];
if (arg === '--config') { if (arg === '--config') {
const nextArg = args[i + 1]; const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) { if (nextArg && !nextArg.startsWith('-')) {
configPath = nextArg; configPath = nextArg;
i++; i++;
} }
} else if (arg === '--output') { } else if (arg === '--output') {
const nextArg = args[i + 1]; const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) { if (nextArg && !nextArg.startsWith('-')) {
outputPath = nextArg; outputPath = nextArg;
i++; i++;
} }
} else if (arg === '--display-output') { } else if (arg === '--display-output') {
const nextArg = args[i + 1]; const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) { if (nextArg && !nextArg.startsWith('-')) {
displayOutputPath = nextArg; displayOutputPath = nextArg;
i++; i++;
} }
} else if (arg === '--workflow-id') { } else if (arg === '--workflow-id') {
const nextArg = args[i + 1]; const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) { if (nextArg && !nextArg.startsWith('-')) {
customWorkflowId = nextArg; customWorkflowId = nextArg;
i++; i++;
@@ -188,7 +198,7 @@ async function startPipeline(): Promise<void> {
} else if (arg === '--pipeline-testing') { } else if (arg === '--pipeline-testing') {
pipelineTestingMode = true; pipelineTestingMode = true;
} else if (arg === '--workspace') { } else if (arg === '--workspace') {
const nextArg = args[i + 1]; const nextArg = argv[i + 1];
if (nextArg && !nextArg.startsWith('-')) { if (nextArg && !nextArg.startsWith('-')) {
resumeFromWorkspace = nextArg; resumeFromWorkspace = nextArg;
i++; i++;
@@ -205,177 +215,233 @@ async function startPipeline(): Promise<void> {
} }
if (!webUrl || !repoPath) { if (!webUrl || !repoPath) {
console.log(chalk.red('Error: webUrl and repoPath are required')); console.log('Error: webUrl and repoPath are required');
showUsage(); showUsage();
process.exit(1); process.exit(1);
} }
// Display splash screen return {
webUrl, repoPath, pipelineTestingMode, waitForCompletion,
...(configPath && { configPath }),
...(outputPath && { outputPath }),
...(displayOutputPath && { displayOutputPath }),
...(customWorkflowId && { customWorkflowId }),
...(resumeFromWorkspace && { resumeFromWorkspace }),
};
}
// === Workspace Resolution ===
/**
 * Outcome of deciding whether this invocation starts a fresh run, starts a
 * new named workspace, or resumes an existing workspace.
 */
interface WorkspaceResolution {
  /** ID the Temporal workflow is started under; regenerated for every run or resume attempt. */
  workflowId: string;
  /**
   * Workspace directory name under the audit-logs directory. Equals
   * `workflowId` for auto-named runs and the workspace name otherwise.
   */
  sessionId: string;
  /** True only when an existing workspace (one with a session.json) is being resumed. */
  isResume: boolean;
  /** Workflows terminated while preparing a resume; empty for non-resume runs. */
  terminatedWorkflows: string[];
}
/**
 * Decides which workflow ID and session (workspace directory) this run uses.
 *
 * Three cases, keyed on `args.resumeFromWorkspace`:
 * - not set: fresh auto-named run; the workflow ID (custom, or generated from
 *   the sanitized target hostname) doubles as the session ID.
 * - set and `./audit-logs/<name>/session.json` exists: resume that workspace
 *   under a new `<name>_resume_<timestamp>` workflow ID.
 * - set but no session.json: start a new workspace under that name.
 *
 * Exits the process with code 1 when the provided URL does not match the
 * workspace being resumed, or when a new workspace name is invalid.
 *
 * @param client - Temporal client used to terminate workflows left over from
 *   earlier attempts on the same workspace.
 * @param args - Parsed CLI arguments.
 * @returns The workflow/session identifiers plus resume bookkeeping.
 */
async function resolveWorkspace(
  client: Client,
  args: CliArgs
): Promise<WorkspaceResolution> {
  // Case 1: no workspace requested, so the workflow ID is also the session ID.
  if (!args.resumeFromWorkspace) {
    const hostname = sanitizeHostname(args.webUrl);
    const workflowId = args.customWorkflowId || `${hostname}_shannon-${Date.now()}`;
    return {
      workflowId,
      sessionId: workflowId,
      isResume: false,
      terminatedWorkflows: [],
    };
  }

  const workspace = args.resumeFromWorkspace;
  const sessionPath = path.join('./audit-logs', workspace, 'session.json');
  const workspaceExists = await fileExists(sessionPath);

  // Case 2: the workspace already has a session.json, so this is a resume.
  if (workspaceExists) {
    console.log('=== RESUME MODE ===');
    console.log(`Workspace: ${workspace}\n`);

    // 1. Validate the URL BEFORE touching anything: a rejected invocation
    //    must not terminate workflows that are still running for this workspace.
    const session = await readJson<SessionJson>(sessionPath);
    if (session.session.webUrl !== args.webUrl) {
      console.error('ERROR: URL mismatch with workspace');
      console.error(` Workspace URL: ${session.session.webUrl}`);
      console.error(` Provided URL: ${args.webUrl}`);
      process.exit(1);
    }

    // 2. Terminate any running workflows from previous attempts
    const terminatedWorkflows = await terminateExistingWorkflows(client, workspace);
    if (terminatedWorkflows.length > 0) {
      console.log(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`);
    }

    // 3. New workflow ID scoped to this resume attempt; isResume=true makes
    //    downstream code take the resume path.
    return {
      workflowId: `${workspace}_resume_${Date.now()}`,
      sessionId: workspace,
      isResume: true,
      terminatedWorkflows,
    };
  }

  // Case 3: unknown workspace name, so create it (after validating the name).
  if (!isValidWorkspaceName(workspace)) {
    console.error(`ERROR: Invalid workspace name: "${workspace}"`);
    console.error(' Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric');
    process.exit(1);
  }

  console.log('=== NEW NAMED WORKSPACE ===');
  console.log(`Workspace: ${workspace}\n`);

  return {
    workflowId: `${workspace}_shannon-${Date.now()}`,
    sessionId: workspace,
    isResume: false,
    terminatedWorkflows: [],
  };
}
// === Pipeline Input Construction ===
/**
 * Assembles the workflow input from the parsed CLI arguments and the resolved
 * workspace. Optional fields are only present on the result when they carry a
 * truthy value (or, for `terminatedWorkflows`, at least one entry).
 */
function buildPipelineInput(args: CliArgs, workspace: WorkspaceResolution): PipelineInput {
  const { workflowId, sessionId, isResume, terminatedWorkflows } = workspace;

  // Required fields first; optional ones are attached below in a fixed order.
  const pipelineInput: PipelineInput = {
    webUrl: args.webUrl,
    repoPath: args.repoPath,
    workflowId,
    sessionId,
  };

  if (args.configPath) {
    pipelineInput.configPath = args.configPath;
  }
  if (args.outputPath) {
    pipelineInput.outputPath = args.outputPath;
  }
  if (args.pipelineTestingMode) {
    pipelineInput.pipelineTestingMode = args.pipelineTestingMode;
  }
  // Only forwarded when an existing workspace is actually being resumed,
  // not when the name denotes a brand-new workspace.
  if (isResume && args.resumeFromWorkspace) {
    pipelineInput.resumeFromWorkspace = args.resumeFromWorkspace;
  }
  if (terminatedWorkflows.length > 0) {
    pipelineInput.terminatedWorkflows = terminatedWorkflows;
  }

  return pipelineInput;
}
// === Display Helpers ===
/**
 * Prints the "workflow started" banner followed by the run parameters
 * (target, repository, workspace and any optional settings that were given).
 */
function displayWorkflowInfo(args: CliArgs, workspace: WorkspaceResolution): void {
  const { workflowId, sessionId, isResume } = workspace;

  // Banner: start confirmation, plus a resume note when applicable.
  const bannerLines = [`✓ Workflow started: ${workflowId}`];
  if (isResume) {
    bannerLines.push(` (Resuming workspace: ${sessionId})`);
  }

  // Parameters: the three mandatory rows, then whichever optional rows apply.
  const parameterLines = [
    ` Target: ${args.webUrl}`,
    ` Repository: ${args.repoPath}`,
    ` Workspace: ${sessionId}`,
  ];
  if (args.configPath) {
    parameterLines.push(` Config: ${args.configPath}`);
  }
  if (args.displayOutputPath) {
    parameterLines.push(` Output: ${args.displayOutputPath}`);
  }
  if (args.pipelineTestingMode) {
    parameterLines.push(` Mode: Pipeline Testing`);
  }

  // Each section is followed by one blank line.
  for (const section of [bannerLines, parameterLines]) {
    section.forEach((line) => console.log(line));
    console.log();
  }
}
/**
 * Prints where to follow a running workflow (web UI link and logs command)
 * and where its reports will be written.
 *
 * The reports location prefers `displayOutputPath`, then `outputPath`, then
 * `./audit-logs`, with the session ID appended as the final path segment.
 */
function displayMonitoringInfo(args: CliArgs, workspace: WorkspaceResolution): void {
  const { workflowId, sessionId } = workspace;

  const basePath = args.displayOutputPath || args.outputPath || './audit-logs';
  const reportsDir = `${basePath}/${sessionId}`;

  const monitorSection = [
    'Monitor progress:',
    ` Web UI: http://localhost:8233/namespaces/default/workflows/${workflowId}`,
    ` Logs: ./shannon logs ID=${workflowId}`,
  ];
  const outputSection = ['Output:', ` Reports: ${reportsDir}`];

  // Each section is followed by one blank line.
  for (const section of [monitorSection, outputSection]) {
    section.forEach((line) => console.log(line));
    console.log();
  }
}
// === Workflow Result Handling ===
/**
 * Blocks until the workflow finishes, printing a progress line every 30
 * seconds while it runs and a metrics summary once it completes.
 *
 * On failure (of the workflow or of the summary output) the error is printed
 * and the process exits with code 1.
 *
 * @param handle - Handle of the started pipeline workflow.
 * @param workspace - Resolved workspace; for resumes the cumulative cost is
 *   additionally read from the workspace's session.json.
 */
async function waitForWorkflowResult(
  handle: WorkflowHandle<(input: PipelineInput) => Promise<PipelineState>>,
  workspace: WorkspaceResolution
): Promise<void> {
  const toWholeSeconds = (ms: number): number => Math.floor(ms / 1000);

  // Periodic progress line; query errors are ignored on purpose because the
  // workflow may already have completed by the time the timer fires.
  const reportProgress = async (): Promise<void> => {
    try {
      const snapshot = await handle.query<PipelineProgress>(PROGRESS_QUERY);
      const elapsedSeconds = toWholeSeconds(snapshot.elapsedMs);
      const phase = snapshot.currentPhase || 'unknown';
      const agent = snapshot.currentAgent || 'none';
      console.log(
        `[${elapsedSeconds}s] Phase: ${phase} | Agent: ${agent} | Completed: ${snapshot.completedAgents.length}/13`
      );
    } catch {
      // Workflow may have completed
    }
  };
  const progressTimer = setInterval(reportProgress, 30000);

  try {
    // 1. Block until workflow completes, then stop progress reporting
    const pipelineResult = await handle.result();
    clearInterval(progressTimer);

    // 2. Display run metrics (only when a summary was produced)
    console.log('\nPipeline completed successfully!');
    const runSummary = pipelineResult.summary;
    if (!runSummary) {
      return;
    }
    console.log(`Duration: ${toWholeSeconds(runSummary.totalDurationMs)}s`);
    console.log(`Agents completed: ${runSummary.agentCount}`);
    console.log(`Total turns: ${runSummary.totalTurns}`);
    console.log(`Run cost: $${runSummary.totalCostUsd.toFixed(4)}`);

    // 3. Show cumulative cost across all resume attempts
    if (!workspace.isResume) {
      return;
    }
    try {
      const sessionFile = path.join('./audit-logs', workspace.sessionId, 'session.json');
      const session = await readJson<SessionJson>(sessionFile);
      console.log(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`);
    } catch {
      // Non-fatal, skip cumulative cost display
    }
  } catch (error) {
    clearInterval(progressTimer);
    console.error('\nPipeline failed:', error);
    process.exit(1);
  }
}
// === Main Entry Point ===
async function startPipeline(): Promise<void> {
// 1. Parse CLI args and display splash
const args = parseCliArgs(process.argv.slice(2));
await displaySplashScreen(); await displaySplashScreen();
// 2. Connect to Temporal server
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233'; const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
console.log(chalk.gray(`Connecting to Temporal at ${address}...`)); console.log(`Connecting to Temporal at ${address}...`);
const connection = await Connection.connect({ address }); const connection = await Connection.connect({ address });
const client = new Client({ connection }); const client = new Client({ connection });
try { try {
let terminatedWorkflows: string[] = []; // 3. Resolve workspace (new or resume) and build pipeline input
let workflowId: string; const workspace = await resolveWorkspace(client, args);
let sessionId: string; // Workspace name (persistent directory) const input = buildPipelineInput(args, workspace);
let isResume = false;
if (resumeFromWorkspace) { // 4. Start the Temporal workflow
const sessionPath = path.join('./audit-logs', resumeFromWorkspace, 'session.json');
const workspaceExists = await fileExists(sessionPath);
if (workspaceExists) {
// === Resume Mode: existing workspace ===
isResume = true;
console.log(chalk.cyan('=== RESUME MODE ==='));
console.log(`Workspace: ${resumeFromWorkspace}\n`);
// Terminate any running workflows for this workspace
terminatedWorkflows = await terminateExistingWorkflows(client, resumeFromWorkspace);
if (terminatedWorkflows.length > 0) {
console.log(chalk.yellow(`Terminated ${terminatedWorkflows.length} previous workflow(s)\n`));
}
// Validate URL matches workspace
const session = await readJson<SessionJson>(sessionPath);
if (session.session.webUrl !== webUrl) {
console.error(chalk.red('ERROR: URL mismatch with workspace'));
console.error(` Workspace URL: ${session.session.webUrl}`);
console.error(` Provided URL: ${webUrl}`);
process.exit(1);
}
// Generate resume workflow ID
workflowId = `${resumeFromWorkspace}_resume_${Date.now()}`;
sessionId = resumeFromWorkspace;
} else {
// === New Named Workspace ===
if (!isValidWorkspaceName(resumeFromWorkspace)) {
console.error(chalk.red(`ERROR: Invalid workspace name: "${resumeFromWorkspace}"`));
console.error(chalk.gray(' Must be 1-128 characters, alphanumeric/hyphens/underscores, starting with alphanumeric'));
process.exit(1);
}
console.log(chalk.cyan('=== NEW NAMED WORKSPACE ==='));
console.log(`Workspace: ${resumeFromWorkspace}\n`);
workflowId = `${resumeFromWorkspace}_shannon-${Date.now()}`;
sessionId = resumeFromWorkspace;
}
} else {
// === New Auto-Named Workflow ===
const hostname = sanitizeHostname(webUrl);
workflowId = customWorkflowId || `${hostname}_shannon-${Date.now()}`;
sessionId = workflowId;
}
const input: PipelineInput = {
webUrl,
repoPath,
workflowId, // Add for audit correlation
sessionId, // Workspace directory name
...(configPath && { configPath }),
...(outputPath && { outputPath }),
...(pipelineTestingMode && { pipelineTestingMode }),
...(isResume && resumeFromWorkspace && { resumeFromWorkspace }),
...(terminatedWorkflows.length > 0 && { terminatedWorkflows }),
};
// Determine output directory for display (use sessionId for persistent directory)
// Use displayOutputPath (host path) if provided, otherwise fall back to outputPath or default
const effectiveDisplayPath = displayOutputPath || outputPath || './audit-logs';
const outputDir = `${effectiveDisplayPath}/${sessionId}`;
console.log(chalk.green.bold(`✓ Workflow started: ${workflowId}`));
if (isResume) {
console.log(chalk.gray(` (Resuming workspace: ${sessionId})`));
}
console.log();
console.log(chalk.white(' Target: ') + chalk.cyan(webUrl));
console.log(chalk.white(' Repository: ') + chalk.cyan(repoPath));
console.log(chalk.white(' Workspace: ') + chalk.cyan(sessionId));
if (configPath) {
console.log(chalk.white(' Config: ') + chalk.cyan(configPath));
}
if (displayOutputPath) {
console.log(chalk.white(' Output: ') + chalk.cyan(displayOutputPath));
}
if (pipelineTestingMode) {
console.log(chalk.white(' Mode: ') + chalk.yellow('Pipeline Testing'));
}
console.log();
// Start workflow by name (not by importing the function)
const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>( const handle = await client.workflow.start<(input: PipelineInput) => Promise<PipelineState>>(
'pentestPipelineWorkflow', 'pentestPipelineWorkflow',
{ {
taskQueue: 'shannon-pipeline', taskQueue: 'shannon-pipeline',
workflowId, workflowId: workspace.workflowId,
args: [input], args: [input],
} }
); );
if (!waitForCompletion) { // 5. Display info and optionally wait for completion
console.log(chalk.bold('Monitor progress:')); displayWorkflowInfo(args, workspace);
console.log(chalk.white(' Web UI: ') + chalk.blue(`http://localhost:8233/namespaces/default/workflows/${workflowId}`));
console.log(chalk.white(' Logs: ') + chalk.gray(`./shannon logs ID=${workflowId}`));
console.log();
console.log(chalk.bold('Output:'));
console.log(chalk.white(' Reports: ') + chalk.cyan(outputDir));
console.log();
return;
}
// Poll for progress every 30 seconds if (args.waitForCompletion) {
const progressInterval = setInterval(async () => { await waitForWorkflowResult(handle, workspace);
try { } else {
const progress = await handle.query<PipelineProgress>(PROGRESS_QUERY); displayMonitoringInfo(args, workspace);
const elapsed = Math.floor(progress.elapsedMs / 1000);
console.log(
chalk.gray(`[${elapsed}s]`),
chalk.cyan(`Phase: ${progress.currentPhase || 'unknown'}`),
chalk.gray(`| Agent: ${progress.currentAgent || 'none'}`),
chalk.gray(`| Completed: ${progress.completedAgents.length}/13`)
);
} catch {
// Workflow may have completed
}
}, 30000);
try {
const result = await handle.result();
clearInterval(progressInterval);
console.log(chalk.green.bold('\nPipeline completed successfully!'));
if (result.summary) {
console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`));
console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`));
console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`));
console.log(chalk.gray(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`));
// Show cumulative cost from session.json (includes all resume attempts)
if (isResume) {
try {
const session = await readJson<SessionJson>(
path.join('./audit-logs', sessionId, 'session.json')
);
console.log(chalk.gray(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`));
} catch {
// Non-fatal, skip cumulative cost display
}
}
}
} catch (error) {
clearInterval(progressInterval);
console.error(chalk.red.bold('\nPipeline failed:'), error);
process.exit(1);
} }
} finally { } finally {
await connection.close(); await connection.close();
@@ -383,6 +449,6 @@ async function startPipeline(): Promise<void> {
} }
startPipeline().catch((err) => { startPipeline().catch((err) => {
console.error(chalk.red('Client error:'), err); console.error('Client error:', err);
process.exit(1); process.exit(1);
}); });
+3 -13
View File
@@ -1,6 +1,7 @@
import { defineQuery } from '@temporalio/workflow'; import { defineQuery } from '@temporalio/workflow';
// === Types === export type { AgentMetrics } from '../types/metrics.js';
import type { AgentMetrics } from '../types/metrics.js';
export interface PipelineInput { export interface PipelineInput {
webUrl: string; webUrl: string;
@@ -8,7 +9,7 @@ export interface PipelineInput {
configPath?: string; configPath?: string;
outputPath?: string; outputPath?: string;
pipelineTestingMode?: boolean; pipelineTestingMode?: boolean;
workflowId?: string; // Added by client, used for audit correlation workflowId?: string; // Used for audit correlation
sessionId?: string; // Workspace directory name (distinct from workflowId for named workspaces) sessionId?: string; // Workspace directory name (distinct from workflowId for named workspaces)
resumeFromWorkspace?: string; // Workspace name to resume from resumeFromWorkspace?: string; // Workspace name to resume from
terminatedWorkflows?: string[]; // Workflows terminated during resume terminatedWorkflows?: string[]; // Workflows terminated during resume
@@ -22,15 +23,6 @@ export interface ResumeState {
originalWorkflowId: string; originalWorkflowId: string;
} }
export interface AgentMetrics {
durationMs: number;
inputTokens: number | null;
outputTokens: number | null;
costUsd: number | null;
numTurns: number | null;
model?: string | undefined;
}
export interface PipelineSummary { export interface PipelineSummary {
totalCostUsd: number; totalCostUsd: number;
totalDurationMs: number; // Wall-clock time (end - start) totalDurationMs: number; // Wall-clock time (end - start)
@@ -68,6 +60,4 @@ export interface VulnExploitPipelineResult {
error: string | null; error: string | null;
} }
// === Queries ===
export const getProgress = defineQuery<PipelineProgress>('getProgress'); export const getProgress = defineQuery<PipelineProgress>('getProgress');
+45
View File
@@ -0,0 +1,45 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Maps PipelineState to WorkflowSummary for audit logging.
* Pure function with no side effects.
*/
import type { PipelineState } from './shared.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
/**
 * Converts the workflow's PipelineState into the WorkflowSummary shape used
 * for audit logging.
 *
 * Deterministic by construction (no Date.now(), no I/O), so it is safe to
 * import into Temporal workflow code.
 *
 * @param state - Pipeline state; `state.summary` must already be populated
 *   (via computeSummary) before this is called.
 * @param status - Final status to record.
 * @returns Summary carrying totals, completed agents, and per-agent duration
 *   and cost; `error` is included only when the state has a truthy error.
 * @throws Error when `state.summary` has not been set.
 */
export function toWorkflowSummary(
  state: PipelineState,
  status: 'completed' | 'failed'
): WorkflowSummary {
  const { summary } = state;
  if (!summary) {
    throw new Error('toWorkflowSummary: state.summary must be set before calling');
  }

  // Keep only duration and cost from each agent's metrics.
  const perAgentEntries = Object.entries(state.agentMetrics).map(([agentName, metrics]) => {
    const trimmed = { durationMs: metrics.durationMs, costUsd: metrics.costUsd };
    return [agentName, trimmed] as const;
  });

  const workflowSummary: WorkflowSummary = {
    status,
    totalDurationMs: summary.totalDurationMs,
    totalCostUsd: summary.totalCostUsd,
    completedAgents: state.completedAgents,
    agentMetrics: Object.fromEntries(perAgentEntries),
  };

  if (state.error) {
    workflowSummary.error = state.error;
  }

  return workflowSummary;
}
+8 -9
View File
@@ -24,7 +24,6 @@ import { NativeConnection, Worker, bundleWorkflowCode } from '@temporalio/worker
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
import path from 'node:path'; import path from 'node:path';
import dotenv from 'dotenv'; import dotenv from 'dotenv';
import chalk from 'chalk';
import * as activities from './activities.js'; import * as activities from './activities.js';
dotenv.config(); dotenv.config();
@@ -33,12 +32,12 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
async function runWorker(): Promise<void> { async function runWorker(): Promise<void> {
const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233'; const address = process.env.TEMPORAL_ADDRESS || 'localhost:7233';
console.log(chalk.cyan(`Connecting to Temporal at ${address}...`)); console.log(`Connecting to Temporal at ${address}...`);
const connection = await NativeConnection.connect({ address }); const connection = await NativeConnection.connect({ address });
// Bundle workflows for Temporal's V8 isolate // Bundle workflows for Temporal's V8 isolate
console.log(chalk.gray('Bundling workflows...')); console.log('Bundling workflows...');
const workflowBundle = await bundleWorkflowCode({ const workflowBundle = await bundleWorkflowCode({
workflowsPath: path.join(__dirname, 'workflows.js'), workflowsPath: path.join(__dirname, 'workflows.js'),
}); });
@@ -54,26 +53,26 @@ async function runWorker(): Promise<void> {
// Graceful shutdown handling // Graceful shutdown handling
const shutdown = async (): Promise<void> => { const shutdown = async (): Promise<void> => {
console.log(chalk.yellow('\nShutting down worker...')); console.log('\nShutting down worker...');
worker.shutdown(); worker.shutdown();
}; };
process.on('SIGINT', shutdown); process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown); process.on('SIGTERM', shutdown);
console.log(chalk.green('Shannon worker started')); console.log('Shannon worker started');
console.log(chalk.gray('Task queue: shannon-pipeline')); console.log('Task queue: shannon-pipeline');
console.log(chalk.gray('Press Ctrl+C to stop\n')); console.log('Press Ctrl+C to stop\n');
try { try {
await worker.run(); await worker.run();
} finally { } finally {
await connection.close(); await connection.close();
console.log(chalk.gray('Worker stopped')); console.log('Worker stopped');
} }
} }
runWorker().catch((err) => { runWorker().catch((err) => {
console.error(chalk.red('Worker failed:'), err); console.error('Worker failed:', err);
process.exit(1); process.exit(1);
}); });
+125 -162
View File
@@ -24,6 +24,7 @@
*/ */
import { import {
log,
proxyActivities, proxyActivities,
setHandler, setHandler,
workflowInfo, workflowInfo,
@@ -40,9 +41,10 @@ import {
type AgentMetrics, type AgentMetrics,
type ResumeState, type ResumeState,
} from './shared.js'; } from './shared.js';
import type { VulnType } from '../queue-validation.js'; import type { VulnType } from '../services/queue-validation.js';
import type { AgentName } from '../types/agents.js'; import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js'; import { ALL_AGENTS } from '../types/agents.js';
import { toWorkflowSummary } from './summary-mapper.js';
// Retry configuration for production (long intervals for billing recovery) // Retry configuration for production (long intervals for billing recovery)
const PRODUCTION_RETRY = { const PRODUCTION_RETRY = {
@@ -103,11 +105,9 @@ export async function pentestPipelineWorkflow(
): Promise<PipelineState> { ): Promise<PipelineState> {
const { workflowId } = workflowInfo(); const { workflowId } = workflowInfo();
// Select activity proxy based on testing mode
// Pipeline testing uses fast retry intervals (10s) for quick iteration // Pipeline testing uses fast retry intervals (10s) for quick iteration
const a = input.pipelineTestingMode ? testActs : acts; const a = input.pipelineTestingMode ? testActs : acts;
// Workflow state (queryable)
const state: PipelineState = { const state: PipelineState = {
status: 'running', status: 'running',
currentPhase: null, currentPhase: null,
@@ -120,7 +120,6 @@ export async function pentestPipelineWorkflow(
summary: null, summary: null,
}; };
// Register query handler for real-time progress inspection
setHandler(getProgress, (): PipelineProgress => ({ setHandler(getProgress, (): PipelineProgress => ({
...state, ...state,
workflowId, workflowId,
@@ -145,18 +144,17 @@ export async function pentestPipelineWorkflow(
}), }),
}; };
// === RESUME LOGIC ===
let resumeState: ResumeState | null = null; let resumeState: ResumeState | null = null;
if (input.resumeFromWorkspace) { if (input.resumeFromWorkspace) {
// Load resume state from existing workspace // 1. Load resume state (validates workspace, cross-checks deliverables)
resumeState = await a.loadResumeState( resumeState = await a.loadResumeState(
input.resumeFromWorkspace, input.resumeFromWorkspace,
input.webUrl, input.webUrl,
input.repoPath input.repoPath
); );
// Restore git checkpoint and clean up partial deliverables // 2. Restore git workspace and clean up incomplete deliverables
const incompleteAgents = ALL_AGENTS.filter( const incompleteAgents = ALL_AGENTS.filter(
(agentName) => !resumeState!.completedAgents.includes(agentName) (agentName) => !resumeState!.completedAgents.includes(agentName)
) as AgentName[]; ) as AgentName[];
@@ -167,120 +165,59 @@ export async function pentestPipelineWorkflow(
incompleteAgents incompleteAgents
); );
// Check if all agents are already complete // 3. Short-circuit if all agents already completed
if (resumeState.completedAgents.length === ALL_AGENTS.length) { if (resumeState.completedAgents.length === ALL_AGENTS.length) {
console.log(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`); log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
state.status = 'completed'; state.status = 'completed';
state.completedAgents = [...resumeState.completedAgents]; state.completedAgents = [...resumeState.completedAgents];
state.summary = computeSummary(state); state.summary = computeSummary(state);
return state; return state;
} }
// Record resume attempt in session.json // 4. Record this resume attempt in session.json and workflow.log
await a.recordResumeAttempt( await a.recordResumeAttempt(
activityInput, activityInput,
input.terminatedWorkflows || [], input.terminatedWorkflows || [],
resumeState.checkpointHash resumeState.checkpointHash,
resumeState.originalWorkflowId,
resumeState.completedAgents
); );
console.log('Resume state loaded and workspace restored'); log.info('Resume state loaded and workspace restored');
} }
// Helper to check if an agent should be skipped
const shouldSkip = (agentName: string): boolean => { const shouldSkip = (agentName: string): boolean => {
return resumeState?.completedAgents.includes(agentName) ?? false; return resumeState?.completedAgents.includes(agentName) ?? false;
}; };
try { // Run a sequential agent phase (pre-recon, recon)
// === Phase 1: Pre-Reconnaissance === async function runSequentialPhase(
if (!shouldSkip('pre-recon')) { phaseName: string,
state.currentPhase = 'pre-recon'; agentName: AgentName,
state.currentAgent = 'pre-recon'; runAgent: (input: ActivityInput) => Promise<AgentMetrics>
await a.logPhaseTransition(activityInput, 'pre-recon', 'start'); ): Promise<void> {
state.agentMetrics['pre-recon'] = if (!shouldSkip(agentName)) {
await a.runPreReconAgent(activityInput); state.currentPhase = phaseName;
state.completedAgents.push('pre-recon'); state.currentAgent = agentName;
await a.logPhaseTransition(activityInput, 'pre-recon', 'complete'); await a.logPhaseTransition(activityInput, phaseName, 'start');
state.agentMetrics[agentName] = await runAgent(activityInput);
state.completedAgents.push(agentName);
await a.logPhaseTransition(activityInput, phaseName, 'complete');
} else { } else {
console.log('Skipping pre-recon (already complete)'); log.info(`Skipping ${agentName} (already complete)`);
state.completedAgents.push('pre-recon'); state.completedAgents.push(agentName);
} }
}
// === Phase 2: Reconnaissance === // Build pipeline configs for the 5 vuln→exploit pairs
if (!shouldSkip('recon')) { function buildPipelineConfigs(): Array<{
state.currentPhase = 'recon'; vulnType: VulnType;
state.currentAgent = 'recon'; vulnAgent: string;
await a.logPhaseTransition(activityInput, 'recon', 'start'); exploitAgent: string;
state.agentMetrics['recon'] = await a.runReconAgent(activityInput); runVuln: () => Promise<AgentMetrics>;
state.completedAgents.push('recon'); runExploit: () => Promise<AgentMetrics>;
await a.logPhaseTransition(activityInput, 'recon', 'complete'); }> {
} else { return [
console.log('Skipping recon (already complete)');
state.completedAgents.push('recon');
}
// === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
// Each vuln type runs as an independent pipeline:
// vuln agent → queue check → conditional exploit agent
// This eliminates the synchronization barrier between phases - each exploit
// starts immediately when its vuln agent finishes, not waiting for all.
state.currentPhase = 'vulnerability-exploitation';
state.currentAgent = 'pipelines';
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
// Helper: Run a single vuln→exploit pipeline with skip logic
async function runVulnExploitPipeline(
vulnType: VulnType,
runVulnAgent: () => Promise<AgentMetrics>,
runExploitAgent: () => Promise<AgentMetrics>
): Promise<VulnExploitPipelineResult> {
const vulnAgentName = `${vulnType}-vuln`;
const exploitAgentName = `${vulnType}-exploit`;
// Step 1: Run vulnerability agent (or skip if completed)
let vulnMetrics: AgentMetrics | null = null;
if (!shouldSkip(vulnAgentName)) {
vulnMetrics = await runVulnAgent();
} else {
console.log(`Skipping ${vulnAgentName} (already complete)`);
}
// Step 2: Check exploitation queue (only if vuln agent ran or completed previously)
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// Step 3: Conditionally run exploit agent (skip if already completed)
let exploitMetrics: AgentMetrics | null = null;
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {
exploitMetrics = await runExploitAgent();
} else {
console.log(`Skipping ${exploitAgentName} (already complete)`);
}
}
return {
vulnType,
vulnMetrics,
exploitMetrics,
exploitDecision: {
shouldExploit: decision.shouldExploit,
vulnerabilityCount: decision.vulnerabilityCount,
},
error: null,
};
}
// Determine which pipelines to run (skip if both vuln and exploit completed)
const pipelinesToRun: Array<Promise<VulnExploitPipelineResult>> = [];
// Only run pipeline if at least one agent (vuln or exploit) is incomplete
const pipelineConfigs: Array<{
vulnType: VulnType;
vulnAgent: string;
exploitAgent: string;
runVuln: () => Promise<AgentMetrics>;
runExploit: () => Promise<AgentMetrics>;
}> = [
{ {
vulnType: 'injection', vulnType: 'injection',
vulnAgent: 'injection-vuln', vulnAgent: 'injection-vuln',
@@ -317,56 +254,34 @@ export async function pentestPipelineWorkflow(
runExploit: () => a.runAuthzExploitAgent(activityInput), runExploit: () => a.runAuthzExploitAgent(activityInput),
}, },
]; ];
}
for (const config of pipelineConfigs) { // Aggregate results from settled pipeline promises into workflow state
const vulnComplete = shouldSkip(config.vulnAgent); function aggregatePipelineResults(
const exploitComplete = shouldSkip(config.exploitAgent); results: PromiseSettledResult<VulnExploitPipelineResult>[]
): void {
// Only run pipeline if at least one agent needs to run
if (!vulnComplete || !exploitComplete) {
pipelinesToRun.push(
runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)
);
} else {
console.log(
`Skipping entire ${config.vulnType} pipeline (both agents complete)`
);
// Still need to mark them as completed in state
state.completedAgents.push(config.vulnAgent, config.exploitAgent);
}
}
// Run pipelines in parallel with graceful failure handling
// Promise.allSettled ensures other pipelines continue if one fails
const pipelineResults = await Promise.allSettled(pipelinesToRun);
// Aggregate results from all pipelines
const failedPipelines: string[] = []; const failedPipelines: string[] = [];
for (const result of pipelineResults) {
for (const result of results) {
if (result.status === 'fulfilled') { if (result.status === 'fulfilled') {
const { vulnType, vulnMetrics, exploitMetrics } = result.value; const { vulnType, vulnMetrics, exploitMetrics } = result.value;
// Record vuln agent
const vulnAgentName = `${vulnType}-vuln`; const vulnAgentName = `${vulnType}-vuln`;
if (vulnMetrics) { if (vulnMetrics) {
state.agentMetrics[vulnAgentName] = vulnMetrics; state.agentMetrics[vulnAgentName] = vulnMetrics;
state.completedAgents.push(vulnAgentName); state.completedAgents.push(vulnAgentName);
} else if (shouldSkip(vulnAgentName)) { } else if (shouldSkip(vulnAgentName)) {
// Agent was skipped because already complete
state.completedAgents.push(vulnAgentName); state.completedAgents.push(vulnAgentName);
} }
// Record exploit agent (if it ran)
const exploitAgentName = `${vulnType}-exploit`; const exploitAgentName = `${vulnType}-exploit`;
if (exploitMetrics) { if (exploitMetrics) {
state.agentMetrics[exploitAgentName] = exploitMetrics; state.agentMetrics[exploitAgentName] = exploitMetrics;
state.completedAgents.push(exploitAgentName); state.completedAgents.push(exploitAgentName);
} else if (shouldSkip(exploitAgentName)) { } else if (shouldSkip(exploitAgentName)) {
// Agent was skipped because already complete
state.completedAgents.push(exploitAgentName); state.completedAgents.push(exploitAgentName);
} }
} else { } else {
// Pipeline failed - log error but continue with others
const errorMsg = const errorMsg =
result.reason instanceof Error result.reason instanceof Error
? result.reason.message ? result.reason.message
@@ -375,15 +290,87 @@ export async function pentestPipelineWorkflow(
} }
} }
// Log any pipeline failures (workflow continues despite failures)
if (failedPipelines.length > 0) { if (failedPipelines.length > 0) {
console.log( log.warn(`${failedPipelines.length} pipeline(s) failed`, {
`⚠️ ${failedPipelines.length} pipeline(s) failed:`, failures: failedPipelines,
failedPipelines });
); }
}
try {
// === Phase 1: Pre-Reconnaissance ===
await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
// === Phase 2: Reconnaissance ===
await runSequentialPhase('recon', 'recon', a.runReconAgent);
// === Phases 3-4: Vulnerability Analysis + Exploitation (Pipelined) ===
// Each vuln type runs as an independent pipeline:
// vuln agent → queue check → conditional exploit agent
// Exploits start immediately when their vuln finishes, not waiting for all.
state.currentPhase = 'vulnerability-exploitation';
state.currentAgent = 'pipelines';
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'start');
/**
 * Runs the pipeline for a single vulnerability type:
 * vulnerability agent → exploitation-queue check → conditional exploit agent.
 *
 * Closure over shouldSkip and activityInput by design (Temporal replay safety).
 *
 * @param vulnType - Vulnerability type; also used to derive the `<type>-vuln` and
 *   `<type>-exploit` agent names passed to shouldSkip.
 * @param runVulnAgent - Thunk that runs the vulnerability agent and resolves to its metrics.
 * @param runExploitAgent - Thunk that runs the exploitation agent and resolves to its metrics.
 * @returns The pipeline result. `vulnMetrics` / `exploitMetrics` stay `null` when the
 *   corresponding agent did not run (skipped, or the queue decision was negative);
 *   `error` is always `null` on this path because failures are not caught here and
 *   surface as a rejected promise instead.
 */
async function runVulnExploitPipeline(
vulnType: VulnType,
runVulnAgent: () => Promise<AgentMetrics>,
runExploitAgent: () => Promise<AgentMetrics>
): Promise<VulnExploitPipelineResult> {
const vulnAgentName = `${vulnType}-vuln`;
const exploitAgentName = `${vulnType}-exploit`;
// 1. Run vulnerability analysis (or skip if resumed)
let vulnMetrics: AgentMetrics | null = null;
if (!shouldSkip(vulnAgentName)) {
vulnMetrics = await runVulnAgent();
} else {
log.info(`Skipping ${vulnAgentName} (already complete)`);
}
// 2. Check exploitation queue for actionable findings
// This check runs even when the vuln agent was skipped above.
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// 3. Conditionally run exploitation agent
// Runs only when the queue decision asks for exploitation AND the agent is not skipped.
let exploitMetrics: AgentMetrics | null = null;
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {
exploitMetrics = await runExploitAgent();
} else {
log.info(`Skipping ${exploitAgentName} (already complete)`);
}
}
// Only the two decision fields below are copied into the result.
return {
vulnType,
vulnMetrics,
exploitMetrics,
exploitDecision: {
shouldExploit: decision.shouldExploit,
vulnerabilityCount: decision.vulnerabilityCount,
},
error: null,
};
} }
// Update phase markers const pipelineConfigs = buildPipelineConfigs();
const pipelinesToRun: Array<Promise<VulnExploitPipelineResult>> = [];
for (const config of pipelineConfigs) {
if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
pipelinesToRun.push(
runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit)
);
} else {
log.info(`Skipping entire ${config.vulnType} pipeline (both agents complete)`);
state.completedAgents.push(config.vulnAgent, config.exploitAgent);
}
}
const pipelineResults = await Promise.allSettled(pipelinesToRun);
aggregatePipelineResults(pipelineResults);
state.currentPhase = 'exploitation'; state.currentPhase = 'exploitation';
state.currentAgent = null; state.currentAgent = null;
await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete'); await a.logPhaseTransition(activityInput, 'vulnerability-exploitation', 'complete');
@@ -406,29 +393,17 @@ export async function pentestPipelineWorkflow(
await a.logPhaseTransition(activityInput, 'reporting', 'complete'); await a.logPhaseTransition(activityInput, 'reporting', 'complete');
} else { } else {
console.log('Skipping report (already complete)'); log.info('Skipping report (already complete)');
state.completedAgents.push('report'); state.completedAgents.push('report');
} }
// === Complete ===
state.status = 'completed'; state.status = 'completed';
state.currentPhase = null; state.currentPhase = null;
state.currentAgent = null; state.currentAgent = null;
state.summary = computeSummary(state); state.summary = computeSummary(state);
// Log workflow completion summary // Log workflow completion summary
await a.logWorkflowComplete(activityInput, { await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'completed'));
status: 'completed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
});
return state; return state;
} catch (error) { } catch (error) {
@@ -438,19 +413,7 @@ export async function pentestPipelineWorkflow(
state.summary = computeSummary(state); state.summary = computeSummary(state);
// Log workflow failure summary // Log workflow failure summary
await a.logWorkflowComplete(activityInput, { await a.logWorkflowComplete(activityInput, toWorkflowSummary(state, 'failed'));
status: 'failed',
totalDurationMs: state.summary.totalDurationMs,
totalCostUsd: state.summary.totalCostUsd,
completedAgents: state.completedAgents,
agentMetrics: Object.fromEntries(
Object.entries(state.agentMetrics).map(([name, m]) => [
name,
{ durationMs: m.durationMs, costUsd: m.costUsd },
])
),
error: state.error ?? undefined,
});
throw error; throw error;
} }
+22 -34
View File
@@ -20,7 +20,6 @@
import fs from 'fs/promises'; import fs from 'fs/promises';
import path from 'path'; import path from 'path';
import chalk from 'chalk';
interface SessionJson { interface SessionJson {
session: { session: {
@@ -59,16 +58,7 @@ function formatDuration(ms: number): string {
} }
function getStatusDisplay(status: string): string { function getStatusDisplay(status: string): string {
switch (status) { return status;
case 'completed':
return chalk.green(status);
case 'in-progress':
return chalk.yellow(status);
case 'failed':
return chalk.red(status);
default:
return status;
}
} }
function truncate(str: string, maxLen: number): string { function truncate(str: string, maxLen: number): string {
@@ -83,8 +73,8 @@ async function listWorkspaces(): Promise<void> {
try { try {
entries = await fs.readdir(auditDir); entries = await fs.readdir(auditDir);
} catch { } catch {
console.log(chalk.yellow('No audit-logs directory found.')); console.log('No audit-logs directory found.');
console.log(chalk.gray(`Expected: ${auditDir}`)); console.log(`Expected: ${auditDir}`);
return; return;
} }
@@ -110,15 +100,15 @@ async function listWorkspaces(): Promise<void> {
} }
if (workspaces.length === 0) { if (workspaces.length === 0) {
console.log(chalk.yellow('\nNo workspaces found.')); console.log('\nNo workspaces found.');
console.log(chalk.gray('Run a pipeline first: ./shannon start URL=<url> REPO=<repo>')); console.log('Run a pipeline first: ./shannon start URL=<url> REPO=<repo>');
return; return;
} }
// Sort by creation date (most recent first) // Sort by creation date (most recent first)
workspaces.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); workspaces.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
console.log(chalk.cyan.bold('\n=== Shannon Workspaces ===\n')); console.log('\n=== Shannon Workspaces ===\n');
// Column widths // Column widths
const nameWidth = 30; const nameWidth = 30;
@@ -129,16 +119,14 @@ async function listWorkspaces(): Promise<void> {
// Header // Header
console.log( console.log(
chalk.gray( ' ' +
' ' + 'WORKSPACE'.padEnd(nameWidth) +
'WORKSPACE'.padEnd(nameWidth) + 'URL'.padEnd(urlWidth) +
'URL'.padEnd(urlWidth) + 'STATUS'.padEnd(statusWidth) +
'STATUS'.padEnd(statusWidth) + 'DURATION'.padEnd(durationWidth) +
'DURATION'.padEnd(durationWidth) + 'COST'.padEnd(costWidth)
'COST'.padEnd(costWidth)
)
); );
console.log(chalk.gray(' ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth))); console.log(' ' + '\u2500'.repeat(nameWidth + urlWidth + statusWidth + durationWidth + costWidth));
let resumableCount = 0; let resumableCount = 0;
@@ -154,15 +142,15 @@ async function listWorkspaces(): Promise<void> {
resumableCount++; resumableCount++;
} }
const resumeTag = isResumable ? chalk.cyan(' (resumable)') : ''; const resumeTag = isResumable ? ' (resumable)' : '';
console.log( console.log(
' ' + ' ' +
chalk.white(truncate(ws.name, nameWidth - 2).padEnd(nameWidth)) + truncate(ws.name, nameWidth - 2).padEnd(nameWidth) +
chalk.gray(truncate(ws.url, urlWidth - 2).padEnd(urlWidth)) + truncate(ws.url, urlWidth - 2).padEnd(urlWidth) +
getStatusDisplay(ws.status).padEnd(statusWidth + 10) + // +10 for chalk escape codes getStatusDisplay(ws.status).padEnd(statusWidth) +
chalk.gray(duration.padEnd(durationWidth)) + duration.padEnd(durationWidth) +
chalk.gray(cost.padEnd(costWidth)) + cost.padEnd(costWidth) +
resumeTag resumeTag
); );
} }
@@ -170,16 +158,16 @@ async function listWorkspaces(): Promise<void> {
console.log(); console.log();
const summary = `${workspaces.length} workspace${workspaces.length === 1 ? '' : 's'} found`; const summary = `${workspaces.length} workspace${workspaces.length === 1 ? '' : 's'} found`;
const resumeSummary = resumableCount > 0 ? ` (${resumableCount} resumable)` : ''; const resumeSummary = resumableCount > 0 ? ` (${resumableCount} resumable)` : '';
console.log(chalk.gray(`${summary}${resumeSummary}`)); console.log(`${summary}${resumeSummary}`);
if (resumableCount > 0) { if (resumableCount > 0) {
console.log(chalk.gray('\nResume with: ./shannon start URL=<url> REPO=<repo> WORKSPACE=<name>')); console.log('\nResume with: ./shannon start URL=<url> REPO=<repo> WORKSPACE=<name>');
} }
console.log(); console.log();
} }
listWorkspaces().catch((err) => { listWorkspaces().catch((err) => {
console.error(chalk.red('Error listing workspaces:'), err); console.error('Error listing workspaces:', err);
process.exit(1); process.exit(1);
}); });
-66
View File
@@ -1,66 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $ } from 'zx';
import chalk from 'chalk';
// Names of the external command-line tools this module knows how to probe.
type ToolName = 'nmap' | 'subfinder' | 'whatweb' | 'schemathesis';
// One boolean per tool name: true when the tool was found, false otherwise.
export type ToolAvailability = Record<ToolName, boolean>;
/**
 * Probes each known external tool with `command -v` and reports which ones resolve.
 *
 * Tools are probed one after another so the progress lines print in a fixed order.
 * A failing probe is not an error: the tool is simply recorded as unavailable.
 *
 * @returns A record with one boolean per tool name.
 */
export const checkToolAvailability = async (): Promise<ToolAvailability> => {
  const probeOrder: ToolName[] = ['nmap', 'subfinder', 'whatweb', 'schemathesis'];

  // Start pessimistic; entries are flipped as probes succeed.
  const found: ToolAvailability = {
    nmap: false,
    subfinder: false,
    whatweb: false,
    schemathesis: false
  };

  console.log(chalk.blue('🔧 Checking tool availability...'));

  for (const candidate of probeOrder) {
    try {
      // zx rejects when the command exits non-zero, which lands in the catch below.
      await $`command -v ${candidate}`;
      found[candidate] = true;
      console.log(chalk.green(`${candidate} - available`));
    } catch {
      found[candidate] = false;
      console.log(chalk.yellow(` ⚠️ ${candidate} - not found`));
    }
  }

  return found;
};
/**
 * Reports unavailable tools to the console together with installation hints.
 *
 * Nothing is printed when every tool is available.
 *
 * @param toolAvailability - Availability record, one boolean per tool.
 * @returns The names of the tools whose entry is falsy, in the record's key order.
 */
export const handleMissingTools = (toolAvailability: ToolAvailability): ToolName[] => {
  const entries = Object.entries(toolAvailability) as Array<[ToolName, boolean]>;
  const missing = entries.filter(([, available]) => !available).map(([tool]) => tool);

  // Guard clause: all tools present, so there is nothing to report.
  if (missing.length === 0) {
    return missing;
  }

  console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`));
  console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.'));

  // One installation hint per known tool.
  const installHints: Record<ToolName, string> = {
    'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)',
    'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest',
    'whatweb': 'gem install whatweb',
    'schemathesis': 'pip install schemathesis'
  };

  console.log(chalk.gray('\nInstallation hints:'));
  for (const tool of missing) {
    console.log(chalk.gray(` ${tool}: ${installHints[tool]}`));
  }
  console.log('');

  return missing;
};
+15
View File
@@ -0,0 +1,15 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
 * Logger interface for services called from Temporal activities.
 * Keeps services Temporal-agnostic while providing structured logging.
 *
 * All three methods share one shape: a human-readable message plus an optional
 * bag of structured attributes. None of them returns a value.
 */
export interface ActivityLogger {
// Informational message.
info(message: string, attrs?: Record<string, unknown>): void;
// Warning-level message.
warn(message: string, attrs?: Record<string, unknown>): void;
// Error-level message.
error(message: string, attrs?: Record<string, unknown>): void;
}
+13 -57
View File
@@ -34,21 +34,6 @@ export const ALL_AGENTS = [
*/ */
export type AgentName = typeof ALL_AGENTS[number]; export type AgentName = typeof ALL_AGENTS[number];
export type PromptName =
| 'pre-recon-code'
| 'recon'
| 'vuln-injection'
| 'vuln-xss'
| 'vuln-auth'
| 'vuln-ssrf'
| 'vuln-authz'
| 'exploit-injection'
| 'exploit-xss'
| 'exploit-auth'
| 'exploit-ssrf'
| 'exploit-authz'
| 'report-executive';
export type PlaywrightAgent = export type PlaywrightAgent =
| 'playwright-agent1' | 'playwright-agent1'
| 'playwright-agent2' | 'playwright-agent2'
@@ -56,7 +41,9 @@ export type PlaywrightAgent =
| 'playwright-agent4' | 'playwright-agent4'
| 'playwright-agent5'; | 'playwright-agent5';
export type AgentValidator = (sourceDir: string) => Promise<boolean>; import type { ActivityLogger } from './activity-logger.js';
export type AgentValidator = (sourceDir: string, logger: ActivityLogger) => Promise<boolean>;
export type AgentStatus = export type AgentStatus =
| 'pending' | 'pending'
@@ -69,52 +56,21 @@ export interface AgentDefinition {
name: AgentName; name: AgentName;
displayName: string; displayName: string;
prerequisites: AgentName[]; prerequisites: AgentName[];
promptTemplate: string;
deliverableFilename: string;
} }
/** /**
* Maps an agent name to its corresponding prompt file name. * Vulnerability types supported by the pipeline.
*/ */
export function getPromptNameForAgent(agentName: AgentName): PromptName { export type VulnType = 'injection' | 'xss' | 'auth' | 'ssrf' | 'authz';
const mappings: Record<AgentName, PromptName> = {
'pre-recon': 'pre-recon-code',
'recon': 'recon',
'injection-vuln': 'vuln-injection',
'xss-vuln': 'vuln-xss',
'auth-vuln': 'vuln-auth',
'ssrf-vuln': 'vuln-ssrf',
'authz-vuln': 'vuln-authz',
'injection-exploit': 'exploit-injection',
'xss-exploit': 'exploit-xss',
'auth-exploit': 'exploit-auth',
'ssrf-exploit': 'exploit-ssrf',
'authz-exploit': 'exploit-authz',
'report': 'report-executive',
};
return mappings[agentName];
}
/** /**
* Maps an agent name to its deliverable file path. * Decision returned by queue validation for exploitation phase.
* Must match mcp-server/src/types/deliverables.ts:DELIVERABLE_FILENAMES
*/ */
export function getDeliverablePath(agentName: AgentName, repoPath: string): string { export interface ExploitationDecision {
const deliverableMap: Record<AgentName, string> = { shouldExploit: boolean;
'pre-recon': 'code_analysis_deliverable.md', shouldRetry: boolean;
'recon': 'recon_deliverable.md', vulnerabilityCount: number;
'injection-vuln': 'injection_analysis_deliverable.md', vulnType: VulnType;
'xss-vuln': 'xss_analysis_deliverable.md',
'auth-vuln': 'auth_analysis_deliverable.md',
'ssrf-vuln': 'ssrf_analysis_deliverable.md',
'authz-vuln': 'authz_analysis_deliverable.md',
'injection-exploit': 'injection_exploitation_evidence.md',
'xss-exploit': 'xss_exploitation_evidence.md',
'auth-exploit': 'auth_exploitation_evidence.md',
'ssrf-exploit': 'ssrf_exploitation_evidence.md',
'authz-exploit': 'authz_exploitation_evidence.md',
'report': 'comprehensive_security_assessment_report.md',
};
const filename = deliverableMap[agentName];
return `${repoPath}/deliverables/${filename}`;
} }
+35
View File
@@ -0,0 +1,35 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Audit system type definitions
*/
/**
 * Cross-cutting session metadata used by services, temporal, and audit.
 *
 * Only `id` and `webUrl` are required. The index signature lets callers attach
 * arbitrary extra keys, which are typed as `unknown` and must be narrowed before use.
 */
export interface SessionMetadata {
// Session identifier.
id: string;
// Web URL associated with the session (presumably the scan target — confirm with callers).
webUrl: string;
// Optional repository path.
repoPath?: string;
// Optional output path.
outputPath?: string;
// Open-ended extension point for additional metadata.
[key: string]: unknown;
}
/**
 * Result data passed to audit system when an agent execution ends.
 * Used by both AuditSession and MetricsTracker.
 *
 * Optional fields are declared as `T | undefined` so that an explicit `undefined`
 * can be assigned to them.
 * NOTE(review): field names here are snake_case (`duration_ms`, `cost_usd`) — confirm
 * this is intentional before renaming anything.
 */
export interface AgentEndResult {
// Attempt number of the execution that ended.
attemptNumber: number;
// Duration of the attempt; the name indicates milliseconds.
duration_ms: number;
// Cost of the attempt; the name indicates US dollars.
cost_usd: number;
// Whether the attempt succeeded.
success: boolean;
// Model identifier, when known.
model?: string | undefined;
// Error description, when there is one.
error?: string | undefined;
// Checkpoint reference, when there is one (presumably a git checkpoint — confirm).
checkpoint?: string | undefined;
// Whether this was the last attempt.
isFinalAttempt?: boolean | undefined;
}
+1 -4
View File
@@ -29,10 +29,8 @@ export interface Rules {
export type LoginType = 'form' | 'sso' | 'api' | 'basic'; export type LoginType = 'form' | 'sso' | 'api' | 'basic';
export type SuccessConditionType = 'url' | 'cookie' | 'element' | 'redirect';
export interface SuccessCondition { export interface SuccessCondition {
type: SuccessConditionType; type: 'url' | 'cookie' | 'element' | 'redirect';
value: string; value: string;
} }
@@ -53,7 +51,6 @@ export interface Authentication {
export interface Config { export interface Config {
rules?: Rules; rules?: Rules;
authentication?: Authentication; authentication?: Authentication;
login?: unknown; // Deprecated
} }
export interface DistributedConfig { export interface DistributedConfig {
+33
View File
@@ -8,6 +8,39 @@
* Error type definitions * Error type definitions
*/ */
/**
 * Specific error codes for reliable classification.
 *
 * ErrorCode provides precision within the coarse 8-category PentestErrorType.
 * Used by classifyErrorForTemporal for code-based classification (preferred)
 * with string matching as fallback for external errors.
 *
 * This is a string enum: every member's runtime value is its own name, so codes
 * stay readable when logged or serialized. The comment above each group names the
 * PentestErrorType category that the group belongs to.
 */
export enum ErrorCode {
// Config errors (PentestErrorType: 'config')
CONFIG_NOT_FOUND = 'CONFIG_NOT_FOUND',
CONFIG_VALIDATION_FAILED = 'CONFIG_VALIDATION_FAILED',
CONFIG_PARSE_ERROR = 'CONFIG_PARSE_ERROR',
// Agent execution errors (PentestErrorType: 'validation')
AGENT_EXECUTION_FAILED = 'AGENT_EXECUTION_FAILED',
OUTPUT_VALIDATION_FAILED = 'OUTPUT_VALIDATION_FAILED',
// Billing errors (PentestErrorType: 'billing')
// Note: rate limiting is grouped with billing here.
API_RATE_LIMITED = 'API_RATE_LIMITED',
SPENDING_CAP_REACHED = 'SPENDING_CAP_REACHED',
INSUFFICIENT_CREDITS = 'INSUFFICIENT_CREDITS',
// Git errors (PentestErrorType: 'filesystem')
GIT_CHECKPOINT_FAILED = 'GIT_CHECKPOINT_FAILED',
GIT_ROLLBACK_FAILED = 'GIT_ROLLBACK_FAILED',
// Prompt errors (PentestErrorType: 'prompt')
PROMPT_LOAD_FAILED = 'PROMPT_LOAD_FAILED',
// Validation errors (PentestErrorType: 'validation')
DELIVERABLE_NOT_FOUND = 'DELIVERABLE_NOT_FOUND',
}
export type PentestErrorType = export type PentestErrorType =
| 'config' | 'config'
| 'network' | 'network'
+4
View File
@@ -8,6 +8,10 @@
* Type definitions barrel export * Type definitions barrel export
*/ */
export * from './activity-logger.js';
export * from './errors.js'; export * from './errors.js';
export * from './config.js'; export * from './config.js';
export * from './agents.js'; export * from './agents.js';
export * from './audit.js';
export * from './result.js';
export * from './metrics.js';
+19
View File
@@ -0,0 +1,19 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
 * Agent metrics types used across services and activities.
 * Centralized here to avoid temporal/shared.ts import boundary violations.
 *
 * Only `durationMs` is always a number; the token, cost and turn fields may be
 * `null` (presumably when the value was not reported — confirm with producers).
 */
export interface AgentMetrics {
// Duration of the run; the name indicates milliseconds.
durationMs: number;
// Input token count, or null.
inputTokens: number | null;
// Output token count, or null.
outputTokens: number | null;
// Cost of the run (the name indicates US dollars), or null.
costUsd: number | null;
// Number of turns, or null.
numTurns: number | null;
// Model identifier, when known.
model?: string | undefined;
}
+62
View File
@@ -0,0 +1,62 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Minimal Result type for explicit error handling.
*
* A discriminated union that makes error handling explicit without adding
* heavy machinery. Used in key modules (config loading, agent execution,
* queue validation) where callers need to make decisions based on error type.
*/
/**
 * Success variant of Result.
 *
 * The literal `ok: true` is the discriminant that separates this variant from Err.
 * Both fields are readonly.
 */
export interface Ok<T> {
// Discriminant: always `true` for the success variant.
readonly ok: true;
// The successful payload.
readonly value: T;
}
/**
 * Error variant of Result.
 *
 * The literal `ok: false` is the discriminant that separates this variant from Ok.
 * Both fields are readonly.
 */
export interface Err<E> {
// Discriminant: always `false` for the error variant.
readonly ok: false;
// The error payload.
readonly error: E;
}
/**
 * Result type - either Ok with a value or Err with an error.
 *
 * A discriminated union on the boolean `ok` field: checking `result.ok` narrows
 * the value to the matching variant.
 */
export type Result<T, E> = Ok<T> | Err<E>;
/**
 * Wraps a value in the success variant of Result.
 *
 * @param value - The payload to carry.
 * @returns A fresh object with `ok: true` and the given value.
 */
export function ok<T>(value: T): Ok<T> {
  const success: Ok<T> = { ok: true, value };
  return success;
}
/**
 * Wraps an error in the failure variant of Result.
 *
 * @param error - The error payload to carry.
 * @returns A fresh object with `ok: false` and the given error.
 */
export function err<E>(error: E): Err<E> {
  const failure: Err<E> = { ok: false, error };
  return failure;
}
/**
 * Type guard for the success variant.
 *
 * @param result - The Result to inspect.
 * @returns `true` only when `result.ok` is strictly `true`.
 */
export function isOk<T, E>(result: Result<T, E>): result is Ok<T> {
  // switch compares with strict equality, so only the literal `true` matches.
  switch (result.ok) {
    case true:
      return true;
    default:
      return false;
  }
}
/**
 * Type guard for the error variant.
 *
 * @param result - The Result to inspect.
 * @returns `true` only when `result.ok` is strictly `false`.
 */
export function isErr<T, E>(result: Result<T, E>): result is Err<E> {
  // switch compares with strict equality, so only the literal `false` matches.
  switch (result.ok) {
    case false:
      return true;
    default:
      return false;
  }
}
+95
View File
@@ -0,0 +1,95 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Consolidated billing/spending cap detection utilities.
*
* Anthropic's spending cap behavior is inconsistent:
* - Sometimes a proper SDK error (billing_error)
* - Sometimes Claude responds with text about the cap
* - Sometimes partial billing before cutoff
*
* This module provides defense-in-depth detection with shared pattern lists
* to prevent drift between detection points.
*/
/**
 * Text patterns for SDK output sniffing (what Claude says).
 * Used by message-handlers.ts and the behavioral heuristic.
 *
 * Every entry is lowercase because matching lowercases the input and then does a
 * plain substring search.
 * NOTE(review): 'resets' is a very general substring and could match unrelated
 * text — confirm that every caller gates it with another signal.
 */
export const BILLING_TEXT_PATTERNS = [
'spending cap',
'spending limit',
'cap reached',
'budget exceeded',
'usage limit',
'resets',
] as const;
/**
 * API patterns for error message classification (what the API returns).
 * Used by classifyErrorForTemporal in error-handling.ts.
 *
 * Every entry is lowercase because matching lowercases the message and then does
 * a plain substring search. The two "plans & billing" / "plans and billing"
 * entries cover both spellings of the same message.
 */
export const BILLING_API_PATTERNS = [
'billing_error',
'credit balance is too low',
'insufficient credits',
'usage is blocked due to insufficient credits',
'please visit plans & billing',
'please visit plans and billing',
'usage limit reached',
'quota exceeded',
'daily rate limit',
'limit will reset',
'billing limit reached',
] as const;
/**
 * Reports whether the text contains any of the billing text patterns.
 * Intended for sniffing SDK output content for spending cap messages.
 *
 * The comparison is a case-insensitive substring search.
 *
 * @param text - Text to scan.
 * @returns `true` as soon as one pattern is found, `false` otherwise.
 */
export function matchesBillingTextPattern(text: string): boolean {
  const haystack = text.toLowerCase();
  for (const needle of BILLING_TEXT_PATTERNS) {
    if (haystack.includes(needle)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether an error message contains any of the billing API patterns.
 * Intended for classifying API error messages.
 *
 * The comparison is a case-insensitive substring search.
 *
 * @param message - Error message to scan.
 * @returns `true` as soon as one pattern is found, `false` otherwise.
 */
export function matchesBillingApiPattern(message: string): boolean {
  const haystack = message.toLowerCase();
  for (const needle of BILLING_API_PATTERNS) {
    if (haystack.includes(needle)) {
      return true;
    }
  }
  return false;
}
/**
 * Behavioral heuristic for detecting a spending cap hit.
 *
 * When Claude hits a spending cap, it often returns a short message
 * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
 *
 * All three signals must agree:
 * 1. The turn count is not above 2
 * 2. The cost is exactly 0
 * 3. The result text matches a billing text pattern
 *
 * @param turns - Number of turns the agent took
 * @param cost - Total cost in USD
 * @param resultText - The result text from the agent
 * @returns true if this looks like a spending cap hit
 */
export function isSpendingCapBehavior(
  turns: number,
  cost: number,
  resultText: string
): boolean {
  // Written as a negated "greater than" so a NaN turn count is not rejected here.
  const withinTurnBudget = !(turns > 2);
  const zeroCost = cost === 0;

  // The text is only inspected once both cheap numeric signals hold.
  return withinTurnBudget && zeroCost && matchesBillingTextPattern(resultText);
}
-84
View File
@@ -4,11 +4,6 @@
// it under the terms of the GNU Affero General Public License version 3 // it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation. // as published by the Free Software Foundation.
import chalk from 'chalk';
import { formatDuration } from './formatting.js';
// Timing utilities
export class Timer { export class Timer {
name: string; name: string;
startTime: number; startTime: number;
@@ -29,82 +24,3 @@ export class Timer {
return end - this.startTime; return end - this.startTime;
} }
} }
// Per-agent durations keyed by agent name (presumably milliseconds — confirm against the Timer).
interface TimingResultsAgents {
[key: string]: number;
}
// Timing state: an optional overall timer plus the per-agent durations.
interface TimingResults {
// Overall timer; null until one has been assigned.
total: Timer | null;
agents: TimingResultsAgents;
}
// Per-agent costs keyed by agent name.
interface CostResultsAgents {
[key: string]: number;
}
// Cost state: the per-agent costs plus a running total.
interface CostResults {
agents: CostResultsAgents;
total: number;
}
// Global timing and cost tracker
// Both objects are module-level mutable state shared by every importer.
// They start empty: no overall timer, no per-agent entries, zero total cost.
export const timingResults: TimingResults = {
total: null,
agents: {},
};
export const costResults: CostResults = {
agents: {},
total: 0,
};
/**
 * Prints a timing and cost summary to the console.
 *
 * Reads the module-level `timingResults` and `costResults`. When no overall timer
 * has been set, a single notice is printed and nothing else happens. Otherwise the
 * overall timer is stopped and its result is used as the total duration, followed
 * by an optional per-agent breakdown and an optional cost breakdown.
 *
 * NOTE(review): percentages divide by the total duration; if that value can be 0
 * the printed percentage would be NaN or Infinity — confirm whether that can occur.
 */
export const displayTimingSummary = (): void => {
// Nothing to report without an overall timer.
if (!timingResults.total) {
console.log(chalk.yellow('No timing data available'));
return;
}
// Calling stop() here means the summary itself ends the overall measurement.
const totalDuration = timingResults.total.stop();
console.log(chalk.cyan.bold('\n⏱️ TIMING SUMMARY'));
console.log(chalk.gray('─'.repeat(60)));
// Total execution time
console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`));
console.log();
// Agent breakdown
// Printed only when at least one agent duration was recorded.
if (Object.keys(timingResults.agents).length > 0) {
console.log(chalk.magenta.bold('🤖 Agent Breakdown:'));
let agentTotal = 0;
for (const [agent, duration] of Object.entries(timingResults.agents)) {
// Share of the total duration, to one decimal place.
const percentage = ((duration / totalDuration) * 100).toFixed(1);
// Agent keys use hyphens; show them as spaces.
const displayName = agent.replace(/-/g, ' ');
console.log(
chalk.magenta(
` ${displayName.padEnd(20)} ${formatDuration(duration).padStart(8)} (${percentage}%)`
)
);
agentTotal += duration;
}
// Sum of all agent durations, with its own share of the total.
console.log(
chalk.gray(
` ${'Agents Total'.padEnd(20)} ${formatDuration(agentTotal).padStart(8)} (${((agentTotal / totalDuration) * 100).toFixed(1)}%)`
)
);
}
// Cost breakdown
// Printed only when at least one agent cost was recorded.
if (Object.keys(costResults.agents).length > 0) {
console.log(chalk.green.bold('\n💰 Cost Breakdown:'));
for (const [agent, cost] of Object.entries(costResults.agents)) {
const displayName = agent.replace(/-/g, ' ');
// Costs are shown with four decimal places.
console.log(chalk.green(` ${displayName.padEnd(20)} $${cost.toFixed(4).padStart(8)}`));
}
// The total comes from costResults.total, not from summing the entries above.
console.log(chalk.gray(` ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`));
}
console.log(chalk.gray('─'.repeat(60)));
};
-264
View File
@@ -1,264 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { AGENTS } from '../session-manager.js';
// Loosely-typed input payload of a tool call. Every named field is optional and
// the index signature admits any other key, so consumers must handle absence.
interface ToolCallInput {
// Target URL (read for browser navigation).
url?: string;
// Description of the page element being acted on.
element?: string;
// Key name (read for key presses).
key?: string;
// Form fields; only the count is used in this file.
fields?: unknown[];
// Text to wait for.
text?: string;
// Tab action name.
action?: string;
// Task description.
description?: string;
// Todo list entries, each with a status and its content.
todos?: Array<{
status: string;
content: string;
}>;
[key: string]: unknown;
}
// A tool invocation as parsed from a JSON tool_use line: the tool's name plus an
// optional input payload.
interface ToolCall {
name: string;
input?: ToolCallInput;
}
/**
 * Produce a short display form of a URL.
 *
 * @param url - The URL text to shorten.
 * @returns The parsed hostname; when the text is not a valid URL or the hostname
 *   is empty, the first 30 characters of the input instead.
 */
function extractDomain(url: string): string {
  const fallback = (): string => url.slice(0, 30);

  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not parseable as a URL: show the truncated raw text.
    return fallback();
  }

  // Some valid URLs (e.g. file:) have an empty hostname.
  return hostname || fallback();
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * The most recently listed completed task wins; failing that, the first task
 * that is in progress is shown.
 *
 * @param input - Tool call input, possibly missing or without a todo list.
 * @returns The progress line, or null when there is nothing to show.
 */
function summarizeTodoUpdate(input: ToolCallInput | undefined): string | null {
  const items = input?.todos;
  if (!items || !Array.isArray(items)) {
    return null;
  }

  const withStatus = (wanted: string) => items.filter((todo) => todo.status === wanted);
  const done = withStatus('completed');
  const active = withStatus('in_progress');

  // Last completed entry in list order.
  const lastDone = done[done.length - 1];
  if (lastDone !== undefined) {
    return `${lastDone.content}`;
  }

  // Otherwise the first entry still being worked on.
  const [firstActive] = active;
  if (firstActive !== undefined) {
    return `🔄 ${firstActive.content}`;
  }

  return null;
}
/**
 * Pick the bracketed label used to tag output from a parallel agent.
 *
 * Lookup happens in two passes: first by each known agent's display name, then
 * by keyword. Order matters in both passes, and the first hit wins.
 *
 * @param description - Free-text description of the agent.
 * @returns The matching label, or '[Agent]' when nothing matches.
 */
export function getAgentPrefix(description: string): string {
  // Agent name → label, in lookup order.
  const labelsByAgent: ReadonlyArray<readonly [string, string]> = [
    ['injection-vuln', '[Injection]'],
    ['xss-vuln', '[XSS]'],
    ['auth-vuln', '[Auth]'],
    ['authz-vuln', '[Authz]'],
    ['ssrf-vuln', '[SSRF]'],
    ['injection-exploit', '[Injection]'],
    ['xss-exploit', '[XSS]'],
    ['auth-exploit', '[Auth]'],
    ['authz-exploit', '[Authz]'],
    ['ssrf-exploit', '[SSRF]'],
  ];

  // Pass 1: match on the agent's display name.
  for (const [agentName, label] of labelsByAgent) {
    const agent = AGENTS[agentName as keyof typeof AGENTS];
    if (!agent) {
      continue;
    }
    if (description.includes(agent.displayName)) {
      return label;
    }
  }

  // Pass 2: keyword fallback for backwards compatibility.
  // 'authz' is listed before 'auth' because 'auth' is a substring of it.
  const keywordLabels: ReadonlyArray<readonly [string, string]> = [
    ['injection', '[Injection]'],
    ['xss', '[XSS]'],
    ['authz', '[Authz]'],
    ['auth', '[Auth]'],
    ['ssrf', '[SSRF]'],
  ];
  const hit = keywordLabels.find(([keyword]) => description.includes(keyword));

  return hit ? hit[1] : '[Agent]';
}
/**
 * Turn a Playwright browser tool call into a short progress line.
 *
 * Each known tool name maps to a fixed phrase, optionally filled in with a
 * truncated detail from the call's input. Unknown tools fall back to a generic
 * line built from the last underscore-separated segment of the tool name.
 *
 * @param toolCall - The tool call to describe.
 * @returns The progress line.
 */
function formatBrowserAction(toolCall: ToolCall): string {
  const { name } = toolCall;
  const args = toolCall.input || {};

  switch (name) {
    // Core browser operations
    case 'mcp__playwright__browser_navigate':
      return `🌐 Navigating to ${extractDomain(args.url || '')}`;
    case 'mcp__playwright__browser_navigate_back':
      return `⬅️ Going back`;

    // Page interaction
    case 'mcp__playwright__browser_click':
      return `🖱️ Clicking ${(args.element || 'element').slice(0, 25)}`;
    case 'mcp__playwright__browser_hover':
      return `👆 Hovering over ${(args.element || 'element').slice(0, 20)}`;
    case 'mcp__playwright__browser_type':
      return `⌨️ Typing in ${(args.element || 'field').slice(0, 20)}`;
    case 'mcp__playwright__browser_press_key':
      return `⌨️ Pressing ${args.key || 'key'}`;

    // Form handling
    case 'mcp__playwright__browser_fill_form':
      return `📝 Filling ${args.fields?.length || 0} form fields`;
    case 'mcp__playwright__browser_select_option':
      return `📋 Selecting dropdown option`;
    case 'mcp__playwright__browser_file_upload':
      return `📁 Uploading file`;

    // Page analysis
    case 'mcp__playwright__browser_snapshot':
      return `📸 Taking page snapshot`;
    case 'mcp__playwright__browser_take_screenshot':
      return `📸 Taking screenshot`;
    case 'mcp__playwright__browser_evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting and monitoring
    case 'mcp__playwright__browser_wait_for':
      return args.text
        ? `⏳ Waiting for "${args.text.slice(0, 20)}"`
        : `⏳ Waiting for page response`;
    case 'mcp__playwright__browser_console_messages':
      return `📜 Checking console logs`;
    case 'mcp__playwright__browser_network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab management
    case 'mcp__playwright__browser_tabs':
      return `🗂️ ${args.action || 'managing'} browser tab`;

    // Dialog handling
    case 'mcp__playwright__browser_handle_dialog':
      return `💬 Handling browser dialog`;

    // Anything not listed above
    default:
      return `🌐 Browser: ${name.split('_').pop()}`;
  }
}
/**
 * Strip raw JSON tool-call lines out of agent output.
 *
 * Blank lines are dropped. A line that starts with a tool_use JSON object is
 * replaced by a friendly one-liner for Task, TodoWrite and Playwright browser
 * calls, and removed for every other tool. Ordinary text lines pass through
 * unchanged, as do tool_use lines that fail to parse or to format.
 *
 * @param content - Raw output text; null, undefined and empty text yield ''.
 * @returns The filtered text, lines joined with '\n'.
 */
export function filterJsonToolCalls(content: string | null | undefined): string {
  if (typeof content !== 'string' || content === '') {
    return content || '';
  }

  // Friendly replacement for a parsed tool call, or null when it should be hidden.
  const describeToolCall = (toolCall: ToolCall): string | null => {
    if (toolCall.name === 'Task') {
      return `🚀 Launching ${toolCall.input?.description || 'analysis agent'}`;
    }
    if (toolCall.name === 'TodoWrite') {
      return summarizeTodoUpdate(toolCall.input) || null;
    }
    if (toolCall.name.startsWith('mcp__playwright__browser_')) {
      return formatBrowserAction(toolCall) || null;
    }
    // Every other tool (Read, Write, Grep, etc.) is hidden.
    return null;
  };

  const kept: string[] = [];

  for (const rawLine of content.split('\n')) {
    const trimmed = rawLine.trim();
    if (trimmed === '') {
      continue;
    }

    // Assistant text is kept exactly as written.
    let rendered: string | null = rawLine;

    if (trimmed.startsWith('{"type":"tool_use"')) {
      try {
        rendered = describeToolCall(JSON.parse(trimmed) as ToolCall);
      } catch {
        // Parsing or formatting failed: treat the line as regular text.
        rendered = rawLine;
      }
    }

    if (rendered !== null) {
      kept.push(rendered);
    }
  }

  return kept.join('\n');
}
+5 -5
View File
@@ -33,11 +33,11 @@
"exactOptionalPropertyTypes": true, "exactOptionalPropertyTypes": true,
// Style Options // Style Options
// "noImplicitReturns": true, "noImplicitReturns": true,
// "noImplicitOverride": true, "noImplicitOverride": true,
// "noUnusedLocals": true, "noUnusedLocals": true,
// "noUnusedParameters": true, "noUnusedParameters": true,
// "noFallthroughCasesInSwitch": true, "noFallthroughCasesInSwitch": true,
// "noPropertyAccessFromIndexSignature": true, // "noPropertyAccessFromIndexSignature": true,
// Recommended Options // Recommended Options