Initial commit

Co-Authored-By: Nellie Mullane <nellie@keygraph.io>
This commit is contained in:
ajmallesh
2025-10-03 19:35:08 -07:00
commit 8f52722d56
69 changed files with 16062 additions and 0 deletions
+309
View File
@@ -0,0 +1,309 @@
import chalk from 'chalk';
import { path } from 'zx';
/**
 * Turns raw agent turn data into concise console progress output.
 *
 * Two modes are supported:
 * - 'parallel': every agent owns a slot in a single status line that is
 *   redrawn in place whenever one of the agents reports something meaningful.
 * - 'single': meaningful tool calls are printed one per line, while noisy file
 *   operations are counted and collapsed into periodic summary lines.
 */
export class AgentStatusManager {
  /**
   * @param {object} [options]
   * @param {string} [options.mode='parallel'] - 'parallel' or 'single'.
   * @param {number} [options.summaryInterval=10] - Number of hidden operations
   *   between two "[N file operations...]" summary lines (single mode).
   * @param {boolean} [options.showTodos=true] - Set to false to hide TodoWrite updates.
   */
  constructor(options = {}) {
    this.mode = options.mode || 'parallel'; // 'parallel' or 'single'
    this.activeStatuses = new Map();
    this.lastStatusLine = '';
    this.hiddenOperationCount = 0;
    this.lastSummaryCount = 0;
    this.summaryInterval = options.summaryInterval || 10;
    this.showTodos = options.showTodos !== false;
    // Tools to completely hide in output
    this.suppressedTools = new Set([
      'Read', 'Write', 'Edit', 'MultiEdit',
      'Grep', 'Glob', 'LS'
    ]);
    // Tools that might be noisy bash commands to hide
    this.hiddenBashCommands = new Set([
      'pwd', 'echo', 'ls', 'cd'
    ]);
  }

  /**
   * Update status for an agent based on its current turn data.
   *
   * @param {string} agentName - Key of the agent in the status line.
   * @param {object} turnData - Object with optional `tool_use` and `assistant_text`.
   */
  updateAgentStatus(agentName, turnData) {
    if (this.mode === 'single') {
      this.handleSingleAgentOutput(agentName, turnData);
      return;
    }
    const status = this.extractMeaningfulStatus(turnData);
    if (status) {
      this.activeStatuses.set(agentName, status);
      this.redrawStatusLine();
    }
  }

  /**
   * Handle output for single agent mode with clean formatting.
   * Hidden tools only bump a counter (with a summary every `summaryInterval`
   * operations); meaningful tools are printed; turns without tool use are
   * planning/thinking turns and produce no output.
   *
   * @param {string} agentName - Unused here; kept for a uniform signature.
   * @param {object} turnData - Object with an optional `tool_use` entry.
   */
  handleSingleAgentOutput(agentName, turnData) {
    const toolUse = turnData.tool_use;
    if (!toolUse) {
      return;
    }
    // Check if this is a tool we should hide
    if (this.shouldHideTool(toolUse)) {
      this.hiddenOperationCount++;
      // Show summary every N hidden operations
      const operationCount = this.hiddenOperationCount - this.lastSummaryCount;
      if (operationCount >= this.summaryInterval) {
        console.log(chalk.gray(` [${operationCount} file operations...]`));
        this.lastSummaryCount = this.hiddenOperationCount;
      }
      return;
    }
    // Format and show meaningful tools
    const formatted = this.formatMeaningfulTool(toolUse);
    if (formatted) {
      console.log(`🤖 ${formatted}`);
    }
  }

  /**
   * Check if a tool should be hidden from output.
   *
   * @param {object} toolUse - Tool call with `name` and optional `input`.
   * @returns {boolean} true when the call is noise that should not be printed.
   */
  shouldHideTool(toolUse) {
    const toolName = toolUse.name;
    // Always hide these tools
    if (this.suppressedTools.has(toolName)) {
      return true;
    }
    // Hide TodoWrite unless we're configured to show todos
    if (toolName === 'TodoWrite' && !this.showTodos) {
      return true;
    }
    // Hide simple bash commands
    if (toolName === 'Bash') {
      // Trim and split on any whitespace so " ls -la" or "ls\t-la" are still recognised
      const command = String(toolUse.input?.command || '');
      const simpleCommand = command.trim().split(/\s+/)[0];
      return this.hiddenBashCommands.has(simpleCommand);
    }
    return false;
  }

  /**
   * Format meaningful tools for single agent display.
   *
   * @param {object} toolUse - Tool call with `name` and optional `input`.
   * @returns {string|null} Display text, or null when nothing should be shown.
   */
  formatMeaningfulTool(toolUse) {
    const toolName = toolUse.name;
    const input = toolUse.input || {};
    switch (toolName) {
      case 'Task': {
        const description = input.description || 'analysis agent';
        return `🚀 Launching ${description}`;
      }
      case 'TodoWrite':
        return this.showTodos ? this.formatTodoUpdate(input) : null;
      case 'WebFetch': {
        const domain = this.extractDomain(input.url || '');
        return `🌐 Fetching ${domain}`;
      }
      case 'Bash': {
        // Only show meaningful bash commands
        const command = String(input.command || '');
        const isScanner = ['nmap', 'subfinder', 'whatweb'].some((name) => command.includes(name));
        if (isScanner) {
          const tool = command.trim().split(/\s+/)[0];
          return `🔍 Running ${tool}`;
        }
        return null;
      }
      // Browser tools (keep existing formatting)
      default:
        if (typeof toolName === 'string' && toolName.startsWith('mcp__playwright__browser_')) {
          return this.extractBrowserAction(toolUse);
        }
        return null;
    }
  }

  /**
   * Format TodoWrite updates for display. The most recently listed completed
   * todo wins over the first in-progress one.
   *
   * @param {object} input - TodoWrite input; `todos` entries carry `status` and `content`.
   * @returns {string|null} Text truncated to 50 characters, or null when there is nothing to report.
   */
  formatTodoUpdate(input) {
    if (!input.todos || !Array.isArray(input.todos)) {
      return null;
    }
    // Coerce to string so a todo without textual content cannot crash the display
    const clip = (value) => {
      const text = String(value ?? '');
      return text.length > 50 ? `${text.slice(0, 50)}...` : text;
    };
    const todos = input.todos;
    const completed = todos.filter((t) => t.status === 'completed');
    if (completed.length > 0) {
      return clip(completed[completed.length - 1].content);
    }
    const inProgress = todos.filter((t) => t.status === 'in_progress');
    if (inProgress.length > 0) {
      return `🔄 ${clip(inProgress[0].content)}`;
    }
    return null;
  }

  /**
   * Extract meaningful status from turn data, suppressing internal operations.
   *
   * @param {object} turnData - Object with optional `tool_use` and `assistant_text`.
   * @returns {string|null} Status text for the status line, or null to show nothing.
   */
  extractMeaningfulStatus(turnData) {
    const toolName = turnData.tool_use?.name;
    // Check for tool use first
    if (toolName) {
      // Suppress internal operations completely
      if (this.suppressedTools.has(toolName)) {
        return null;
      }
      // Show browser testing actions
      if (toolName.startsWith('mcp__playwright__browser_')) {
        return this.extractBrowserAction(turnData.tool_use);
      }
      // Show Task agent launches
      if (toolName === 'Task') {
        const description = turnData.tool_use.input?.description || 'analysis';
        return `🚀 ${description.slice(0, 40)}`;
      }
    }
    // Parse assistant text for progress milestones
    if (turnData.assistant_text) {
      return this.extractProgressFromText(turnData.assistant_text);
    }
    return null; // Suppress everything else
  }

  /**
   * Extract browser action details. The action type is the last
   * underscore-separated segment of the tool name.
   *
   * @param {object} toolUse - Browser tool call with `name` and optional `input`.
   * @returns {string} Display text for the action.
   */
  extractBrowserAction(toolUse) {
    const actionType = toolUse.name.split('_').pop();
    switch (actionType) {
      case 'navigate': {
        const domain = this.extractDomain(toolUse.input?.url || '');
        return `🌐 Testing ${domain}`;
      }
      case 'click': {
        const element = String(toolUse.input?.element || 'element');
        return `🖱️ Clicking ${element.slice(0, 20)}`;
      }
      case 'fill':
      case 'form':
        return `📝 Testing form inputs`;
      case 'snapshot':
        return `📸 Capturing page state`;
      case 'type':
        return `⌨️ Testing input fields`;
      default:
        return `🌐 Browser: ${actionType}`;
    }
  }

  /**
   * Extract meaningful progress from assistant text (single-agent mode only).
   * Currently always yields null: progress is expected to come from tool
   * formatting rather than from text parsing.
   *
   * @param {string} text - Assistant text of the turn.
   * @returns {null}
   */
  extractProgressFromText(text) {
    // Only extract progress for single agents, not parallel ones
    if (this.mode !== 'single') {
      return null;
    }
    // For single agents, be very conservative about what we show
    // Most progress should come from tool formatting, not text parsing
    return null;
  }

  /**
   * Extract domain from URL for display.
   *
   * @param {string} url - URL or arbitrary text.
   * @returns {string} The hostname, or the first 30 characters when the input
   *   is not a parseable URL or has no hostname.
   */
  extractDomain(url) {
    const text = String(url ?? '');
    try {
      return new URL(text).hostname || text.slice(0, 30);
    } catch {
      return text.slice(0, 30);
    }
  }

  /**
   * Redraw the status line showing all active agents.
   */
  redrawStatusLine() {
    // Clear previous line
    if (this.lastStatusLine) {
      process.stdout.write('\r' + ' '.repeat(this.lastStatusLine.length) + '\r');
    }
    // Build new status line
    const statusEntries = Array.from(this.activeStatuses.entries())
      .map(([agent, status]) => `[${chalk.cyan(agent)}] ${status}`)
      .join(' | ');
    if (statusEntries) {
      process.stdout.write(statusEntries);
      this.lastStatusLine = statusEntries.replace(/\u001b\[[0-9;]*m/g, ''); // Remove ANSI codes for length calc
    }
  }

  /**
   * Clear status for a specific agent.
   *
   * @param {string} agentName - Key of the agent to remove from the status line.
   */
  clearAgentStatus(agentName) {
    this.activeStatuses.delete(agentName);
    this.redrawStatusLine();
  }

  /**
   * Clear all statuses and finish the status line.
   */
  finishStatusLine() {
    if (this.lastStatusLine) {
      process.stdout.write('\n'); // Move to next line
      this.lastStatusLine = '';
      this.activeStatuses.clear();
    }
  }

  /**
   * Parse JSON tool use from message content.
   *
   * A regex cannot delimit the object reliably: tool calls carry a nested
   * `input` object, so stopping at the first `}` yields truncated JSON. The
   * object is therefore delimited by brace matching instead.
   *
   * @param {string} content - Message text that may embed a serialized tool call.
   * @returns {object|null} The first parseable `{"type":"tool_use",...}` object, or null.
   */
  parseToolUse(content) {
    if (typeof content !== 'string') {
      return null;
    }
    const marker = '{"type":"tool_use"';
    let start = content.indexOf(marker);
    while (start !== -1) {
      const end = this.findJsonObjectEnd(content, start);
      if (end !== -1) {
        try {
          return JSON.parse(content.slice(start, end + 1));
        } catch {
          // Candidate was not valid JSON after all - try the next occurrence
        }
      }
      start = content.indexOf(marker, start + 1);
    }
    return null;
  }

  /**
   * Find the index of the `}` closing the object that opens at `start`.
   * Braces inside JSON string literals (including escaped quotes) are ignored.
   *
   * @param {string} text - Text being scanned.
   * @param {number} start - Index of the opening `{`.
   * @returns {number} Index of the matching `}`, or -1 when the object never closes.
   */
  findJsonObjectEnd(text, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  }
}
+536
View File
@@ -0,0 +1,536 @@
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { query } from '@anthropic-ai/claude-code';
import { isRetryableError, getRetryDelay, PentestError } from '../error-handling.js';
import { ProgressIndicator } from '../progress-indicator.js';
import { timingResults, costResults, Timer, formatDuration } from '../utils/metrics.js';
import { createGitCheckpoint, commitGitSuccess, rollbackGitWorkspace } from '../utils/git-manager.js';
import { savePromptSnapshot } from '../prompts/prompt-manager.js';
import { AGENT_VALIDATORS } from '../constants.js';
import { filterJsonToolCalls, getAgentPrefix } from '../utils/output-formatter.js';
import { generateSessionLogPath } from '../session-manager.js';
// Validates an agent run by looking up the validator registered under the agent's name.
// Resolves to false when the run itself was unsuccessful or when validation throws,
// to true when no validator is registered, and otherwise to whatever the validator yields.
async function validateAgentOutput(result, agentName, sourceDir) {
  console.log(chalk.blue(` 🔍 Validating ${agentName} agent output`));
  try {
    // A run only counts when it reports success AND produced a result
    const ranSuccessfully = Boolean(result.success && result.result);
    if (!ranSuccessfully) {
      console.log(chalk.red(` ❌ Validation failed: Agent execution was unsuccessful`));
      return false;
    }

    const checkDeliverables = AGENT_VALIDATORS[agentName];
    if (checkDeliverables) {
      console.log(chalk.blue(` 📋 Using validator for agent: ${agentName}`));
      console.log(chalk.blue(` 📂 Source directory: ${sourceDir}`));
      // Run the agent-specific check against the working directory
      const outcome = await checkDeliverables(sourceDir);
      const verdict = outcome
        ? chalk.green(` ✅ Validation passed: Required files/structure present`)
        : chalk.red(` ❌ Validation failed: Missing required deliverable files`);
      console.log(verdict);
      return outcome;
    }

    // Agents without a registered validator are accepted as-is
    console.log(chalk.yellow(` ⚠️ No validator found for agent "${agentName}" - assuming success`));
    console.log(chalk.green(` ✅ Validation passed: Unknown agent with successful result`));
    return true;
  } catch (validationError) {
    console.log(chalk.red(` ❌ Validation failed with error: ${validationError.message}`));
    return false; // Treat a crashing validator as invalid output
  }
}
// Pure function: Run Claude Code with SDK - Maximum Autonomy
// WARNING: This is a low-level function. Use runClaudePromptWithRetry() for agent execution to ensure:
// - Retry logic and error handling
// - Output validation
// - Prompt snapshotting for debugging
// - Git checkpoint/rollback safety
//
// Parameters:
// - prompt / context: joined (context first) and passed to the SDK as the custom system prompt
// - sourceDir: working directory handed to the SDK; also where error.log is appended on failure
// - allowedTools: accepted for signature compatibility; not read by this function
// - description: display name; also drives the output mode and the metrics/log keys
// - colorFn: colouring function applied to streamed assistant output
// - sessionMetadata: { webUrl, sessionId }; when both are set a detailed log file is written
//
// Never rejects for execution errors: resolves to
// { result, success: true, duration, turns, cost, apiErrorDetected, logFile? } on completion or to
// { error, errorType, prompt, success: false, duration, retryable } when execution throws.
async function runClaudePrompt(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', colorFn = chalk.cyan, sessionMetadata = null) {
  // Normalised identifier shared by the timer, the log file name and the metrics maps
  const agentKey = description.toLowerCase().replace(/\s+/g, '-');
  const timer = new Timer(`agent-${agentKey}`);
  const fullPrompt = context ? `${context}\n\n${prompt}` : prompt;
  let totalCost = 0;
  // Declared outside the try block because the catch block reports it as well
  let turnCount = 0;

  // Auto-detect execution mode to adjust logging behavior
  const isParallelExecution = description.includes('vuln agent') || description.includes('exploit agent');
  const useCleanOutput = description.includes('Pre-recon agent') ||
    description.includes('Recon agent') ||
    description.includes('Executive Summary and Report Cleanup') ||
    isParallelExecution;

  // Disable status manager - using simple JSON filtering for all agents now
  const statusManager = null;

  // Label used by the completion and failure messages of progress-indicator agents
  const completionLabel = description.includes('Pre-recon') ? 'Pre-recon analysis' :
    description.includes('Recon') ? 'Reconnaissance' :
    description.includes('Report') ? 'Report generation' : 'Analysis';

  // Setup progress indicator for clean output agents
  let progressIndicator = null;
  if (useCleanOutput) {
    const agentType = description.includes('Pre-recon') ? 'pre-reconnaissance' :
      description.includes('Recon') ? 'reconnaissance' :
      description.includes('Report') ? 'report generation' : 'analysis';
    progressIndicator = new ProgressIndicator(`Running ${agentType}...`);
  }

  // Setup detailed logging for all agents (if session metadata is available)
  let logFilePath = null;
  const logBuffer = [];
  if (sessionMetadata && sessionMetadata.webUrl && sessionMetadata.sessionId) {
    const timestamp = new Date().toISOString().replace(/T/, '_').replace(/[:.]/g, '-').slice(0, 19);
    // Use session-based folder structure
    const logDir = generateSessionLogPath(sessionMetadata.webUrl, sessionMetadata.sessionId);
    await fs.ensureDir(logDir);
    logFilePath = path.join(logDir, `${timestamp}_${agentKey}_attempt-1.log`);
    // Initialize log with agent startup info
    logBuffer.push(`=== ${description} - Detailed Execution Log ===`);
    logBuffer.push(`Timestamp: ${new Date().toISOString()}`);
    logBuffer.push(`Working Directory: ${sourceDir}`);
    logBuffer.push(`Session ID: ${sessionMetadata.sessionId}`);
    logBuffer.push(`Log File: ${logFilePath}`);
    logBuffer.push(`\n=== Agent Execution Start ===\n`);
  } else {
    console.log(chalk.blue(` 🤖 Running Claude Code: ${description}...`));
  }

  try {
    const options = {
      model: 'claude-sonnet-4-20250514', // Use latest Claude 4 Sonnet
      maxTurns: 10_000, // Maximum turns for autonomous work
      cwd: sourceDir, // Set working directory using SDK option
      permissionMode: 'bypassPermissions', // Bypass all permission checks for pentesting
      customSystemPrompt: fullPrompt, // Use system prompt for better security and consistency
    };
    // SDK Options only shown for verbose agents (not clean output)
    if (!useCleanOutput) {
      console.log(chalk.gray(` SDK Options: maxTurns=${options.maxTurns}, cwd=${sourceDir}, permissions=BYPASS`));
    }

    let result = null;
    let apiErrorDetected = false;

    // Start progress indicator for clean output agents
    if (progressIndicator) {
      progressIndicator.start();
    }

    for await (const message of query({ prompt: 'Begin.', options })) {
      if (message.type === "assistant") {
        turnCount++;
        // Flatten structured content: text parts as-is, everything else (e.g. tool calls) as JSON
        const content = Array.isArray(message.message.content)
          ? message.message.content.map(c => c.text || JSON.stringify(c)).join('\n')
          : message.message.content;

        if (statusManager) {
          // Smart status updates for parallel execution
          const toolUse = statusManager.parseToolUse(content);
          statusManager.updateAgentStatus(description, {
            tool_use: toolUse,
            assistant_text: content,
            turnCount
          });
        } else if (useCleanOutput) {
          // Clean output for all agents: filter JSON tool calls but show meaningful text
          const cleanedContent = filterJsonToolCalls(content);
          if (cleanedContent.trim()) {
            // Temporarily stop progress indicator to show output
            if (progressIndicator) {
              progressIndicator.stop();
            }
            if (isParallelExecution) {
              // Compact output for parallel agents with prefixes
              const prefix = getAgentPrefix(description);
              console.log(colorFn(`${prefix} ${cleanedContent}`));
            } else {
              // Full turn output for single agents
              console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
              console.log(colorFn(` ${cleanedContent}`));
            }
            // Restart progress indicator after output
            if (progressIndicator) {
              progressIndicator.start();
            }
          }
        } else {
          // Full streaming output - show complete messages with specialist color
          console.log(colorFn(`\n 🤖 Turn ${turnCount} (${description}):`));
          console.log(colorFn(` ${content}`));
        }

        // Log full details to file for later review
        logBuffer.push(`\n🤖 Turn ${turnCount} (${description}):`);
        logBuffer.push(content);

        // Check for API error patterns in assistant message content
        if (content && typeof content === 'string') {
          const lowerContent = content.toLowerCase();
          if (lowerContent.includes('api error') || lowerContent.includes('terminated')) {
            apiErrorDetected = true;
            console.log(chalk.red(` ⚠️ API Error detected in assistant response: ${content.trim()}`));
          }
        }
      } else if (message.type === "system" && message.subtype === "init") {
        // Show useful system info only for verbose agents
        if (!useCleanOutput) {
          console.log(chalk.blue(` ️ Model: ${message.model}, Permission: ${message.permissionMode}`));
          if (message.mcp_servers && message.mcp_servers.length > 0) {
            const mcpStatus = message.mcp_servers.map(s => `${s.name}(${s.status})`).join(', ');
            console.log(chalk.blue(` 📦 MCP: ${mcpStatus}`));
          }
        }
      } else if (message.type === "user") {
        // Skip user messages (these are our own inputs echoed back)
        continue;
      } else if (message.type === "tool_use") {
        console.log(chalk.yellow(`\n 🔧 Using Tool: ${message.name}`));
        if (message.input && Object.keys(message.input).length > 0) {
          console.log(chalk.gray(` Input: ${JSON.stringify(message.input, null, 2)}`));
        }
      } else if (message.type === "tool_result") {
        console.log(chalk.green(` ✅ Tool Result:`));
        if (message.content) {
          // Show tool results but truncate if too long
          const resultStr = typeof message.content === 'string' ? message.content : JSON.stringify(message.content, null, 2);
          if (resultStr.length > 500) {
            console.log(chalk.gray(` ${resultStr.slice(0, 500)}...\n [Result truncated - ${resultStr.length} total chars]`));
          } else {
            console.log(chalk.gray(` ${resultStr}`));
          }
        }
      } else if (message.type === "result") {
        result = message.result;
        const cost = message.total_cost_usd || 0;

        if (!statusManager) {
          // Completion summary shared by clean and verbose agents: duration, cost and stop reason
          console.log(chalk.magenta(`\n 🏁 COMPLETED:`));
          console.log(chalk.gray(` ⏱️ Duration: ${(message.duration_ms/1000).toFixed(1)}s, Cost: $${cost.toFixed(4)}`));
          if (message.subtype === "error_max_turns") {
            console.log(chalk.red(` ⚠️ Stopped: Hit maximum turns limit`));
          } else if (message.subtype === "error_during_execution") {
            console.log(chalk.red(` ❌ Stopped: Execution error`));
          }
          if (message.permission_denials && message.permission_denials.length > 0) {
            console.log(chalk.yellow(` 🚫 ${message.permission_denials.length} permission denials`));
          }
          // Verbose agents additionally show the result content (if it's reasonable length)
          if (!useCleanOutput && result && typeof result === 'string') {
            if (result.length > 1000) {
              console.log(chalk.magenta(` 📄 ${result.slice(0, 1000)}... [${result.length} total chars]`));
            } else {
              console.log(chalk.magenta(` 📄 ${result}`));
            }
          }
        }

        // Track cost for all agents
        costResults.agents[agentKey] = cost;
        costResults.total += cost;
        // Store cost for return value
        totalCost = cost;
        break;
      } else {
        // Log any other message types we might not be handling
        console.log(chalk.gray(` 💬 ${message.type}: ${JSON.stringify(message, null, 2)}`));
      }
    }

    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // API error detection is logged but not immediately failed
    // Let the retry logic handle validation first
    if (apiErrorDetected) {
      console.log(chalk.yellow(` ⚠️ API Error detected in ${description} - will validate deliverables before failing`));
    }

    // Finish status line for parallel execution and save detailed log
    if (statusManager) {
      statusManager.clearAgentStatus(description);
      statusManager.finishStatusLine();
    }

    // Write detailed log to file
    if (logFilePath && logBuffer.length > 0) {
      logBuffer.push(`\n=== Agent Execution Complete ===`);
      logBuffer.push(`Duration: ${formatDuration(duration)}`);
      logBuffer.push(`Turns: ${turnCount}`);
      logBuffer.push(`Cost: $${totalCost.toFixed(4)}`);
      logBuffer.push(`Status: Success`);
      logBuffer.push(`Completed: ${new Date().toISOString()}`);
      await fs.writeFile(logFilePath, logBuffer.join('\n'));
    }

    // Show completion messages based on agent type
    if (progressIndicator) {
      // Single agents with progress indicator
      progressIndicator.finish(`${completionLabel} complete! (${turnCount} turns, ${formatDuration(duration)})`);
    } else if (isParallelExecution) {
      // Compact completion for parallel agents
      const prefix = getAgentPrefix(description);
      console.log(chalk.green(`${prefix} ✅ Complete (${turnCount} turns, ${formatDuration(duration)})`));
    } else if (!useCleanOutput) {
      // Verbose completion for remaining agents
      console.log(chalk.green(` ✅ Claude Code completed: ${description} (${turnCount} turns) in ${formatDuration(duration)}`));
    }

    // Return result with log file path for all agents
    const returnData = { result, success: true, duration, turns: turnCount, cost: totalCost, apiErrorDetected };
    if (logFilePath) {
      returnData.logFile = logFilePath;
    }
    return returnData;
  } catch (error) {
    const duration = timer.stop();
    timingResults.agents[agentKey] = duration;

    // Clear status for parallel execution before showing error
    if (statusManager) {
      statusManager.clearAgentStatus(description);
      statusManager.finishStatusLine();
    }

    // Write error log to file
    if (logFilePath && logBuffer.length > 0) {
      logBuffer.push(`\n=== Agent Execution Failed ===`);
      logBuffer.push(`Duration: ${formatDuration(duration)}`);
      logBuffer.push(`Turns: ${turnCount}`);
      logBuffer.push(`Error: ${error.message}`);
      logBuffer.push(`Error Type: ${error.constructor.name}`);
      logBuffer.push(`Status: Failed`);
      logBuffer.push(`Failed: ${new Date().toISOString()}`);
      try {
        await fs.writeFile(logFilePath, logBuffer.join('\n'));
      } catch (logError) {
        // A broken log file must not mask the original execution error
        console.log(chalk.gray(` (Failed to write execution log: ${logError.message})`));
      }
    }

    // Show error messages based on agent type
    if (progressIndicator) {
      // Single agents with progress indicator
      progressIndicator.stop();
      console.log(chalk.red(`${completionLabel} failed (${formatDuration(duration)})`));
    } else if (isParallelExecution) {
      // Compact error for parallel agents
      const prefix = getAgentPrefix(description);
      console.log(chalk.red(`${prefix} ❌ Failed (${formatDuration(duration)})`));
    } else if (!useCleanOutput) {
      // Verbose error for remaining agents
      console.log(chalk.red(` ❌ Claude Code failed: ${description} (${formatDuration(duration)})`));
    }

    const retryable = isRetryableError(error);
    console.log(chalk.red(` Error Type: ${error.constructor.name}`));
    console.log(chalk.red(` Message: ${error.message}`));
    console.log(chalk.gray(` Agent: ${description}`));
    console.log(chalk.gray(` Working Directory: ${sourceDir}`));
    console.log(chalk.gray(` Retryable: ${retryable ? 'Yes' : 'No'}`));
    // Log additional context if available
    if (error.code) {
      console.log(chalk.gray(` Error Code: ${error.code}`));
    }
    if (error.status) {
      console.log(chalk.gray(` HTTP Status: ${error.status}`));
    }

    // Save detailed error to log file for debugging
    try {
      const errorLog = {
        timestamp: new Date().toISOString(),
        agent: description,
        error: {
          name: error.constructor.name,
          message: error.message,
          code: error.code,
          status: error.status,
          stack: error.stack
        },
        context: {
          sourceDir,
          prompt: fullPrompt.slice(0, 200) + '...',
          retryable
        },
        duration
      };
      const logPath = path.join(sourceDir, 'error.log');
      await fs.appendFile(logPath, JSON.stringify(errorLog) + '\n');
    } catch (logError) {
      // Ignore logging errors to avoid cascading failures
      console.log(chalk.gray(` (Failed to write error log: ${logError.message})`));
    }

    return {
      error: error.message,
      errorType: error.constructor.name,
      prompt: fullPrompt.slice(0, 100) + '...',
      success: false,
      duration,
      retryable
    };
  }
}
// PREFERRED: Production-ready Claude agent execution with full orchestration
// This is the standard function for all agent execution. Provides:
// - Intelligent retry logic with exponential backoff
// - Output validation to ensure deliverables are created
// - Prompt snapshotting for debugging and reproducibility
// - Git checkpoint/rollback safety for workspace protection
// - Comprehensive error handling and logging
//
// Resolves to the runClaudePrompt() result of the first attempt whose output passes validation.
// Throws when an attempt fails with a non-retryable error, when validation still fails on the
// last attempt (PentestError of type 'validation'), or when all attempts are exhausted
// (the last error). Execution failures that runClaudePrompt() reports through its return value
// (success === false) are converted into a PentestError of type 'agent' and follow the same
// retry/rollback path as thrown errors.
export async function runClaudePromptWithRetry(prompt, sourceDir, allowedTools = 'Read', context = '', description = 'Claude analysis', agentName = null, colorFn = chalk.cyan, sessionMetadata = null) {
  const maxRetries = 3;
  let lastError;
  let retryContext = context; // Preserve context between retries
  console.log(chalk.cyan(`🚀 Starting ${description} with ${maxRetries} max attempts`));
  // Save prompt snapshot before execution starts (for debugging failed runs)
  let snapshotSaved = false;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Create checkpoint before each attempt
    await createGitCheckpoint(sourceDir, description, attempt);
    // Save snapshot on first attempt only (before any execution)
    if (!snapshotSaved && agentName) {
      const fullPrompt = retryContext ? `${retryContext}\n\n${prompt}` : prompt;
      await savePromptSnapshot(sourceDir, agentName, fullPrompt);
      snapshotSaved = true;
    }

    // Outcome of a failed attempt, filled either from a failed result or from a thrown error
    let failure;
    let retryable;
    try {
      const result = await runClaudePrompt(prompt, sourceDir, allowedTools, retryContext, description, colorFn, sessionMetadata);

      // Validate output after successful run
      if (result.success) {
        const validationPassed = await validateAgentOutput(result, agentName, sourceDir);
        if (validationPassed) {
          // Check if API error was detected but validation passed
          if (result.apiErrorDetected) {
            console.log(chalk.yellow(`📋 Validation: Ready for exploitation despite API error warnings`));
          }
          // Commit successful changes (will include the snapshot)
          await commitGitSuccess(sourceDir, description);
          console.log(chalk.green.bold(`🎉 ${description} completed successfully on attempt ${attempt}/${maxRetries}`));
          return result;
        }

        // Agent completed but output validation failed
        console.log(chalk.yellow(`⚠️ ${description} completed but output validation failed`));
        // If API error detected AND validation failed, this is a retryable error
        if (result.apiErrorDetected) {
          console.log(chalk.yellow(`⚠️ API Error detected with validation failure - treating as retryable`));
          lastError = new Error('API Error: terminated with validation failure');
        } else {
          lastError = new Error('Output validation failed');
        }
        if (attempt < maxRetries) {
          // Rollback contaminated workspace
          await rollbackGitWorkspace(sourceDir, 'validation failure');
          continue;
        }
        // FAIL FAST - Don't continue with broken pipeline
        throw new PentestError(
          `Agent ${description} failed output validation after ${maxRetries} attempts. Required deliverable files were not created.`,
          'validation',
          false,
          { description, sourceDir, attemptsExhausted: maxRetries }
        );
      }

      // runClaudePrompt reports execution failures through its return value rather than by
      // throwing; without this branch a failed run would be skipped with no rollback or delay
      // and the loop would end in `throw undefined`.
      retryable = Boolean(result.retryable);
      failure = new PentestError(
        `Agent execution failed: ${result.error}`,
        'agent',
        retryable,
        { description, sourceDir, errorType: result.errorType }
      );
    } catch (error) {
      failure = error;
      // Check if error is retryable
      retryable = isRetryableError(error);
    }

    lastError = failure;
    if (!retryable) {
      console.log(chalk.red(`${description} failed with non-retryable error: ${failure.message}`));
      await rollbackGitWorkspace(sourceDir, 'non-retryable error cleanup');
      throw failure;
    }

    if (attempt < maxRetries) {
      // Rollback for clean retry
      await rollbackGitWorkspace(sourceDir, 'retryable error cleanup');
      const delay = getRetryDelay(failure, attempt);
      const delaySeconds = (delay / 1000).toFixed(1);
      console.log(chalk.yellow(`⚠️ ${description} failed (attempt ${attempt}/${maxRetries})`));
      console.log(chalk.gray(` Error: ${failure.message}`));
      console.log(chalk.gray(` Workspace rolled back, retrying in ${delaySeconds}s...`));
      // Preserve any partial results for next retry
      if (failure.partialResults) {
        retryContext = `${context}\n\nPrevious partial results: ${JSON.stringify(failure.partialResults)}`;
      }
      await new Promise(resolve => setTimeout(resolve, delay));
    } else {
      await rollbackGitWorkspace(sourceDir, 'final failure cleanup');
      console.log(chalk.red(`${description} failed after ${maxRetries} attempts`));
      console.log(chalk.red(` Final error: ${failure.message}`));
    }
  }
  throw lastError;
}
+889
View File
@@ -0,0 +1,889 @@
import { fs, path, $ } from 'zx';
import chalk from 'chalk';
import { PentestError } from './error-handling.js';
import { parseConfig, distributeConfig } from './config-parser.js';
import { executeGitCommandWithRetry } from './utils/git-manager.js';
import {
AGENTS,
PHASES,
selectSession,
validateAgent,
validateAgentRange,
validatePhase,
checkPrerequisites,
getNextAgent,
markAgentCompleted,
markAgentFailed,
getSessionStatus,
rollbackToAgent,
updateSession
} from './session-manager.js';
// Ensures the target path exists and contains a `.git` entry.
// Resolves to true; otherwise throws a non-retryable PentestError
// ('filesystem' when the path is missing, 'validation' when it is not a git repository).
const validateTargetRepo = async (targetRepo) => {
  // An empty/undefined path is rejected without touching the filesystem
  const repoExists = Boolean(targetRepo) && await fs.pathExists(targetRepo);
  if (!repoExists) {
    throw new PentestError(
      `Target repository '${targetRepo}' not found or not accessible`,
      'filesystem',
      false,
      { targetRepo }
    );
  }

  // Check if it's a git repository
  const hasGitMetadata = await fs.pathExists(path.join(targetRepo, '.git'));
  if (hasGitMetadata) {
    return true;
  }
  throw new PentestError(
    `Target repository '${targetRepo}' is not a git repository`,
    'validation',
    false,
    { targetRepo }
  );
};
// Resolves to the trimmed stdout of `git rev-parse HEAD` run for the target repository.
// Any failure is rethrown as a non-retryable PentestError of type 'git'.
export const getGitCommitHash = async (targetRepo) => {
  const gitArgs = ['git', 'rev-parse', 'HEAD'];
  try {
    const headLookup = await executeGitCommandWithRetry(gitArgs, targetRepo, 'getting commit hash');
    // Command output carries a trailing newline that must not end up in the hash
    const hash = headLookup.stdout.trim();
    return hash;
  } catch (cause) {
    const details = { targetRepo, originalError: cause.message };
    throw new PentestError(
      `Failed to get git commit hash: ${cause.message}`,
      'git',
      false,
      details
    );
  }
};
// Resets the workspace to the given commit (`git reset --hard <hash>`) and then removes
// untracked files and directories (`git clean -fd`).
// Any failure is rethrown as a non-retryable PentestError of type 'git'.
const rollbackGitToCommit = async (targetRepo, commitHash) => {
  // Commands run strictly in this order; the second only runs when the first succeeded
  const steps = [
    [['git', 'reset', '--hard', commitHash], 'rollback to commit'],
    [['git', 'clean', '-fd'], 'cleaning after rollback'],
  ];
  try {
    for (const [gitArgs, label] of steps) {
      await executeGitCommandWithRetry(gitArgs, targetRepo, label);
    }
    const shortHash = commitHash.substring(0, 8);
    console.log(chalk.green(`✅ Git workspace rolled back to commit ${shortHash}`));
  } catch (rollbackError) {
    throw new PentestError(
      `Failed to rollback git workspace: ${rollbackError.message}`,
      'git',
      false,
      { targetRepo, commitHash, originalError: rollbackError.message }
    );
  }
};
// Run a single agent with retry logic and checkpointing
export const runSingleAgent = async (agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, allowRerun = false, skipWorkspaceClean = false) => {
// Validate agent first
const agent = validateAgent(agentName);
console.log(chalk.cyan(`\n🤖 Running agent: ${agent.displayName}`));
// Reload session to get latest state (important for agent ranges)
const { getSession } = await import('./session-manager.js');
const freshSession = await getSession(session.id);
if (!freshSession) {
throw new PentestError(`Session ${session.id} not found`, 'validation', false);
}
// Use fresh session for all subsequent checks
session = freshSession;
// Warn if session is completed
if (session.status === 'completed') {
console.log(chalk.yellow('⚠️ This session is already completed. Re-running will modify completed results.'));
}
// Block re-running completed agents unless explicitly allowed - use --rerun for explicit rollback and re-run
if (!allowRerun && session.completedAgents.includes(agentName)) {
throw new PentestError(
`Agent '${agentName}' has already been completed. Use --rerun ${agentName} for explicit rollback and re-execution.`,
'validation',
false,
{
agentName,
suggestion: `--rerun ${agentName}`,
completedAgents: session.completedAgents
}
);
}
const targetRepo = session.targetRepo;
await validateTargetRepo(targetRepo);
// Check prerequisites
checkPrerequisites(session, agentName);
// Additional safety check: if this agent is not completed but we have uncommitted changes,
// it might be from a previous interrupted run. Clean the workspace to be safe.
// Skip workspace cleaning during parallel execution to avoid agents interfering with each other
if (!session.completedAgents.includes(agentName) && !allowRerun && !skipWorkspaceClean) {
try {
const status = await executeGitCommandWithRetry(['git', 'status', '--porcelain'], targetRepo, 'checking workspace status');
const hasUncommittedChanges = status.stdout.trim().length > 0;
if (hasUncommittedChanges) {
console.log(chalk.yellow(` ⚠️ Detected uncommitted changes before running ${agentName}`));
console.log(chalk.yellow(` 🧹 Cleaning workspace to ensure clean agent execution`));
await executeGitCommandWithRetry(['git', 'reset', '--hard', 'HEAD'], targetRepo, 'cleaning workspace');
await executeGitCommandWithRetry(['git', 'clean', '-fd'], targetRepo, 'removing untracked files');
console.log(chalk.green(` ✅ Workspace cleaned successfully`));
}
} catch (error) {
console.log(chalk.yellow(` ⚠️ Could not check/clean workspace: ${error.message}`));
}
}
// Create checkpoint before execution
const variables = {
webUrl: session.webUrl,
repoPath: session.repoPath,
sourceDir: targetRepo
};
// Handle relative config paths - prepend configs/ if needed
let configPath = null;
if (session.configFile) {
configPath = session.configFile.startsWith('configs/')
? session.configFile
: path.join('configs', session.configFile);
}
const config = configPath ? await parseConfig(configPath) : null;
const distributedConfig = config ? distributeConfig(config) : null;
// Removed prompt snapshotting - using live prompts from repo
// Initialize variables that will be used in both try and catch blocks
let validationData = null;
let timingData = null;
let costData = null;
try {
// Load and run the appropriate prompt
let promptName = getPromptName(agentName);
const prompt = await loadPrompt(promptName, variables, distributedConfig, pipelineTestingMode);
// Get color function for this agent
const getAgentColor = (agentName) => {
const colorMap = {
'injection-vuln': chalk.red,
'injection-exploit': chalk.red,
'xss-vuln': chalk.yellow,
'xss-exploit': chalk.yellow,
'auth-vuln': chalk.blue,
'auth-exploit': chalk.blue,
'ssrf-vuln': chalk.magenta,
'ssrf-exploit': chalk.magenta,
'authz-vuln': chalk.green,
'authz-exploit': chalk.green
};
return colorMap[agentName] || chalk.cyan;
};
const result = await runClaudePromptWithRetry(
prompt,
targetRepo,
'*',
'',
AGENTS[agentName].displayName,
agentName, // Pass agent name for snapshot creation
getAgentColor(agentName), // Pass color function for this agent
{ webUrl: session.webUrl, sessionId: session.id } // Session metadata for logging
);
if (!result.success) {
throw new PentestError(
`Agent execution failed: ${result.error}`,
'agent',
result.retryable || false,
{ agentName, result }
);
}
// Get commit hash for checkpoint
const commitHash = await getGitCommitHash(targetRepo);
// Extract timing and cost data from result if available
timingData = result.duration;
costData = result.cost || 0;
if (agentName.includes('-vuln')) {
// Extract vulnerability type from agent name (e.g., 'injection-vuln' -> 'injection')
const vulnType = agentName.replace('-vuln', '');
try {
const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');
const validation = await safeValidateQueueAndDeliverable(vulnType, targetRepo);
if (validation.success) {
validationData = {
shouldExploit: validation.data.shouldExploit,
vulnerabilityCount: validation.data.vulnerabilityCount,
validatedAt: new Date().toISOString()
};
console.log(chalk.blue(`📋 Validation: ${validationData.shouldExploit ? `Ready for exploitation (${validationData.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
} else {
console.log(chalk.yellow(`⚠️ Validation failed: ${validation.error.message}`));
}
} catch (validationError) {
console.log(chalk.yellow(`⚠️ Could not validate ${vulnType}: ${validationError.message}`));
}
}
// Mark agent as completed
await markAgentCompleted(session.id, agentName, commitHash, timingData, costData, validationData);
// Only show completion message for sequential execution
if (!skipWorkspaceClean) {
console.log(chalk.green(`✅ Agent '${agentName}' completed successfully`));
}
// Return immutable result object with enhanced metadata
return Object.freeze({
success: true,
agentName,
result,
validation: validationData,
timing: timingData,
cost: costData,
checkpoint: commitHash,
completedAt: new Date().toISOString()
});
} catch (error) {
// Mark agent as failed
await markAgentFailed(session.id, agentName);
// Only show failure message for sequential execution
if (!skipWorkspaceClean) {
console.log(chalk.red(`❌ Agent '${agentName}' failed: ${error.message}`));
}
// Return immutable error object with enhanced context
const errorResult = Object.freeze({
success: false,
agentName,
error: {
message: error.message,
type: error.constructor.name,
retryable: error.retryable || false,
originalError: error
},
validation: validationData,
timing: timingData,
failedAt: new Date().toISOString(),
context: {
targetRepo,
promptName: getPromptName(agentName),
sessionId: session.id
}
});
// Throw enhanced error with preserved context
const enhancedError = new PentestError(
`Agent '${agentName}' execution failed: ${error.message}`,
'agent',
error.retryable || false,
{
agentName,
sessionId: session.id,
originalError: error.message,
errorResult
}
);
throw enhancedError;
}
};
/**
 * Runs, one after another, the agents that `validateAgentRange` returns for
 * the given start and end agent.
 *
 * Agents already present in `session.completedAgents` are skipped. The first
 * agent whose run throws stops the range: the failure is logged and the same
 * error is rethrown to the caller.
 *
 * @param {string} startAgent - Name of the first agent of the range.
 * @param {string} endAgent - Name of the last agent of the range.
 * @param {object} session - Session record; `completedAgents` is read here.
 * @param {*} pipelineTestingMode - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} loadPrompt - Forwarded unchanged to `runSingleAgent`.
 * @returns {Promise<void>}
 * @throws Whatever `validateAgentRange` or `runSingleAgent` throws.
 */
export const runAgentRange = async (startAgent, endAgent, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const rangeAgents = validateAgentRange(startAgent, endAgent);
  console.log(chalk.cyan(`\n🔄 Running agent range: ${startAgent} to ${endAgent} (${rangeAgents.length} agents)`));

  for (const rangeAgent of rangeAgents) {
    const alreadyDone = session.completedAgents.includes(rangeAgent.name);

    if (!alreadyDone) {
      try {
        await runSingleAgent(rangeAgent.name, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);
      } catch (failure) {
        // Report where the range stopped, then let the caller handle the error.
        console.log(chalk.red(`❌ Agent range execution stopped at '${rangeAgent.name}' due to failure`));
        throw failure;
      }
    } else {
      console.log(chalk.gray(`⏭️ Agent '${rangeAgent.name}' already completed, skipping`));
    }
  }

  console.log(chalk.green(`✅ Agent range ${startAgent} to ${endAgent} completed successfully`));
};
/**
 * Runs the not-yet-completed vulnerability-analysis agents concurrently and
 * prints a results table.
 *
 * Each agent start is staggered by two seconds per list position, and each
 * agent is attempted up to three times with a five second pause between
 * attempts. Individual failures never reject this function; they are
 * collected and reported in the returned `failed` list.
 *
 * @param {object} session - Session record; `completedAgents` is read here.
 * @param {*} pipelineTestingMode - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} loadPrompt - Forwarded unchanged to `runSingleAgent`.
 * @returns {Promise<{completed: string[], failed: Array<{agent: string, error: string}>}>}
 *   Names of agents that succeeded in this call (or every vulnerability agent
 *   when nothing was left to run) and one entry per agent that failed.
 */
export const runParallelVuln = async (session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const vulnAgents = ['injection-vuln', 'xss-vuln', 'auth-vuln', 'ssrf-vuln', 'authz-vuln'];
  // Single source of truth for the retry limit and the "n/3" column in the table.
  const maxAttempts = 3;
  // Longest failure message shown in the table before it is truncated.
  const maxMessageLength = 60;

  const activeAgents = vulnAgents.filter(agent => !session.completedAgents.includes(agent));
  if (activeAgents.length === 0) {
    console.log(chalk.gray('⏭️ All vulnerability agents already completed'));
    return { completed: vulnAgents, failed: [] };
  }

  console.log(chalk.cyan(`\n🚀 Starting ${activeAgents.length} vulnerability analysis specialists in parallel...`));
  console.log(chalk.gray(` Specialists: ${activeAgents.join(', ')}`));
  console.log();

  const startTime = Date.now();

  // Collect all results without logging individual completions
  const results = await Promise.allSettled(
    activeAgents.map(async (agentName, index) => {
      // Add 2-second stagger to prevent API overwhelm
      await new Promise(resolve => setTimeout(resolve, index * 2000));

      let lastError;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
          // Trailing `false, true`: presumably allowRerun=false, skipWorkspaceClean=true —
          // TODO confirm against runSingleAgent's signature.
          const result = await runSingleAgent(agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, false, true);
          return { agentName, ...result, attempts: attempt };
        } catch (error) {
          lastError = error;
          if (attempt < maxAttempts) {
            console.log(chalk.yellow(`⚠️ ${agentName} failed attempt ${attempt}/${maxAttempts}, retrying...`));
            await new Promise(resolve => setTimeout(resolve, 5000));
          }
        }
      }

      // Reject with a real Error (keeps a stack trace) that still carries the
      // fields the table below reads: `error` and `attempts`.
      throw Object.assign(
        new Error(`${agentName} failed after ${maxAttempts} attempts`),
        { agentName, error: lastError, attempts: maxAttempts }
      );
    })
  );

  const totalDuration = Date.now() - startTime;

  // Process and display results in a nice table
  console.log(chalk.cyan('\n📊 Vulnerability Analysis Results'));
  console.log(chalk.gray('─'.repeat(80)));
  // Table header
  console.log(chalk.bold('Agent Status Vulns Attempt Duration Cost'));
  console.log(chalk.gray('─'.repeat(80)));

  const completed = [];
  const failed = [];

  results.forEach((result, index) => {
    const agentName = activeAgents[index];
    const agentDisplay = agentName.padEnd(22);

    if (result.status === 'fulfilled') {
      const data = result.value;
      completed.push(agentName);

      const vulnCount = data.validation?.vulnerabilityCount || 0;
      const duration = formatDuration(data.timing || 0);
      const cost = `$${(data.cost || 0).toFixed(4)}`;

      console.log(
        `${chalk.green(agentDisplay)} ${chalk.green('✓ Success')} ${vulnCount.toString().padStart(5)} ` +
        `${data.attempts}/${maxAttempts} ${duration.padEnd(11)} ${cost}`
      );

      // Show log file path for detailed review
      if (data.logFile) {
        const relativePath = path.relative(process.cwd(), data.logFile);
        console.log(chalk.gray(` └─ Detailed log: ${relativePath}`));
      }
    } else {
      const reason = result.reason;
      const error = reason?.error || reason;
      // A rejection reason is not guaranteed to be an Error; never let a
      // missing `message` abort rendering of the remaining rows.
      const message = String(error?.message ?? error ?? 'Unknown error');
      failed.push({ agent: agentName, error: message });

      const attempts = reason?.attempts || maxAttempts; // Default to the limit if not available
      console.log(
        `${chalk.red(agentDisplay)} ${chalk.red('✗ Failed ')} - ` +
        `${attempts}/${maxAttempts} - -`
      );

      // Only add an ellipsis when the message was actually cut.
      const shownMessage = message.length > maxMessageLength
        ? `${message.substring(0, maxMessageLength)}...`
        : message;
      console.log(chalk.gray(` └─ ${shownMessage}`));
    }
  });

  console.log(chalk.gray('─'.repeat(80)));
  console.log(chalk.cyan(`Summary: ${completed.length}/${activeAgents.length} succeeded in ${formatDuration(totalDuration)}`));

  return { completed, failed };
};
/**
 * Runs the eligible exploitation agents concurrently and prints a results table.
 *
 * The session is re-read through `getSession` first so eligibility is decided
 * on the latest stored state. An exploit agent is eligible when its `-vuln`
 * counterpart is in `completedAgents` and that agent's entry in
 * `validationResults` has a truthy `shouldExploit`. Eligible agents that are
 * already completed are not run again.
 *
 * Each agent start is staggered by two seconds per list position, and each
 * agent is attempted up to three times with a five second pause between
 * attempts. Individual failures never reject this function; they are
 * collected and reported in the returned `failed` list.
 *
 * @param {object} session - Session record; only `session.id` is read here.
 * @param {*} pipelineTestingMode - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} loadPrompt - Forwarded unchanged to `runSingleAgent`.
 * @returns {Promise<{completed: string[], failed: Array<{agent: string, error: string}>}>}
 *   Names of agents that succeeded in this call (or every eligible agent when
 *   nothing was left to run) and one entry per agent that failed.
 */
export const runParallelExploit = async (session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const exploitAgents = ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'];
  // Single source of truth for the retry limit and the "n/3" column in the table.
  const maxAttempts = 3;
  // Longest failure message shown in the table before it is truncated.
  const maxMessageLength = 60;

  // Get fresh session data to ensure we have the latest vulnerability analysis results
  // This prevents race conditions where parallel vuln agents haven't updated session state yet
  const { getSession } = await import('./session-manager.js');
  const freshSession = await getSession(session.id);

  // Only run exploit agents whose vuln counterparts completed successfully AND found vulnerabilities
  const eligibleAgents = exploitAgents.filter(agentName => {
    const vulnAgentName = agentName.replace('-exploit', '-vuln');

    // Must have completed the vulnerability analysis
    if (!freshSession.completedAgents.includes(vulnAgentName)) {
      return false;
    }

    // Must have found vulnerabilities to exploit
    const validationResult = freshSession.validationResults?.[vulnAgentName];
    if (!validationResult || !validationResult.shouldExploit) {
      console.log(chalk.gray(`⏭️ Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
      return false;
    }

    console.log(chalk.blue(`${agentName} eligible (${validationResult.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
    return true;
  });

  const activeAgents = eligibleAgents.filter(agent => !freshSession.completedAgents.includes(agent));
  if (activeAgents.length === 0) {
    if (eligibleAgents.length === 0) {
      console.log(chalk.gray('⏭️ No exploitation agents eligible (no vulnerabilities found)'));
    } else {
      console.log(chalk.gray('⏭️ All eligible exploitation agents already completed'));
    }
    return { completed: eligibleAgents, failed: [] };
  }

  console.log(chalk.cyan(`\n🎯 Starting ${activeAgents.length} exploitation specialists in parallel...`));
  console.log(chalk.gray(` Specialists: ${activeAgents.join(', ')}`));
  console.log();

  const startTime = Date.now();

  // Collect all results without logging individual completions
  const results = await Promise.allSettled(
    activeAgents.map(async (agentName, index) => {
      // Add 2-second stagger to prevent API overwhelm
      await new Promise(resolve => setTimeout(resolve, index * 2000));

      let lastError;
      for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
          // Trailing `false, true`: presumably allowRerun=false, skipWorkspaceClean=true —
          // TODO confirm against runSingleAgent's signature.
          const result = await runSingleAgent(agentName, freshSession, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, false, true);
          return { agentName, ...result, attempts: attempt };
        } catch (error) {
          lastError = error;
          if (attempt < maxAttempts) {
            console.log(chalk.yellow(`⚠️ ${agentName} failed attempt ${attempt}/${maxAttempts}, retrying...`));
            await new Promise(resolve => setTimeout(resolve, 5000));
          }
        }
      }

      // Reject with a real Error (keeps a stack trace) that still carries the
      // fields the table below reads: `error` and `attempts`.
      throw Object.assign(
        new Error(`${agentName} failed after ${maxAttempts} attempts`),
        { agentName, error: lastError, attempts: maxAttempts }
      );
    })
  );

  const totalDuration = Date.now() - startTime;

  // Process and display results in a nice table
  console.log(chalk.cyan('\n🎯 Exploitation Results'));
  console.log(chalk.gray('─'.repeat(80)));
  // Table header
  console.log(chalk.bold('Agent Status Result Attempt Duration Cost'));
  console.log(chalk.gray('─'.repeat(80)));

  const completed = [];
  const failed = [];

  results.forEach((result, index) => {
    const agentName = activeAgents[index];
    const agentDisplay = agentName.padEnd(22);

    if (result.status === 'fulfilled') {
      const data = result.value;
      completed.push(agentName);

      const exploitResult = 'Success'; // Could be enhanced to show actual exploitation result
      const duration = formatDuration(data.timing || 0);
      const cost = `$${(data.cost || 0).toFixed(4)}`;

      console.log(
        `${chalk.green(agentDisplay)} ${chalk.green('✓ Success')} ${exploitResult.padEnd(6)} ` +
        `${data.attempts}/${maxAttempts} ${duration.padEnd(11)} ${cost}`
      );

      // Show log file path for detailed review
      if (data.logFile) {
        const relativePath = path.relative(process.cwd(), data.logFile);
        console.log(chalk.gray(` └─ Detailed log: ${relativePath}`));
      }
    } else {
      const reason = result.reason;
      const error = reason?.error || reason;
      // A rejection reason is not guaranteed to be an Error; never let a
      // missing `message` abort rendering of the remaining rows.
      const message = String(error?.message ?? error ?? 'Unknown error');
      failed.push({ agent: agentName, error: message });

      const attempts = reason?.attempts || maxAttempts; // Default to the limit if not available
      console.log(
        `${chalk.red(agentDisplay)} ${chalk.red('✗ Failed ')} - ` +
        `${attempts}/${maxAttempts} - -`
      );

      // Only add an ellipsis when the message was actually cut.
      const shownMessage = message.length > maxMessageLength
        ? `${message.substring(0, maxMessageLength)}...`
        : message;
      console.log(chalk.gray(` └─ ${shownMessage}`));
    }
  });

  console.log(chalk.gray('─'.repeat(80)));
  console.log(chalk.cyan(`Summary: ${completed.length}/${activeAgents.length} succeeded in ${formatDuration(totalDuration)}`));

  return { completed, failed };
};
/**
 * Runs every agent that belongs to one phase.
 *
 * The 'vulnerability-analysis' and 'exploitation' phases are delegated to
 * their parallel runners; agent failures there are listed but do not throw.
 * Any other phase must resolve (via `validatePhase`) to exactly one agent,
 * which is run with `runSingleAgent` unless it is already completed.
 *
 * @param {string} phaseName - Phase to run.
 * @param {object} session - Session record; `completedAgents` is read here.
 * @param {*} pipelineTestingMode - Forwarded unchanged to the runner.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to the runner.
 * @param {Function} loadPrompt - Forwarded unchanged to the runner.
 * @returns {Promise<void>}
 * @throws {PentestError} When a non-parallel phase does not resolve to exactly one agent.
 */
export const runPhase = async (phaseName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  console.log(chalk.cyan(`\n📋 Running phase: ${phaseName} (parallel execution)`));

  // Phases with a dedicated parallel runner, keyed by phase name.
  const parallelPhases = new Map([
    ['vulnerability-analysis', {
      banner: '🚀 Using parallel execution for 5x faster vulnerability analysis',
      run: runParallelVuln
    }],
    ['exploitation', {
      banner: '🎯 Using parallel execution for 5x faster exploitation',
      run: runParallelExploit
    }]
  ]);

  const parallelPhase = parallelPhases.get(phaseName);
  if (parallelPhase) {
    console.log(chalk.cyan(parallelPhase.banner));
    const outcome = await parallelPhase.run(session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);

    const failureCount = outcome.failed.length;
    if (failureCount > 0) {
      console.log(chalk.yellow(`⚠️ ${failureCount} agents failed, but phase continues`));
      for (const failure of outcome.failed) {
        console.log(chalk.red(` - ${failure.agent}: ${failure.error}`));
      }
    }

    console.log(chalk.green(`✅ Phase '${phaseName}' completed: ${outcome.completed.length} succeeded, ${outcome.failed.length} failed`));
    return;
  }

  // For other phases (pre-reconnaissance, reconnaissance, reporting), run the single agent
  const phaseAgents = validatePhase(phaseName);
  if (phaseAgents.length !== 1) {
    throw new PentestError(`Phase '${phaseName}' has multiple agents but no parallel execution defined`, 'validation', false);
  }

  const soleAgent = phaseAgents[0];
  if (session.completedAgents.includes(soleAgent.name)) {
    console.log(chalk.gray(`⏭️ Agent '${soleAgent.name}' already completed, skipping`));
    return;
  }

  await runSingleAgent(soleAgent.name, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);
  console.log(chalk.green(`✅ Phase '${phaseName}' completed successfully`));
};
/**
 * Rolls the git workspace and the stored session back to the checkpoint
 * recorded for `targetAgent`.
 *
 * @param {string} targetAgent - Agent whose checkpoint should be restored.
 * @param {object} session - Session record; `targetRepo`, `checkpoints` and `id` are read.
 * @returns {Promise<void>}
 * @throws {PentestError} When the session has no checkpoint for `targetAgent`.
 */
export const rollbackTo = async (targetAgent, session) => {
  console.log(chalk.yellow(`🔄 Rolling back to agent: ${targetAgent}`));

  await validateTargetRepo(session.targetRepo);
  validateAgent(targetAgent);

  const checkpointHash = session.checkpoints[targetAgent];
  if (!checkpointHash) {
    const details = {
      targetAgent,
      availableCheckpoints: Object.keys(session.checkpoints)
    };
    throw new PentestError(
      `No checkpoint found for agent '${targetAgent}' in session history`,
      'validation',
      false,
      details
    );
  }

  // Restore the repository first, then bring the session record in line with it.
  await rollbackGitToCommit(session.targetRepo, checkpointHash);
  await rollbackToAgent(session.id, targetAgent);

  console.log(chalk.green(`✅ Successfully rolled back to agent '${targetAgent}'`));
};
/**
 * Re-executes one agent after rolling the workspace back to the state that
 * preceded it.
 *
 * The rollback target is the completed prerequisite with the highest
 * `AGENTS[...].order`. When there is none and the agent is 'pre-recon', the
 * repository is reset to the first commit reported by `git log --reverse`;
 * a failure of that lookup is only logged. The agent is then run with the
 * rerun flag set.
 *
 * @param {string} agentName - Agent to run again.
 * @param {object} session - Session record; `completedAgents` and `targetRepo` are read.
 * @param {*} pipelineTestingMode - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} loadPrompt - Forwarded unchanged to `runSingleAgent`.
 * @returns {Promise<void>}
 */
export const rerunAgent = async (agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  console.log(chalk.cyan(`🔁 Rerunning agent: ${agentName}`));

  const agent = validateAgent(agentName);

  // Pick the completed prerequisite that comes latest in agent order
  // (ties keep the one listed first).
  let rollbackTarget = null;
  for (const prereq of agent.prerequisites) {
    if (!session.completedAgents.includes(prereq)) {
      continue;
    }
    if (rollbackTarget === null || AGENTS[prereq].order > AGENTS[rollbackTarget].order) {
      rollbackTarget = prereq;
    }
  }

  if (rollbackTarget) {
    console.log(chalk.blue(`📍 Rolling back to prerequisite: ${rollbackTarget}`));
    await rollbackTo(rollbackTarget, session);
  } else if (agent.name === 'pre-recon') {
    // Special case: rollback to initial clone
    console.log(chalk.blue(`📍 Rolling back to initial repository state`));
    try {
      const history = await executeGitCommandWithRetry(['git', 'log', '--reverse', '--format=%H'], session.targetRepo, 'finding initial commit');
      const [rootCommit] = history.stdout.trim().split('\n');
      await rollbackGitToCommit(session.targetRepo, rootCommit);
    } catch (error) {
      console.log(chalk.yellow(`⚠️ Could not find initial commit, using HEAD: ${error.message}`));
    }
  }

  // Run the target agent (allow rerun since we've explicitly rolled back)
  await runSingleAgent(agentName, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt, true);
  console.log(chalk.green(`✅ Agent '${agentName}' rerun completed successfully`));
};
/**
 * Runs, one at a time, every agent in `AGENTS` that is not yet listed in
 * `session.completedAgents`.
 *
 * Agents are taken in `Object.keys(AGENTS)` order. The first agent that
 * throws ends the run, because the error is not caught here.
 *
 * @param {object} session - Session record; `completedAgents` is read here.
 * @param {*} pipelineTestingMode - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to `runSingleAgent`.
 * @param {Function} loadPrompt - Forwarded unchanged to `runSingleAgent`.
 * @returns {Promise<void>}
 */
export const runAll = async (session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) => {
  const everyAgent = Object.keys(AGENTS);

  console.log(chalk.cyan(`\n🚀 Running all remaining agents to completion`));
  console.log(chalk.gray(`Current progress: ${session.completedAgents.length}/${everyAgent.length} agents completed`));

  // Collect the agents that still have to run.
  const pending = [];
  for (const candidate of everyAgent) {
    if (!session.completedAgents.includes(candidate)) {
      pending.push(candidate);
    }
  }

  if (pending.length === 0) {
    console.log(chalk.green('✅ All agents already completed!'));
    return;
  }

  console.log(chalk.blue(`📋 Remaining agents: ${pending.join(', ')}`));
  console.log();

  // Strictly sequential: each agent starts only after the previous one finished.
  for (const pendingAgent of pending) {
    await runSingleAgent(pendingAgent, session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt);
  }

  console.log(chalk.green(`\n🎉 All agents completed successfully! Session marked as completed.`));
};
/**
 * Prints a human-readable report for one session: identity, overall status,
 * optional timing and cost breakdowns, the state of every agent, and a hint
 * about the next action.
 *
 * @param {object} session - Session record. Reads `webUrl`, `repoPath`, `id`,
 *   `targetRepo`, `lastActivity`, `configFile`, `timingBreakdown`,
 *   `costBreakdown`, `completedAgents` and `failedAgents`.
 * @returns {Promise<void>}
 * @throws {TypeError} When `session.webUrl` is not a parseable URL.
 */
export const displayStatus = async (session) => {
  const status = getSessionStatus(session);
  const timeAgo = getTimeAgo(session.lastActivity);

  console.log(chalk.cyan(`Session: ${new URL(session.webUrl).hostname} + ${path.basename(session.repoPath)}`));
  console.log(chalk.gray(`Session ID: ${session.id}`));
  console.log(chalk.gray(`Source Directory: ${session.targetRepo}`));

  // Check if final deliverable exists and show its path
  if (session.targetRepo) {
    const finalReportPath = path.join(session.targetRepo, 'deliverables', 'comprehensive_security_assessment_report.md');
    try {
      if (await fs.pathExists(finalReportPath)) {
        console.log(chalk.gray(`Final Deliverable Available: ${finalReportPath}`));
      }
    } catch (error) {
      // Best effort only: the report line is optional, so a failed lookup is ignored.
    }
  }

  const overallColor = status.status === 'completed' ? chalk.green : status.status === 'failed' ? chalk.red : chalk.blue;
  console.log(overallColor(`Status: ${status.status} (${status.completedCount}/${status.totalAgents} agents completed)`));
  console.log(chalk.gray(`Last Activity: ${timeAgo}`));

  if (session.configFile) {
    console.log(chalk.gray(`Config: ${session.configFile}`));
  }

  // Display cost and timing breakdown if available
  if (session.costBreakdown || session.timingBreakdown) {
    console.log(); // Empty line before metrics

    if (session.timingBreakdown) {
      console.log(chalk.blue('⏱️ Timing Breakdown:'));
      console.log(chalk.gray(` Total Execution: ${formatDuration(session.timingBreakdown.total || 0)}`));

      // Missing entries are shown as zero, matching the guard on the total line.
      if (session.timingBreakdown.phases) {
        Object.entries(session.timingBreakdown.phases).forEach(([phase, duration]) => {
          console.log(chalk.gray(` ${phase}: ${formatDuration(duration || 0)}`));
        });
      }

      if (session.timingBreakdown.agents) {
        console.log(chalk.gray(' Per Agent:'));
        Object.entries(session.timingBreakdown.agents).forEach(([agent, duration]) => {
          console.log(chalk.gray(` ${agent}: ${formatDuration(duration || 0)}`));
        });
      }
    }

    if (session.costBreakdown) {
      console.log(chalk.blue('💰 Cost Breakdown:'));
      console.log(chalk.gray(` Total Cost: $${(session.costBreakdown.total || 0).toFixed(4)}`));

      if (session.costBreakdown.agents) {
        console.log(chalk.gray(' Per Agent:'));
        Object.entries(session.costBreakdown.agents).forEach(([agent, cost]) => {
          // A null/undefined cost must not crash the report with a TypeError.
          console.log(chalk.gray(` ${agent}: $${(cost || 0).toFixed(4)}`));
        });
      }
    }
  }

  console.log(); // Empty line

  // Display agent status
  const agentList = Object.values(AGENTS).sort((a, b) => a.order - b.order);

  for (const agent of agentList) {
    // Named differently from `overallColor` above to avoid shadowing it.
    let statusIcon, statusText, agentColor;

    if (session.completedAgents.includes(agent.name)) {
      statusIcon = '✅';
      statusText = `completed ${getTimeAgoForAgent(session, agent.name)}`;
      agentColor = chalk.green;
    } else if (session.failedAgents.includes(agent.name)) {
      statusIcon = '❌';
      statusText = `failed ${getTimeAgoForAgent(session, agent.name)}`;
      agentColor = chalk.red;
    } else {
      statusIcon = '⏸️';
      statusText = 'pending';
      agentColor = chalk.gray;
    }

    const displayName = agent.name.replace(/-/g, ' ');
    console.log(`${statusIcon} ${agentColor(displayName.padEnd(20))} (${statusText})`);
  }

  // Show next action
  const nextAgent = getNextAgent(session);
  if (nextAgent) {
    console.log(chalk.cyan(`\nNext: Run --run-agent ${nextAgent.name}`));
  } else if (status.failedCount > 0) {
    const failedAgent = session.failedAgents[0];
    console.log(chalk.yellow(`\nNext: Fix ${failedAgent} failure or run --rerun ${failedAgent}`));
  } else if (status.status === 'completed') {
    console.log(chalk.green('\nAll agents completed successfully! 🎉'));
  }
};
/**
 * Prints every phase in `PHASES` with a 1-based number and a title-cased
 * name, followed by the name and display name of each agent in that phase.
 *
 * @returns {void}
 */
export const listAgents = () => {
  console.log(chalk.cyan('Available Agents:'));

  const capitalize = (word) => word.charAt(0).toUpperCase() + word.slice(1);

  let phaseNumber = 0;
  for (const [phaseName, phaseAgents] of Object.entries(PHASES)) {
    phaseNumber += 1;

    // 'vulnerability-analysis' -> 'Vulnerability Analysis'
    const heading = phaseName
      .split('-')
      .map((word) => capitalize(word))
      .join(' ');
    console.log(chalk.yellow(`\nPhase ${phaseNumber} - ${heading}:`));

    for (const agentName of phaseAgents) {
      const entry = AGENTS[agentName];
      console.log(chalk.white(` ${entry.name.padEnd(18)} ${entry.displayName}`));
    }
  }
};
/**
 * Maps an agent name to the name of the prompt it runs with.
 *
 * Agent names without an explicit mapping are returned unchanged. The lookup
 * uses a Map so that names which happen to match inherited object properties
 * (for example 'constructor' or 'toString') fall through to that default
 * instead of resolving to a built-in function.
 *
 * @param {string} agentName - Agent name, e.g. 'xss-vuln'.
 * @returns {string} The prompt name, or `agentName` when no mapping exists.
 */
const getPromptName = (agentName) => {
  const promptNames = new Map([
    ['pre-recon', 'pre-recon-code'],
    ['recon', 'recon'],
    ['injection-vuln', 'vuln-injection'],
    ['xss-vuln', 'vuln-xss'],
    ['auth-vuln', 'vuln-auth'],
    ['ssrf-vuln', 'vuln-ssrf'],
    ['authz-vuln', 'vuln-authz'],
    ['injection-exploit', 'exploit-injection'],
    ['xss-exploit', 'exploit-xss'],
    ['auth-exploit', 'exploit-auth'],
    ['ssrf-exploit', 'exploit-ssrf'],
    ['authz-exploit', 'exploit-authz'],
    ['report', 'report-executive']
  ]);

  return promptNames.get(agentName) || agentName;
};
/**
 * Returns a relative-time label for an agent.
 *
 * Placeholder implementation: per-agent timestamps are not consulted, so the
 * label is derived from the session's `lastActivity` and `agentName` is
 * currently unused.
 *
 * @param {object} session - Session record; `lastActivity` is read.
 * @param {string} agentName - Agent the label is for (ignored for now).
 * @returns {string} The label produced by `getTimeAgo`.
 */
const getTimeAgoForAgent = (session, agentName) => {
  const { lastActivity } = session;
  return getTimeAgo(lastActivity);
};
/**
 * Formats how long ago `timestamp` was, relative to the current time.
 *
 * Under one hour the result is in minutes, under one day in hours, otherwise
 * in days; every unit is rounded down. Timestamps in the future (e.g. from
 * clock skew) are treated as "0m ago", and a value that cannot be parsed as
 * a date yields 'unknown time ago' instead of 'NaNd ago'.
 *
 * @param {string|number|Date} timestamp - Anything accepted by `new Date()`.
 * @returns {string} A label such as '5m ago', '3h ago' or '2d ago'.
 */
const getTimeAgo = (timestamp) => {
  const pastMs = new Date(timestamp).getTime();
  if (Number.isNaN(pastMs)) {
    return 'unknown time ago';
  }

  // Never report a negative age.
  const elapsedMs = Math.max(0, Date.now() - pastMs);

  const minutes = Math.floor(elapsedMs / (1000 * 60));
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(elapsedMs / (1000 * 60 * 60));
  if (hours < 24) {
    return `${hours}h ago`;
  }

  const days = Math.floor(elapsedMs / (1000 * 60 * 60 * 24));
  return `${days}d ago`;
};
/**
 * Formats a duration given in milliseconds as a short human-readable string.
 *
 * Values below one second are shown as whole milliseconds; larger values are
 * shown as seconds, minutes and seconds, or hours, minutes and seconds, all
 * rounded down. Input that is not a finite positive number (undefined, NaN,
 * negative values) is rendered as '0ms' rather than 'NaNs'.
 *
 * @param {number} durationMs - Duration in milliseconds.
 * @returns {string} For example '250ms', '12s', '3m 5s' or '1h 2m 3s'.
 */
const formatDuration = (durationMs) => {
  // Normalise first so fractional timer values do not print as long floats.
  const totalMs = Number.isFinite(durationMs) && durationMs > 0 ? Math.floor(durationMs) : 0;

  if (totalMs < 1000) {
    return `${totalMs}ms`;
  }

  const seconds = Math.floor(totalMs / 1000);
  const minutes = Math.floor(seconds / 60);
  const hours = Math.floor(minutes / 60);

  if (hours > 0) {
    return `${hours}h ${minutes % 60}m ${seconds % 60}s`;
  }
  if (minutes > 0) {
    return `${minutes}m ${seconds % 60}s`;
  }
  return `${seconds}s`;
};
+136
View File
@@ -0,0 +1,136 @@
import chalk from 'chalk';
import {
selectSession, deleteSession, deleteAllSessions,
validateAgent, validatePhase
} from '../session-manager.js';
import {
runPhase, runAll, rollbackTo, rerunAgent, displayStatus, listAgents
} from '../checkpoint-manager.js';
import { logError, PentestError } from '../error-handling.js';
import { cleanupMCP } from '../setup/environment.js';
/**
 * Dispatches one developer-mode CLI command.
 *
 * `--list-agents` and `--cleanup` are handled without a session. For every
 * other command the phase/agent argument is validated first, a session is
 * chosen through `selectSession`, and the matching checkpoint-manager
 * function is invoked. Usage errors, a failed session selection, an unknown
 * command and any thrown error all end the process with exit code 1.
 *
 * @param {string} command - The developer flag, e.g. '--run-phase'.
 * @param {string[]} args - Arguments following the flag; only `args[0]` is used.
 * @param {*} pipelineTestingMode - Forwarded unchanged to the run functions.
 * @param {Function} runClaudePromptWithRetry - Forwarded unchanged to the run functions.
 * @param {Function} loadPrompt - Forwarded unchanged to the run functions.
 * @returns {Promise<void>}
 */
export async function handleDeveloperCommand(command, args, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt) {
  try {
    // Commands that don't require session selection
    if (command === '--list-agents') {
      listAgents();
      return;
    }

    if (command === '--cleanup') {
      const sessionId = args[0];

      if (sessionId) {
        // Cleanup specific session by ID
        const deletedSession = await deleteSession(sessionId);
        console.log(chalk.green(`✅ Deleted session ${sessionId} (${new URL(deletedSession.webUrl).hostname})`));
        // Clean up MCP agents when deleting specific session
        await cleanupMCP();
        return;
      }

      // Cleanup all sessions - require confirmation
      console.log(chalk.yellow('⚠️ This will delete all pentest sessions. Are you sure? (y/N):'));
      const { createInterface } = await import('readline');
      const prompt = createInterface({
        input: process.stdin,
        output: process.stdout
      });
      const answer = await new Promise((resolve) => {
        prompt.question('', resolve);
      });
      prompt.close();

      const reply = answer.toLowerCase();
      const confirmed = reply === 'y' || reply === 'yes';
      if (!confirmed) {
        console.log(chalk.gray('Cleanup cancelled'));
        return;
      }

      try {
        const deleted = await deleteAllSessions();
        if (deleted) {
          console.log(chalk.green('✅ All sessions deleted'));
        } else {
          console.log(chalk.yellow('⚠️ No sessions found to delete'));
        }
        // Clean up MCP agents after deleting sessions
        await cleanupMCP();
      } catch (error) {
        // A failed bulk delete is reported but does not fail the command.
        console.log(chalk.red(`❌ Failed to delete sessions: ${error.message}`));
      }
      return;
    }

    // Early validation for commands with agent names (before session selection)
    const takesPhase = command === '--run-phase';
    const takesAgent = command === '--rollback-to' || command === '--rerun';

    if (takesPhase) {
      if (!args[0]) {
        console.log(chalk.red('❌ --run-phase requires a phase name'));
        console.log(chalk.gray('Usage: ./shannon.mjs --run-phase <phase-name>'));
        process.exit(1);
      }
      validatePhase(args[0]); // This will throw PentestError if invalid
    }

    if (takesAgent) {
      if (!args[0]) {
        console.log(chalk.red(`${command} requires an agent name`));
        console.log(chalk.gray(`Usage: ./shannon.mjs ${command} <agent-name>`));
        process.exit(1);
      }
      validateAgent(args[0]); // This will throw PentestError if invalid
    }

    // Get session for other commands
    let session;
    try {
      session = await selectSession();
    } catch (error) {
      console.log(chalk.red(`${error.message}`));
      process.exit(1);
    }

    // One handler per session-based command.
    const handlers = new Map([
      ['--run-phase', () => runPhase(args[0], session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt)],
      ['--run-all', () => runAll(session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt)],
      ['--rollback-to', () => rollbackTo(args[0], session)],
      ['--rerun', () => rerunAgent(args[0], session, pipelineTestingMode, runClaudePromptWithRetry, loadPrompt)],
      ['--status', () => displayStatus(session)]
    ]);

    const handler = handlers.get(command);
    if (!handler) {
      console.log(chalk.red(`❌ Unknown developer command: ${command}`));
      console.log(chalk.gray('Use --help to see available commands'));
      process.exit(1);
      return;
    }

    await handler();
  } catch (error) {
    const isPentestError = error instanceof PentestError;

    if (isPentestError) {
      await logError(error, `Developer command ${command}`);
      console.log(chalk.red.bold(`\n🚨 Command failed: ${error.message}`));
    } else {
      console.log(chalk.red.bold(`\n🚨 Unexpected error: ${error.message}`));
      if (process.env.DEBUG) {
        console.log(chalk.gray(error.stack));
      }
    }

    process.exit(1);
  }
}
+46
View File
@@ -0,0 +1,46 @@
import { fs, path } from 'zx';
/**
 * Checks that `url` is a well-formed HTTP or HTTPS URL with a hostname.
 *
 * @param {string} url - Candidate web URL.
 * @returns {{valid: true} | {valid: false, error: string}} The validation
 *   outcome; `error` explains the first check that failed.
 */
export function validateWebUrl(url) {
  const reject = (error) => ({ valid: false, error });

  let candidate;
  try {
    candidate = new URL(url);
  } catch (parseError) {
    return reject('Invalid web URL format');
  }

  const isWebProtocol = candidate.protocol === 'http:' || candidate.protocol === 'https:';
  if (!isWebProtocol) {
    return reject('Web URL must use HTTP or HTTPS protocol');
  }

  if (!candidate.hostname) {
    return reject('Web URL must have a valid hostname');
  }

  return { valid: true };
}
/**
 * Checks that `repoPath` exists, is a directory and is readable, and resolves
 * it to an absolute path.
 *
 * @param {string} repoPath - Path to the local repository.
 * @returns {Promise<{valid: true, path: string} | {valid: false, error: string}>}
 *   On success `path` is the absolute path; otherwise `error` describes the
 *   first check that failed. Unexpected errors are reported the same way
 *   rather than thrown.
 */
export async function validateRepoPath(repoPath) {
  const reject = (error) => ({ valid: false, error });

  try {
    const exists = await fs.pathExists(repoPath);
    if (!exists) {
      return reject('Repository path does not exist');
    }

    const entry = await fs.stat(repoPath);
    if (!entry.isDirectory()) {
      return reject('Repository path must be a directory');
    }

    // Probe read permission; a rejected access() means "not readable".
    let readable = true;
    try {
      await fs.access(repoPath, fs.constants.R_OK);
    } catch (accessError) {
      readable = false;
    }
    if (!readable) {
      return reject('Repository path is not readable');
    }

    return { valid: true, path: path.resolve(repoPath) };
  } catch (error) {
    return reject(`Invalid repository path: ${error.message}`);
  }
}
+60
View File
@@ -0,0 +1,60 @@
import chalk from 'chalk';
import { displaySplashScreen } from '../splash-screen.js';
/**
 * Prints the command-line help: usage for normal and developer mode,
 * options, developer commands, examples, requirements and environment
 * variables. Each entry below is written with its own `console.log` call.
 *
 * @returns {void}
 */
export function showHelp() {
  const title = chalk.cyan.bold;
  const heading = chalk.yellow.bold;

  const helpLines = [
    title('AI Penetration Testing Agent'),
    chalk.gray('Automated security assessment tool\n'),

    heading('NORMAL MODE (Creates Sessions):'),
    ' ./shannon.mjs <WEB_URL> <REPO_PATH> [--config config.yaml] [--pipeline-testing]',
    ' ./shannon.mjs <WEB_URL> <REPO_PATH> --setup-only # Setup local repo and create session only\n',

    heading('DEVELOPER MODE (Operates on Existing Sessions):'),
    ' ./shannon.mjs --run-phase <phase-name> [--pipeline-testing]',
    ' ./shannon.mjs --run-all [--pipeline-testing]',
    ' ./shannon.mjs --rollback-to <agent-name>',
    ' ./shannon.mjs --rerun <agent-name> [--pipeline-testing]',
    ' ./shannon.mjs --status',
    ' ./shannon.mjs --list-agents',
    ' ./shannon.mjs --cleanup [session-id] # Delete sessions\n',

    heading('OPTIONS:'),
    ' --config <file> YAML configuration file for authentication and testing parameters',
    ' --pipeline-testing Use minimal prompts for fast pipeline testing (creates minimal deliverables)\n',

    heading('DEVELOPER COMMANDS:'),
    ' --run-phase Run all agents in a phase (parallel execution for 5x speedup)',
    ' --run-all Run all remaining agents to completion (parallel execution)',
    ' --rollback-to Rollback git workspace to agent checkpoint',
    ' --rerun Rollback and rerun specific agent',
    ' --status Show current session status and progress',
    ' --list-agents List all available agents and phases',
    ' --cleanup Delete all sessions or specific session by ID\n',

    heading('EXAMPLES:'),
    ' # Normal mode - create new session',
    ' ./shannon.mjs "https://example.com" "/path/to/local/repo"',
    ' ./shannon.mjs "https://example.com" "/path/to/local/repo" --config auth.yaml',
    ' ./shannon.mjs "https://example.com" "/path/to/local/repo" --setup-only # Setup only\n',
    ' # Developer mode - operate on existing session',
    ' ./shannon.mjs --status # Show session status',
    ' ./shannon.mjs --run-phase exploitation # Run entire phase',
    ' ./shannon.mjs --run-all # Run all remaining agents',
    ' ./shannon.mjs --rerun xss-vuln # Fix and rerun failed agent',
    ' ./shannon.mjs --cleanup # Delete all sessions',
    ' ./shannon.mjs --cleanup <session-id> # Delete specific session\n',

    heading('REQUIREMENTS:'),
    ' • WEB_URL must start with http:// or https://',
    ' • REPO_PATH must be an accessible local directory',
    ' • Only test systems you own or have permission to test',
    ' • Developer mode requires existing pentest session\n',

    heading('ENVIRONMENT VARIABLES:'),
    ' PENTEST_MAX_RETRIES Number of retries for AI agents (default: 3)'
  ];

  for (const line of helpLines) {
    console.log(line);
  }
}
// Export the splash screen function for use in main
export { displaySplashScreen };
+307
View File
@@ -0,0 +1,307 @@
import { fs } from 'zx';
import yaml from 'js-yaml';
import Ajv from 'ajv';
import addFormats from 'ajv-formats';
import { PentestError } from './error-handling.js';
// Initialize AJV with formats.
// NOTE(review): per Ajv's documented options, `allErrors` collects every
// violation instead of stopping at the first and `verbose` attaches schema/data
// details to each error — confirm against the installed Ajv version.
const ajv = new Ajv({ allErrors: true, verbose: true });
addFormats(ajv);

// Load JSON Schema.
// This runs at import time (top-level await): if the schema file cannot be read
// or is not valid JSON, importing this module fails with a non-retryable
// PentestError of type 'config'.
let configSchema;
try {
  // Resolved relative to this module's URL, not the process working directory.
  const schemaPath = new URL('../configs/config-schema.json', import.meta.url);
  const schemaContent = await fs.readFile(schemaPath, 'utf8');
  configSchema = JSON.parse(schemaContent);
} catch (error) {
  throw new PentestError(
    `Failed to load configuration schema: ${error.message}`,
    'config',
    false,
    { schemaPath: '../configs/config-schema.json', originalError: error.message }
  );
}

// Compile the schema validator once; validateConfig() reuses it for every config.
const validateSchema = ajv.compile(configSchema);
// Security patterns to block.
// Each regex is applied with .test() to configuration strings (credentials,
// login-flow steps, rule url_path/description); any match rejects the config.
// None of them uses the `g` or `y` flag, so repeated .test() calls carry no
// lastIndex state between inputs.
const DANGEROUS_PATTERNS = [
  /\.\.\//,          // Path traversal
  /[<>]/,            // HTML/XML injection
  /javascript:/i,    // JavaScript URLs
  /data:/i,          // Data URLs
  /file:/i           // File URLs
];
// Parse and load a YAML configuration file with enhanced safety.
//
// Steps: existence check -> size cap (1MB) -> non-empty check -> YAML parse with
// the FAILSAFE schema -> structural/security validation via validateConfig().
//
// @param {string} configPath - Path to the YAML configuration file.
// @returns {Promise<object>} The parsed and validated configuration object.
// @throws {Error} When the file is missing, too large, empty, unparsable or
//   fails validation. Errors that are wrapped here keep the underlying error
//   on `error.cause` so the original stack and type are not lost.
export const parseConfig = async (configPath) => {
  try {
    // File existence check
    if (!await fs.pathExists(configPath)) {
      throw new Error(`Configuration file not found: ${configPath}`);
    }

    // File size check (prevent extremely large files)
    const stats = await fs.stat(configPath);
    const maxFileSize = 1024 * 1024; // 1MB
    if (stats.size > maxFileSize) {
      throw new Error(`Configuration file too large: ${stats.size} bytes (maximum: ${maxFileSize} bytes)`);
    }

    // Read file content
    const configContent = await fs.readFile(configPath, 'utf8');

    // Basic content validation
    if (!configContent.trim()) {
      throw new Error('Configuration file is empty');
    }

    // Parse YAML with safety options
    let config;
    try {
      config = yaml.load(configContent, {
        schema: yaml.FAILSAFE_SCHEMA, // Only basic YAML types, no JS evaluation
        json: false, // Don't allow JSON-specific syntax
        filename: configPath
      });
    } catch (yamlError) {
      // Keep the parser's error (line/column details, stack) reachable via `cause`.
      throw new Error(`YAML parsing failed: ${yamlError.message}`, { cause: yamlError });
    }

    // Additional safety check
    if (config === null || config === undefined) {
      throw new Error('Configuration file resulted in null/undefined after parsing');
    }

    // Validate the configuration structure and content
    validateConfig(config);

    return config;
  } catch (error) {
    // Enhance error message with context
    const alreadyDescriptive =
      error.message.startsWith('Configuration file not found') ||
      error.message.startsWith('YAML parsing failed') ||
      error.message.includes('must be') ||
      error.message.includes('exceeds maximum');

    if (alreadyDescriptive) {
      // These are already well-formatted errors, re-throw as-is
      throw error;
    }

    // Wrap other errors with context, preserving the original as `cause`
    throw new Error(
      `Failed to parse configuration file '${configPath}': ${error.message}`,
      { cause: error }
    );
  }
};
// Validate the parsed configuration: shape check, JSON Schema check, security
// checks, then non-fatal warnings about deprecated or missing sections.
// Throws an Error describing the first category of problem found.
const validateConfig = (config) => {
  // Reject anything that is not a plain (non-array) object up front
  if (!config || typeof config !== 'object') {
    throw new Error('Configuration must be a valid object');
  }
  if (Array.isArray(config)) {
    throw new Error('Configuration must be an object, not an array');
  }

  // JSON Schema validation: report every violation, one per line
  if (!validateSchema(config)) {
    const details = (validateSchema.errors || []).map(
      ({ instancePath, message }) => `${instancePath || 'root'}: ${message}`
    );
    throw new Error(`Configuration validation failed:\n - ${details.join('\n - ')}`);
  }

  // Checks that the schema cannot express (dangerous patterns, duplicates, conflicts)
  performSecurityValidation(config);

  // Deprecated section still accepted, but flagged
  if (config.login) {
    console.warn('⚠️ The "login" section is deprecated. Please use "authentication" instead.');
  }

  // Warn when the file does not actually constrain or configure anything
  const { rules, authentication } = config;
  if (!rules && !authentication) {
    console.warn('⚠️ Configuration file contains no rules or authentication. The pentest will run without any scoping restrictions or login capabilities.');
  } else if (rules && !rules.avoid && !rules.focus) {
    console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
  }
};
// Security checks that go beyond what the JSON Schema expresses: scans the
// authentication section and the rules for DANGEROUS_PATTERNS, then checks the
// rules for duplicates and avoid/focus conflicts. Throws on the first problem.
const performSecurityValidation = (config) => {
  const { authentication: auth, rules } = config;

  if (auth) {
    // Credentials: patterns are the outer loop so the first failing
    // (pattern, field) pair decides which error is raised.
    if (auth.credentials) {
      DANGEROUS_PATTERNS.forEach((pattern) => {
        for (const field of ['username', 'password']) {
          if (pattern.test(auth.credentials[field])) {
            throw new Error(`authentication.credentials.${field} contains potentially dangerous pattern`);
          }
        }
      });
    }

    // Login flow: every step is screened against every pattern
    if (auth.login_flow) {
      auth.login_flow.forEach((step, index) => {
        const offending = DANGEROUS_PATTERNS.find((pattern) => pattern.test(step));
        if (offending) {
          throw new Error(`authentication.login_flow[${index}] contains potentially dangerous pattern: ${offending.source}`);
        }
      });
    }
  }

  if (rules) {
    const { avoid, focus } = rules;

    // Per-rule pattern and type checks
    validateRulesSecurity(avoid, 'avoid');
    validateRulesSecurity(focus, 'focus');

    // Cross-rule checks: duplicates within a list, then conflicts between lists
    checkForDuplicates(avoid || [], 'avoid');
    checkForDuplicates(focus || [], 'focus');
    checkForConflicts(avoid, focus);
  }
};
// Screen one rule list ('avoid' or 'focus') for dangerous patterns, then run
// the type-specific check on each rule. A missing list is accepted silently.
const validateRulesSecurity = (rules, ruleType) => {
  if (!rules) return;

  rules.forEach((rule, index) => {
    const location = `rules.${ruleType}[${index}]`;

    // Patterns are the outer loop: for each pattern, url_path is tested before description
    DANGEROUS_PATTERNS.forEach((pattern) => {
      for (const field of ['url_path', 'description']) {
        if (pattern.test(rule[field])) {
          throw new Error(`${location}.${field} contains potentially dangerous pattern: ${pattern.source}`);
        }
      }
    });

    // Type-specific validation
    validateRuleTypeSpecific(rule, ruleType, index);
  });
};
// Check that a rule's url_path is well-formed for the rule's declared type.
// Unknown types are accepted without any check. Throws an Error naming the
// offending rule (rules.<ruleType>[<index>].url_path) when the value is invalid.
const validateRuleTypeSpecific = (rule, ruleType, index) => {
  const field = `rules.${ruleType}[${index}].url_path`;
  const { type } = rule;

  if (type === 'path') {
    if (!rule.url_path.startsWith('/')) {
      throw new Error(`${field} for type 'path' must start with '/'`);
    }
    return;
  }

  if (type === 'subdomain' || type === 'domain') {
    // Basic domain validation - no slashes allowed
    if (rule.url_path.includes('/')) {
      throw new Error(`${field} for type '${type}' cannot contain '/' characters`);
    }
    // Only full domains (not subdomain labels) must contain a dot
    if (type === 'domain' && !rule.url_path.includes('.')) {
      throw new Error(`${field} for type 'domain' must be a valid domain name`);
    }
    return;
  }

  if (type === 'method') {
    const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
    // Comparison is case-insensitive
    if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
      throw new Error(`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`);
    }
    return;
  }

  if (type === 'header' || type === 'parameter') {
    // Header and parameter names share the same basic character whitelist
    if (rule.url_path.match(/^[a-zA-Z0-9\-_]+$/) === null) {
      throw new Error(`${field} for type '${type}' must be a valid ${type} name (alphanumeric, hyphens, underscores only)`);
    }
  }
};
// Reject a rule list that contains the same (type, url_path) pair twice.
// The error names the index of the second occurrence.
const checkForDuplicates = (rules, ruleType) => {
  const fingerprints = new Set();

  rules.forEach(({ type, url_path: urlPath }, index) => {
    const fingerprint = `${type}:${urlPath}`;
    if (fingerprints.has(fingerprint)) {
      throw new Error(`Duplicate rule found in rules.${ruleType}[${index}]: ${type} '${urlPath}'`);
    }
    fingerprints.add(fingerprint);
  });
};
// Reject a configuration in which the same (type, url_path) pair appears in
// both the avoid list and the focus list. Missing lists are treated as empty.
const checkForConflicts = (avoidRules = [], focusRules = []) => {
  const toKey = ({ type, url_path: urlPath }) => `${type}:${urlPath}`;
  const avoided = new Set(avoidRules.map((rule) => toKey(rule)));

  focusRules.forEach((rule, index) => {
    if (avoided.has(toKey(rule))) {
      throw new Error(`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`);
    }
  });
};
// Return a normalized copy of a rule: whitespace trimmed from every field and
// the type lower-cased. Only description, type and url_path are carried over.
const sanitizeRule = ({ description, type, url_path: urlPath }) => ({
  description: description.trim(),
  type: type.toLowerCase().trim(),
  url_path: urlPath.trim(),
});
// Split a validated configuration into the sanitized pieces handed to agents.
// A missing config, rules section or authentication section yields empty lists
// and a null authentication respectively.
export const distributeConfig = (config) => {
  const rules = config?.rules;
  const avoidRules = rules?.avoid || [];
  const focusRules = rules?.focus || [];
  const authSection = config?.authentication || null;

  return {
    avoid: avoidRules.map(sanitizeRule),
    focus: focusRules.map(sanitizeRule),
    authentication: authSection ? sanitizeAuthentication(authSection) : null,
  };
};
// Return a normalized copy of the authentication section: strings trimmed,
// login_type and success_condition.type lower-cased. The password is passed
// through untouched (surrounding whitespace may be significant), and
// totp_secret is included only when a truthy value is present.
const sanitizeAuthentication = (auth) => {
  const loginType = auth.login_type.toLowerCase().trim();
  const loginUrl = auth.login_url.trim();

  const credentials = {
    username: auth.credentials.username.trim(),
    password: auth.credentials.password,
  };
  if (auth.credentials.totp_secret) {
    credentials.totp_secret = auth.credentials.totp_secret.trim();
  }

  const loginFlow = auth.login_flow.map((step) => step.trim());
  const successCondition = {
    type: auth.success_condition.type.toLowerCase().trim(),
    value: auth.success_condition.value.trim(),
  };

  return {
    login_type: loginType,
    login_url: loginUrl,
    credentials,
    login_flow: loginFlow,
    success_condition: successCondition,
  };
};
// Only parseConfig and distributeConfig are exported; the validation and sanitization helpers above are module-private.
+138
View File
@@ -0,0 +1,138 @@
import { path, fs } from 'zx';
import chalk from 'chalk';
import { validateQueueAndDeliverable } from './queue-validation.js';
// MCP agent mapping - assigns each agent to a specific Playwright instance to prevent conflicts.
// Keys are prompt names (e.g. 'vuln-injection'), which differ from the agent-name
// keys used by AGENT_VALIDATORS (e.g. 'injection-vuln').
// Within each parallel phase (vulnerability analysis, exploitation) every entry
// maps to a distinct instance; instances are reused across phases.
// The object is frozen, so entries cannot be added or reassigned at runtime.
export const MCP_AGENT_MAPPING = Object.freeze({
  // Phase 1: Pre-reconnaissance (actual prompt name is 'pre-recon-code')
  // NOTE: Pre-recon is pure code analysis and doesn't use browser automation,
  // but assigning MCP server anyway for consistency and future extensibility
  'pre-recon-code': 'playwright-agent1',
  // Phase 2: Reconnaissance (actual prompt name is 'recon')
  'recon': 'playwright-agent2',
  // Phase 3: Vulnerability Analysis (5 parallel agents)
  'vuln-injection': 'playwright-agent1',
  'vuln-xss': 'playwright-agent2',
  'vuln-auth': 'playwright-agent3',
  'vuln-ssrf': 'playwright-agent4',
  'vuln-authz': 'playwright-agent5',
  // Phase 4: Exploitation (5 parallel agents - same as vuln counterparts)
  'exploit-injection': 'playwright-agent1',
  'exploit-xss': 'playwright-agent2',
  'exploit-auth': 'playwright-agent3',
  'exploit-ssrf': 'playwright-agent4',
  'exploit-authz': 'playwright-agent5',
  // Phase 5: Reporting (actual prompt name is 'report-executive')
  // NOTE: Report generation is typically text-based and doesn't use browser automation,
  // but assigning MCP server anyway for potential screenshot inclusion or future needs
  'report-executive': 'playwright-agent3'
});
// Builds a validator that resolves to true when deliverables/<fileName> exists
// under the given source directory.
const deliverableExists = (fileName) => async (sourceDir) =>
  fs.pathExists(path.join(sourceDir, 'deliverables', fileName));

// Builds a validator for a vulnerability-analysis agent: delegates to
// validateQueueAndDeliverable and converts a thrown error into a logged `false`.
const queueValidator = (vulnType) => async (sourceDir) => {
  try {
    await validateQueueAndDeliverable(vulnType, sourceDir);
    return true;
  } catch (error) {
    console.log(chalk.yellow(` Queue validation failed for ${vulnType}: ${error.message}`));
    return false;
  }
};

// Direct agent-to-validator mapping - much simpler than pattern matching.
// Each value is an async (sourceDir) => boolean that reports whether the agent
// produced its expected output. Keys are agent names (e.g. 'injection-vuln').
export const AGENT_VALIDATORS = Object.freeze({
  // Pre-reconnaissance agent - validates the code analysis deliverable created by the agent
  'pre-recon': deliverableExists('code_analysis_deliverable.md'),

  // Reconnaissance agent
  'recon': deliverableExists('recon_deliverable.md'),

  // Vulnerability analysis agents
  'injection-vuln': queueValidator('injection'),
  'xss-vuln': queueValidator('xss'),
  'auth-vuln': queueValidator('auth'),
  'ssrf-vuln': queueValidator('ssrf'),
  'authz-vuln': queueValidator('authz'),

  // Exploitation agents
  'injection-exploit': deliverableExists('injection_exploitation_evidence.md'),
  'xss-exploit': deliverableExists('xss_exploitation_evidence.md'),
  'auth-exploit': deliverableExists('auth_exploitation_evidence.md'),
  'ssrf-exploit': deliverableExists('ssrf_exploitation_evidence.md'),
  'authz-exploit': deliverableExists('authz_exploitation_evidence.md'),

  // Executive report agent - additionally logs when the report is missing
  'report': async (sourceDir) => {
    const found = await deliverableExists('comprehensive_security_assessment_report.md')(sourceDir);
    if (!found) {
      console.log(chalk.red(` ❌ Missing required deliverable: comprehensive_security_assessment_report.md`));
    }
    return found;
  }
});
+188
View File
@@ -0,0 +1,188 @@
import chalk from 'chalk';
import { fs, path } from 'zx';
// Error type used across the pentest pipeline. In addition to the message it
// carries a category, a retry hint, free-form context and a creation timestamp.
export class PentestError extends Error {
  // type: 'config', 'network', 'tool', 'prompt', 'filesystem', 'validation'
  // retryable: whether the failed operation may be attempted again
  // context: extra details for logging
  constructor(message, type, retryable = false, context = {}) {
    super(message);
    Object.assign(this, {
      name: 'PentestError',
      type,
      retryable,
      context,
      timestamp: new Date().toISOString(),
    });
  }
}
// Centralized error logging: prints the error to the console (yellow for
// retryable errors, red otherwise) and, when a source directory is given,
// appends a JSON line to <sourceDir>/error.log. A failure to write the log file
// is reported on the console and otherwise ignored. Returns the log entry.
export const logError = async (error, contextMsg, sourceDir = null) => {
  const logEntry = {
    timestamp: new Date().toISOString(),
    context: contextMsg,
    error: {
      name: error.name || error.constructor.name,
      message: error.message,
      type: error.type || 'unknown',
      retryable: error.retryable || false,
      stack: error.stack
    }
  };

  // Console logging with color
  const icon = error.retryable ? '⚠️' : '❌';
  const paint = error.retryable ? chalk.yellow : chalk.red;
  console.log(paint(`${icon} ${contextMsg}:`));
  console.log(paint(` ${error.message}`));

  const hasContext = error.context && Object.keys(error.context).length > 0;
  if (hasContext) {
    console.log(chalk.gray(` Context: ${JSON.stringify(error.context)}`));
  }

  // File logging is optional
  if (!sourceDir) {
    return logEntry;
  }

  try {
    const logFile = path.join(sourceDir, 'error.log');
    await fs.appendFile(logFile, JSON.stringify(logEntry) + '\n');
  } catch (logErr) {
    console.log(chalk.gray(` (Failed to write error log: ${logErr.message})`));
  }

  return logEntry;
};
// Convert a configuration parsing failure into a non-retryable PentestError of
// type 'config' and throw it. This function never returns normally.
const handleConfigError = (error, configPath) => {
  const details = { configPath, originalError: error.message };
  const message = `Configuration error in ${configPath}: ${error.message}. Check your config.yaml file format and try again.`;

  throw new PentestError(message, 'config', false, details);
};
// Build the failure result for a tool run. The wrapped PentestError is marked
// retryable only for connection-level error codes (reset, timeout, DNS miss).
export const handleToolError = (toolName, error) => {
  const transientCodes = ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND'];
  const isRetryable = transientCodes.includes(error.code);

  const wrapped = new PentestError(
    `${toolName} execution failed: ${error.message}`,
    'tool',
    isRetryable,
    { toolName, originalError: error.message, errorCode: error.code }
  );

  return {
    tool: toolName,
    output: `Error: ${error.message}`,
    status: 'error',
    duration: 0,
    success: false,
    error: wrapped
  };
};
// Build the failure result for a prompt that could not be loaded. The wrapped
// PentestError has type 'prompt' and is never retryable.
export const handlePromptError = (promptName, error) => {
  const wrapped = new PentestError(
    `Failed to load prompt '${promptName}': ${error.message}`,
    'prompt',
    false,
    { promptName, originalError: error.message }
  );

  return { success: false, error: wrapped };
};
// Check if an error should trigger a retry for Claude agents.
//
// Classification is based on case-insensitive substrings of the error message.
// Permanent failures (bad credentials, invalid prompt, out of memory, permission
// problems) are checked FIRST so that a message such as
// "API error: invalid api key" is not retried merely because it also contains a
// generic retryable marker like "api error". Unknown errors are not retried.
//
// @param {Error|{message?: string}|string} error - The failure to classify.
//   A missing message is treated as an empty string instead of throwing.
// @returns {boolean} true when the operation is worth retrying.
export const isRetryableError = (error) => {
  const message = String(error?.message ?? error ?? '').toLowerCase();
  const mentions = (markers) => markers.some((marker) => message.includes(marker));

  // Non-retryable errors - take precedence over everything below
  const permanentMarkers = [
    'authentication',
    'invalid prompt',
    'out of memory',
    'permission denied',
    'invalid api key',
  ];
  if (mentions(permanentMarkers)) {
    return false;
  }

  const retryableMarkers = [
    // Network and connection errors - always retryable
    'network', 'connection', 'timeout', 'econnreset', 'enotfound', 'econnrefused',
    // Rate limiting - retryable with longer backoff
    'rate limit', '429', 'too many requests',
    // Server errors - retryable
    'server error', '5xx', 'internal server error', 'service unavailable', 'bad gateway',
    // Claude API specific errors - retryable
    'mcp server', 'model unavailable', 'service temporarily unavailable', 'api error', 'terminated',
    // Max turns without completion - retryable once
    'max turns', 'maximum turns',
  ];

  // Default to non-retryable for unknown errors
  return mentions(retryableMarkers);
};
// Compute how long to wait (in milliseconds) before the next retry.
// Rate-limit errors back off linearly from 30s in 10s steps, capped at 2 minutes.
// Everything else uses exponential backoff (2^attempt seconds) plus up to one
// second of random jitter, capped at 30 seconds.
export const getRetryDelay = (error, attempt) => {
  const text = error.message.toLowerCase();
  const isRateLimited = ['rate limit', '429'].some((marker) => text.includes(marker));

  if (isRateLimited) {
    const linearDelay = 30000 + (attempt * 10000);
    return Math.min(linearDelay, 120000);
  }

  const exponentialDelay = Math.pow(2, attempt) * 1000;
  const jitter = Math.random() * 1000;
  return Math.min(exponentialDelay + jitter, 30000);
};
// General error handler with context. Non-PentestError inputs are wrapped in a
// PentestError of type 'unknown'. Fatal errors are re-typed as 'fatal' (the
// passed-in PentestError is mutated) and thrown; otherwise a failure result
// object is returned.
const handleError = (error, context, isFatal = false) => {
  let pentestError = error;
  if (!(error instanceof PentestError)) {
    pentestError = new PentestError(
      error.message,
      'unknown',
      false,
      { context, originalError: error.message }
    );
  }

  if (isFatal) {
    pentestError.type = 'fatal';
    throw pentestError;
  }

  return {
    success: false,
    error: pentestError,
    // Reaching this point means the error was not fatal
    continuable: true
  };
};
+289
View File
@@ -0,0 +1,289 @@
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { Timer, timingResults, formatDuration } from '../utils/metrics.js';
import { handleToolError, PentestError } from '../error-handling.js';
import { AGENTS } from '../session-manager.js';
import { runClaudePromptWithRetry } from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
// Run one external terminal scanning tool against the target.
// Not a pure function: it spawns external processes, logs to the console and
// records durations in the shared timingResults.commands map.
//
// tool      - 'nmap', 'subfinder', 'whatweb' or 'schemathesis'
// target    - target URL; nmap and subfinder receive only its hostname
// sourceDir - base directory used to locate outputs/schemas for schemathesis
//             (falls back to the current directory)
//
// Resolves to { tool, output, status, duration } with status 'success' or
// 'skipped'. Never rejects: any failure, including an unknown tool name or an
// unparsable target URL, is converted to the result of handleToolError().
async function runTerminalScan(tool, target, sourceDir = null) {
  const timer = new Timer(`command-${tool}`);
  try {
    let command, result;
    switch (tool) {
      case 'nmap':
        console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
        const nmapHostname = new URL(target).hostname;
        // stdout is captured; stdin and stderr are ignored
        result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`nmap -sV -sC ${nmapHostname}`;
        const duration = timer.stop();
        timingResults.commands[tool] = duration;
        console.log(chalk.green(`${tool} completed in ${formatDuration(duration)}`));
        return { tool: 'nmap', output: result.stdout, status: 'success', duration };
      case 'subfinder':
        console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
        const hostname = new URL(target).hostname;
        result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`subfinder -d ${hostname}`;
        const subfinderDuration = timer.stop();
        timingResults.commands[tool] = subfinderDuration;
        console.log(chalk.green(`${tool} completed in ${formatDuration(subfinderDuration)}`));
        return { tool: 'subfinder', output: result.stdout, status: 'success', duration: subfinderDuration };
      case 'whatweb':
        console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
        // `command` exists only for the log line below; the actual invocation
        // goes through the zx template so the target is passed as an argument
        command = `whatweb --open-timeout 30 --read-timeout 60 ${target}`;
        console.log(chalk.gray(` Command: ${command}`));
        result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`whatweb --open-timeout 30 --read-timeout 60 ${target}`;
        const whatwebDuration = timer.stop();
        timingResults.commands[tool] = whatwebDuration;
        console.log(chalk.green(`${tool} completed in ${formatDuration(whatwebDuration)}`));
        return { tool: 'whatweb', output: result.stdout, status: 'success', duration: whatwebDuration };
      case 'schemathesis':
        // Only run if API schemas found
        const schemasDir = path.join(sourceDir || '.', 'outputs', 'schemas');
        if (await fs.pathExists(schemasDir)) {
          const schemaFiles = await fs.readdir(schemasDir);
          const apiSchemas = schemaFiles.filter(f => f.endsWith('.json') || f.endsWith('.yml') || f.endsWith('.yaml'));
          if (apiSchemas.length > 0) {
            console.log(chalk.blue(` 🔍 Running ${tool} scan...`));
            let allResults = [];
            // Run schemathesis on each schema file, one after another
            for (const schemaFile of apiSchemas) {
              const schemaPath = path.join(schemasDir, schemaFile);
              try {
                result = await $({ silent: true, stdio: ['ignore', 'pipe', 'ignore'] })`schemathesis run ${schemaPath} -u ${target} --max-failures=5`;
                allResults.push(`Schema: ${schemaFile}\n${result.stdout}`);
              } catch (schemaError) {
                // A failing schema run is recorded in the output instead of
                // aborting the remaining schemas
                allResults.push(`Schema: ${schemaFile}\nError: ${schemaError.stdout || schemaError.message}`);
              }
            }
            const schemaDuration = timer.stop();
            timingResults.commands[tool] = schemaDuration;
            console.log(chalk.green(`${tool} completed in ${formatDuration(schemaDuration)}`));
            return { tool: 'schemathesis', output: allResults.join('\n\n'), status: 'success', duration: schemaDuration };
          } else {
            // Skipped runs are not recorded in timingResults
            console.log(chalk.gray(` ⏭️ ${tool} - no API schemas found`));
            return { tool: 'schemathesis', output: 'No API schemas found', status: 'skipped', duration: timer.stop() };
          }
        } else {
          console.log(chalk.gray(` ⏭️ ${tool} - schemas directory not found`));
          return { tool: 'schemathesis', output: 'Schemas directory not found', status: 'skipped', duration: timer.stop() };
        }
      default:
        throw new Error(`Unknown tool: ${tool}`);
    }
  } catch (error) {
    const duration = timer.stop();
    timingResults.commands[tool] = duration;
    console.log(chalk.red(`${tool} failed in ${formatDuration(duration)}`));
    return handleToolError(tool, error);
  }
}
// Wave 1: Initial footprinting + code analysis.
//
// Runs nmap, subfinder and whatweb in parallel with the 'pre-recon' code
// analysis agent. In pipeline testing mode the external tools are skipped and
// only the agent runs.
//
// Resolves to { nmap, subfinder, whatweb, codeAnalysis }. No naabu scan is
// launched in this wave, so the result carries no `naabu` entry. (Previously the
// four results were destructured into five names, which put the code-analysis
// result under `naabu` and left `codeAnalysis` undefined.)
async function runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTestingMode = false, sessionId = null) {
  console.log(chalk.blue(' → Launching Wave 1 operations in parallel...'));

  // Loads the prompt and starts the code-analysis agent; shared by both modes
  const startCodeAnalysis = async () => runClaudePromptWithRetry(
    await loadPrompt('pre-recon-code', variables, null, pipelineTestingMode),
    sourceDir,
    '*',
    '',
    AGENTS['pre-recon'].displayName,
    'pre-recon', // Agent name for snapshot creation
    chalk.cyan,
    { webUrl, sessionId } // Session metadata for logging
  );

  // Skip external commands in pipeline testing mode
  if (pipelineTestingMode) {
    console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)'));
    const codeAnalysis = await startCodeAnalysis();
    return {
      nmap: 'Skipped (pipeline testing mode)',
      subfinder: 'Skipped (pipeline testing mode)',
      whatweb: 'Skipped (pipeline testing mode)',
      codeAnalysis
    };
  }

  // Start the scans first so they run while the prompt is being loaded.
  // runTerminalScan never rejects, so these promises cannot be left unhandled.
  const scans = [
    runTerminalScan('nmap', webUrl),
    runTerminalScan('subfinder', webUrl),
    runTerminalScan('whatweb', webUrl)
  ];
  const codeAnalysisRun = startCodeAnalysis();

  // Check if authentication config is provided for login instructions injection
  console.log(chalk.gray(` → Config check: ${config ? 'present' : 'missing'}, Auth: ${config?.authentication ? 'present' : 'missing'}`));

  const [nmap, subfinder, whatweb, codeAnalysis] = await Promise.all([...scans, codeAnalysisRun]);
  return { nmap, subfinder, whatweb, codeAnalysis };
}
// Wave 2: Additional scanning. Currently this is only schemathesis, which runs
// when the tool is available and pipeline testing mode is off. Always resolves
// to { schemathesis: <scan result or skipped placeholder> }.
async function runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode = false) {
  console.log(chalk.blue(' → Running Wave 2 additional scans in parallel...'));

  // Placeholder result used whenever the scan does not run
  const skipped = (reason) => ({
    schemathesis: { tool: 'schemathesis', output: reason, status: 'skipped', duration: 0 }
  });

  // Skip external commands in pipeline testing mode
  if (pipelineTestingMode) {
    console.log(chalk.gray(' ⏭️ Skipping external tools (pipeline testing mode)'));
    return skipped('Skipped (pipeline testing mode)');
  }

  // Nothing to run when the only Wave 2 tool is unavailable
  if (!toolAvailability.schemathesis) {
    console.log(chalk.gray(' ⏭️ No Wave 2 tools available'));
    return skipped('Tool not available');
  }

  const schemathesis = await runTerminalScan('schemathesis', webUrl, sourceDir);
  return { schemathesis };
}
// Stitch together pre-recon outputs into one markdown report and save it to
// <sourceDir>/deliverables/pre_recon_deliverable.md.
// Not a pure function: it reads the code analysis deliverable from disk, logs to
// the console and writes the report file.
//
// outputs   - positional array: [nmap, subfinder, whatweb, naabu, codeAnalysis,
//             ...additionalScans]; each scan entry may be a result object
//             ({ tool, status, output }), a plain string, or undefined
// sourceDir - repository directory that contains (or will contain) deliverables/
//
// Returns the report text. Throws a PentestError of type 'filesystem' when the
// report cannot be written.
async function stitchPreReconOutputs(outputs, sourceDir) {
  // NOTE: `codeAnalysis` is destructured only to keep positions aligned; the
  // report content comes from the deliverable file read below.
  const [nmap, subfinder, whatweb, naabu, codeAnalysis, ...additionalScans] = outputs;
  // Try to read the code analysis deliverable file
  let codeAnalysisContent = 'No analysis available';
  try {
    const codeAnalysisPath = path.join(sourceDir, 'deliverables', 'code_analysis_deliverable.md');
    codeAnalysisContent = await fs.readFile(codeAnalysisPath, 'utf8');
  } catch (error) {
    console.log(chalk.yellow(`⚠️ Could not read code analysis deliverable: ${error.message}`));
    // Fallback message if file doesn't exist
    codeAnalysisContent = 'Analysis located in deliverables/code_analysis_deliverable.md';
  }
  // Build additional scans section; entries without a `tool` field are skipped
  let additionalSection = '';
  if (additionalScans && additionalScans.length > 0) {
    additionalSection = '\n## Authenticated Scans\n';
    additionalScans.forEach(scan => {
      if (scan && scan.tool) {
        additionalSection += `
### ${scan.tool.toUpperCase()}
Status: ${scan.status}
${scan.output}
`;
      }
    });
  }
  // Each section falls back from result.output to the raw value (covers plain
  // strings such as "Skipped (...)") and finally to 'No output'.
  const report = `
# Pre-Reconnaissance Report
## Port Discovery (naabu)
Status: ${naabu?.status || 'Skipped'}
${naabu?.output || naabu || 'No output'}
## Network Scanning (nmap)
Status: ${nmap?.status || 'Skipped'}
${nmap?.output || nmap || 'No output'}
## Subdomain Discovery (subfinder)
Status: ${subfinder?.status || 'Skipped'}
${subfinder?.output || subfinder || 'No output'}
## Technology Detection (whatweb)
Status: ${whatweb?.status || 'Skipped'}
${whatweb?.output || whatweb || 'No output'}
## Code Analysis
${codeAnalysisContent}
${additionalSection}
---
Report generated at: ${new Date().toISOString()}
`.trim();
  // Ensure deliverables directory exists in the cloned repo
  try {
    const deliverablePath = path.join(sourceDir, 'deliverables', 'pre_recon_deliverable.md');
    await fs.ensureDir(path.join(sourceDir, 'deliverables'));
    // Write to file in the cloned repository
    await fs.writeFile(deliverablePath, report);
  } catch (error) {
    throw new PentestError(
      `Failed to write pre-recon report: ${error.message}`,
      'filesystem',
      false,
      { sourceDir, originalError: error.message }
    );
  }
  return report;
}
// Main pre-recon phase execution function.
// Runs Wave 1 and Wave 2 in sequence, stitches their results into the pre-recon
// deliverable and resolves to { duration, report } for the whole phase.
export async function executePreReconPhase(webUrl, sourceDir, variables, config, toolAvailability, pipelineTestingMode, sessionId = null) {
  console.log(chalk.yellow.bold('\n🔍 PHASE 1: PRE-RECONNAISSANCE'));
  const phaseTimer = new Timer('phase-1-pre-recon');

  console.log(chalk.yellow('Wave 1: Initial footprinting...'));
  const wave1 = await runPreReconWave1(webUrl, sourceDir, variables, config, pipelineTestingMode, sessionId);
  console.log(chalk.green(' ✅ Wave 1 operations completed'));

  console.log(chalk.yellow('Wave 2: Additional scanning...'));
  const wave2 = await runPreReconWave2(webUrl, sourceDir, toolAvailability, pipelineTestingMode);
  console.log(chalk.green(' ✅ Wave 2 operations completed'));

  console.log(chalk.blue('📝 Stitching pre-recon outputs...'));

  // The stitcher expects a positional list: the five Wave 1 slots first,
  // followed by any additional scans
  const { nmap, subfinder, whatweb, naabu, codeAnalysis } = wave1;
  const stitchInputs = [nmap, subfinder, whatweb, naabu, codeAnalysis];
  if (wave2.schemathesis) {
    stitchInputs.push(wave2.schemathesis);
  }
  const report = await stitchPreReconOutputs(stitchInputs, sourceDir);

  const duration = phaseTimer.stop();
  console.log(chalk.green(`✅ Pre-reconnaissance complete in ${formatDuration(duration)}`));
  console.log(chalk.green(`💾 Saved to ${sourceDir}/deliverables/pre_recon_deliverable.md`));

  return { duration, report };
}
+53
View File
@@ -0,0 +1,53 @@
import { fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError } from '../error-handling.js';
// Assemble the final report from the specialist exploitation deliverables.
// Not a pure function: it reads the deliverables from disk, logs progress and
// writes deliverables/comprehensive_security_assessment_report.md.
//
// Every deliverable that exists is appended in a fixed order (sections are
// separated by a blank line). All deliverables are currently optional: missing
// or unreadable ones are logged and skipped.
//
// @param {string} sourceDir - Repository directory containing deliverables/.
// @returns {Promise<string>} The assembled report content (may be empty).
// @throws {PentestError} type 'filesystem' when the report cannot be written.
export async function assembleFinalReport(sourceDir) {
  const deliverablesDir = path.join(sourceDir, 'deliverables');
  const deliverableFiles = [
    { name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
    { name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
    { name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
    { name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
    { name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }
  ];

  const sections = [];
  for (const file of deliverableFiles) {
    const filePath = path.join(deliverablesDir, file.path);
    try {
      if (await fs.pathExists(filePath)) {
        const content = await fs.readFile(filePath, 'utf8');
        sections.push(content);
        console.log(chalk.green(`✅ Added ${file.name} findings`));
      } else if (file.required) {
        throw new Error(`Required file ${file.path} not found`);
      } else {
        console.log(chalk.gray(`⏭️ No ${file.name} deliverable found`));
      }
    } catch (error) {
      // Only a required deliverable aborts the assembly
      if (file.required) {
        throw error;
      }
      console.log(chalk.yellow(`⚠️ Could not read ${file.path}: ${error.message}`));
    }
  }

  const finalContent = sections.join('\n\n');
  const finalReportPath = path.join(deliverablesDir, 'comprehensive_security_assessment_report.md');
  try {
    // The directory may not exist yet when no specialist produced a deliverable
    await fs.ensureDir(deliverablesDir);
    await fs.writeFile(finalReportPath, finalContent);
    console.log(chalk.green(`✅ Final report assembled at ${finalReportPath}`));
  } catch (error) {
    throw new PentestError(
      `Failed to write final report: ${error.message}`,
      'filesystem',
      false,
      { finalReportPath, originalError: error.message }
    );
  }
  return finalContent;
}
+46
View File
@@ -0,0 +1,46 @@
import chalk from 'chalk';
// Terminal spinner that redraws a single status line on stdout every 100ms.
//
// Each frame is drawn by returning to the start of the line with '\r'. Because
// that does not erase what is already there, the indicator remembers the widest
// line it has drawn and pads shorter frames, so switching to a shorter message
// via updateMessage() leaves no residue from the longer one. stop() clears the
// same width.
export class ProgressIndicator {
  // message: text shown next to the spinner
  constructor(message = 'Working...') {
    this.message = message;
    this.frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
    this.frameIndex = 0;
    this.interval = null;
    this.isRunning = false;
    // Visible width (spinner + space + message) of the widest frame drawn so far
    this.renderedWidth = 0;
  }

  // Draw one frame and advance the spinner. Called by the timer set up in start().
  render() {
    const frame = this.frames[this.frameIndex];
    const width = this.message.length + 2; // spinner glyph + space + message
    const padding = ' '.repeat(Math.max(0, this.renderedWidth - width));
    process.stdout.write(`\r${chalk.cyan(frame)} ${chalk.dim(this.message)}${padding}`);
    this.renderedWidth = Math.max(this.renderedWidth, width);
    this.frameIndex = (this.frameIndex + 1) % this.frames.length;
  }

  // Begin animating. Calling start() while already running is a no-op.
  start() {
    if (this.isRunning) return;
    this.isRunning = true;
    this.frameIndex = 0;
    this.interval = setInterval(() => this.render(), 100);
  }

  // Change the text shown next to the spinner; takes effect on the next frame.
  updateMessage(newMessage) {
    this.message = newMessage;
  }

  // Stop animating and blank the spinner line. No-op when not running.
  stop() {
    if (!this.isRunning) return;
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
    // Clear the spinner line, covering the widest frame that was ever drawn
    const clearWidth = Math.max(this.renderedWidth, this.message.length + 2) + 3;
    process.stdout.write('\r' + ' '.repeat(clearWidth) + '\r');
    this.renderedWidth = 0;
    this.isRunning = false;
  }

  // Stop the spinner and print a final success line in green.
  finish(successMessage = 'Complete') {
    this.stop();
    console.log(chalk.green(`${successMessage}`));
  }
}
+242
View File
@@ -0,0 +1,242 @@
import { fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError, handlePromptError } from '../error-handling.js';
import { MCP_AGENT_MAPPING } from '../constants.js';
/**
 * Build the login instructions that get injected into a prompt.
 *
 * Reads `login_resources/login_instructions.txt` (two directories above this module),
 * keeps the COMMON, login-type and VERIFICATION sections when the
 * `<!-- BEGIN:NAME -->` / `<!-- END:NAME -->` markers are present (otherwise the whole
 * template is used), then fills in the configured login flow and credentials.
 *
 * @param {object} authentication - Authentication config. `login_flow` (array of
 *   strings) is required; `login_type` and `credentials`
 *   (`username`, `password`, `totp_secret`) are optional.
 * @returns {Promise<string>} The assembled login instructions.
 * @throws {PentestError} When the template file is missing or assembly fails.
 */
async function buildLoginInstructions(authentication) {
  try {
    // Load the login instructions template
    const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'login_resources', 'login_instructions.txt');

    if (!await fs.pathExists(loginInstructionsPath)) {
      throw new PentestError(
        'Login instructions template not found',
        'filesystem',
        false,
        { loginInstructionsPath }
      );
    }

    const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');

    // Extract the text between the BEGIN/END markers of one section ('' when absent)
    const getSection = (content, sectionName) => {
      const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`);
      const match = regex.exec(content);
      return match ? match[1].trim() : '';
    };

    // Values are inserted through replacer functions so that `$&`, `$$`, `$1`, ...
    // inside a credential are kept literally instead of being expanded by
    // String.prototype.replace.
    const literal = (value) => () => String(value);

    const loginType = authentication.login_type?.toUpperCase();
    let loginInstructions = '';

    // Build instructions with only the relevant sections
    const commonSection = getSection(fullTemplate, 'COMMON');
    const authSection = getSection(fullTemplate, loginType); // FORM or SSO
    const verificationSection = getSection(fullTemplate, 'VERIFICATION');

    // Fallback to full template if markers are missing (backward compatibility)
    if (!commonSection && !authSection && !verificationSection) {
      console.log(chalk.yellow('⚠️ Section markers not found, using full login instructions template'));
      loginInstructions = fullTemplate;
    } else {
      loginInstructions = [commonSection, authSection, verificationSection]
        .filter(section => section) // Remove empty sections
        .join('\n\n');
    }

    // The login flow from config becomes the user instructions
    let userInstructions = authentication.login_flow.join('\n');

    // Replace credential placeholders within the user instructions
    const credentials = authentication.credentials;
    if (credentials) {
      if (credentials.username) {
        userInstructions = userInstructions.replace(/\$username/g, literal(credentials.username));
      }
      if (credentials.password) {
        userInstructions = userInstructions.replace(/\$password/g, literal(credentials.password));
      }
      if (credentials.totp_secret) {
        userInstructions = userInstructions.replace(
          /\$totp/g,
          literal(`generated TOTP code using secret "${credentials.totp_secret}"`)
        );
      }
    }

    loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, literal(userInstructions));

    // Replace TOTP secret placeholder if present in template
    if (credentials?.totp_secret) {
      loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, literal(credentials.totp_secret));
    }

    return loginInstructions;
  } catch (error) {
    if (error instanceof PentestError) {
      throw error;
    }
    // NOTE(review): the error context carries the whole authentication object,
    // credentials included - confirm that this context is never logged verbatim.
    throw new PentestError(
      `Failed to build login instructions: ${error.message}`,
      'config',
      false,
      { authentication, originalError: error.message }
    );
  }
}
/**
 * Substitute `{{PLACEHOLDER}}` tokens in a prompt template.
 *
 * Handles `{{WEB_URL}}`, `{{REPO_PATH}}`, `{{MCP_SERVER}}`, `{{RULES_AVOID}}`,
 * `{{RULES_FOCUS}}` and `{{LOGIN_INSTRUCTIONS}}`. When no avoid/focus rules are
 * configured the whole `<rules>…</rules>` section is replaced by a short notice.
 * Placeholders still present afterwards are reported with a console warning.
 *
 * @param {string} template - Non-empty prompt template.
 * @param {{webUrl: string, repoPath: string, MCP_SERVER?: string}} variables
 * @param {object|null} [config=null] - Optional config with `avoid`, `focus`
 *   (arrays of `{ description }`) and `authentication`.
 * @returns {Promise<string>} The interpolated prompt.
 * @throws {PentestError} On invalid arguments or when interpolation fails.
 */
async function interpolateVariables(template, variables, config = null) {
  try {
    if (!template || typeof template !== 'string') {
      throw new PentestError(
        'Template must be a non-empty string',
        'validation',
        false,
        { templateType: typeof template, templateLength: template?.length }
      );
    }

    if (!variables || !variables.webUrl || !variables.repoPath) {
      throw new PentestError(
        'Variables must include webUrl and repoPath',
        'validation',
        false,
        { variables: Object.keys(variables || {}) }
      );
    }

    // Replacer functions insert the value literally. A plain replacement string
    // would have `$&`, `$$`, `$1`, ... expanded by String.prototype.replace,
    // which corrupts URLs, paths or passwords that contain a dollar sign.
    const literal = (value) => () => String(value);

    let result = template
      .replace(/{{WEB_URL}}/g, literal(variables.webUrl))
      .replace(/{{REPO_PATH}}/g, literal(variables.repoPath))
      .replace(/{{MCP_SERVER}}/g, literal(variables.MCP_SERVER || 'playwright-agent1'));

    // A missing config behaves like a config without rules or authentication
    const hasAvoidRules = Boolean(config && config.avoid && config.avoid.length > 0);
    const hasFocusRules = Boolean(config && config.focus && config.focus.length > 0);

    if (!hasAvoidRules && !hasFocusRules) {
      // Replace the entire rules section with a clean message
      const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
      result = result.replace(/<rules>[\s\S]*?<\/rules>/g, literal(cleanRulesSection));
    } else {
      const avoidRules = hasAvoidRules ? config.avoid.map(r => `- ${r.description}`).join('\n') : 'None';
      const focusRules = hasFocusRules ? config.focus.map(r => `- ${r.description}`).join('\n') : 'None';

      result = result
        .replace(/{{RULES_AVOID}}/g, literal(avoidRules))
        .replace(/{{RULES_FOCUS}}/g, literal(focusRules));
    }

    // Extract and inject login instructions from config (empty when not configured)
    const loginInstructions = config?.authentication?.login_flow
      ? await buildLoginInstructions(config.authentication)
      : '';
    result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, literal(loginInstructions));

    // Warn about placeholders that were not replaced
    const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
    if (remainingPlaceholders) {
      console.log(chalk.yellow(`⚠️ Warning: Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`));
    }

    return result;
  } catch (error) {
    if (error instanceof PentestError) {
      throw error;
    }
    throw new PentestError(
      `Variable interpolation failed: ${error.message}`,
      'prompt',
      false,
      { originalError: error.message }
    );
  }
}
/**
 * Load a prompt template from disk and interpolate it.
 *
 * Templates are read from `prompts/<promptName>.txt`, or from
 * `prompts/pipeline-testing/<promptName>.txt` in pipeline testing mode, both resolved
 * two directories above this module. The MCP server for the agent is looked up in
 * `MCP_AGENT_MAPPING` (falling back to `playwright-agent1`) and passed to the
 * interpolation as `MCP_SERVER`.
 *
 * @param {string} promptName - Prompt (agent) name without the `.txt` extension.
 * @param {object} variables - Interpolation variables (`webUrl`, `repoPath`, ...).
 * @param {object|null} [config=null] - Optional config forwarded to the interpolation.
 * @param {boolean} [pipelineTestingMode=false] - Use the pipeline-testing prompts.
 * @returns {Promise<string>} The interpolated prompt.
 * @throws {PentestError} When the prompt file is missing; other failures are
 *   converted through `handlePromptError`.
 */
export async function loadPrompt(promptName, variables, config = null, pipelineTestingMode = false) {
  try {
    // Use pipeline testing prompts if pipeline testing mode is enabled
    const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
    const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
    const promptPath = path.join(promptsDir, `${promptName}.txt`);

    // Debug message for pipeline testing mode
    if (pipelineTestingMode) {
      console.log(chalk.yellow(`⚡ Using pipeline testing prompt: ${promptPath}`));
    }

    // Check if file exists first
    if (!await fs.pathExists(promptPath)) {
      throw new PentestError(
        `Prompt file not found: ${promptPath}`,
        'prompt',
        false,
        { promptName, promptPath }
      );
    }

    // Copy the variables so the caller's object is not modified
    const enhancedVariables = { ...variables };

    // Assign MCP server based on prompt name (agent name)
    if (MCP_AGENT_MAPPING[promptName]) {
      enhancedVariables.MCP_SERVER = MCP_AGENT_MAPPING[promptName];
      // Separator added: agent and server used to be printed as one word
      console.log(chalk.gray(` 🎭 Assigned ${promptName} → ${enhancedVariables.MCP_SERVER}`));
    } else {
      // Fallback for unknown agents
      enhancedVariables.MCP_SERVER = 'playwright-agent1';
      console.log(chalk.yellow(` 🎭 Unknown agent ${promptName}, using fallback → ${enhancedVariables.MCP_SERVER}`));
    }

    const template = await fs.readFile(promptPath, 'utf8');
    return await interpolateVariables(template, enhancedVariables, config);
  } catch (error) {
    if (error instanceof PentestError) {
      throw error;
    }
    const promptError = handlePromptError(promptName, error);
    throw promptError.error;
  }
}
/**
 * Write the interpolated prompt of an agent to `<sourceDir>/prompt-snapshots/<agentName>.md`.
 *
 * The file name is deterministic, so a later snapshot for the same agent
 * overwrites the earlier one.
 *
 * @param {string} sourceDir - Directory that receives the `prompt-snapshots` folder.
 * @param {string} agentName - Agent the prompt belongs to (also the file name).
 * @param {string} promptContent - The full interpolated prompt text.
 * @returns {Promise<void>}
 */
export async function savePromptSnapshot(sourceDir, agentName, promptContent) {
  const snapshotDir = path.join(sourceDir, 'prompt-snapshots');
  await fs.ensureDir(snapshotDir);

  // One snapshot per agent
  const fileName = `${agentName}.md`;
  const targetPath = path.join(snapshotDir, fileName);
  const generatedAt = new Date().toISOString();

  const documentLines = [
    `# Prompt Snapshot: ${agentName}`,
    `**Generated:** ${generatedAt}`,
    `**Agent:** ${agentName}`,
    '---',
    '## Full Interpolated Prompt',
    '```markdown',
    `${promptContent}`,
    '```',
    '---',
    '*This snapshot represents the exact prompt that was sent to Claude Code to generate the current deliverables for this agent.*',
    '' // trailing newline
  ];

  await fs.writeFile(targetPath, documentLines.join('\n'));
  console.log(chalk.gray(` 📸 Prompt snapshot saved: prompt-snapshots/${fileName}`));
}
+224
View File
@@ -0,0 +1,224 @@
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
// Deliverable and queue file names per vulnerability type (frozen at both levels).
// Every type follows the same naming scheme: `<type>_analysis_deliverable.md`
// and `<type>_exploitation_queue.json`.
const VULN_TYPE_CONFIG = Object.freeze(
  Object.fromEntries(
    ['injection', 'xss', 'auth', 'ssrf', 'authz'].map((vulnType) => [
      vulnType,
      Object.freeze({
        deliverable: `${vulnType}_analysis_deliverable.md`,
        queue: `${vulnType}_exploitation_queue.json`
      })
    ])
  )
);
// Function composition helpers.
// pipe: left-to-right; each step receives the awaited result of the previous one,
// so steps may be sync or async. With no steps the input is returned unchanged.
const pipe = (...fns) => (x) => {
  let carried = x;
  for (const step of fns) {
    const previous = carried;
    carried = (async () => step(await previous))();
  }
  return carried;
};

// compose: right-to-left, fully synchronous.
const compose = (...fns) => (x) => {
  let carried = x;
  for (let i = fns.length - 1; i >= 0; i -= 1) {
    carried = fns[i](carried);
  }
  return carried;
};
// Build a frozen validation rule: `predicate` returns truthy when the rule passes,
// `errorMessage` describes the failure, `retryable` defaults to true.
const createValidationRule = (predicate, errorMessage, retryable = true) => {
  const rule = { predicate, errorMessage, retryable };
  return Object.freeze(rule);
};
// File-existence rules (following QUEUE_VALIDATION_FLOW.md).
// A predicate returns truthy when its rule is satisfied; rules are checked in order.
const fileExistenceRules = Object.freeze([
  // Rule 1: fails when neither the deliverable nor the queue exists
  createValidationRule(
    (existence) => existence.deliverableExists || existence.queueExists,
    'Analysis failed: Neither deliverable nor queue file exists. Analysis agent must create both files.'
  ),

  // Rule 2: fails when the deliverable exists but the queue is missing
  createValidationRule(
    (existence) => existence.queueExists || !existence.deliverableExists,
    'Analysis incomplete: Deliverable exists but queue file missing. Analysis agent must create both files.'
  ),

  // Rule 3: fails when the queue exists but the deliverable is missing
  createValidationRule(
    (existence) => !existence.queueExists || existence.deliverableExists,
    'Analysis incomplete: Queue exists but deliverable file missing. Analysis agent must create both files.'
  )
]);
// Resolve the deliverable and queue paths of a vulnerability type.
// Returns a frozen { vulnType, deliverable, queue, sourceDir } record, or
// { error } holding a PentestError when the type is not in VULN_TYPE_CONFIG.
const createPaths = (vulnType, sourceDir) => {
  const typeConfig = VULN_TYPE_CONFIG[vulnType];

  if (typeConfig) {
    const inDeliverables = (fileName) => path.join(sourceDir, 'deliverables', fileName);

    return Object.freeze({
      vulnType,
      deliverable: inDeliverables(typeConfig.deliverable),
      queue: inDeliverables(typeConfig.queue),
      sourceDir
    });
  }

  const unknownTypeError = new PentestError(
    `Unknown vulnerability type: ${vulnType}`,
    'validation',
    false,
    { vulnType }
  );
  return { error: unknownTypeError };
};
// Pipeline step: record whether the deliverable and the queue file exist.
// A record that already carries an error is passed through untouched.
const checkFileExistence = async (paths) => {
  if (paths.error) {
    return paths;
  }

  // Both lookups are started before either is awaited
  const deliverableCheck = fs.pathExists(paths.deliverable);
  const queueCheck = fs.pathExists(paths.queue);
  const [deliverableExists, queueExists] = await Promise.all([deliverableCheck, queueCheck]);

  const existence = Object.freeze({ deliverableExists, queueExists });
  return Object.freeze({ ...paths, existence });
};
// Pipeline step: apply fileExistenceRules to the recorded existence flags.
// The first failing rule is turned into a PentestError on the returned record;
// records that already carry an error, or that pass every rule, are returned as is.
const validateExistenceRules = (pathsWithExistence) => {
  if (pathsWithExistence.error) {
    return pathsWithExistence;
  }

  const { existence, vulnType } = pathsWithExistence;

  for (const rule of fileExistenceRules) {
    if (rule.predicate(existence)) {
      continue;
    }

    const details = {
      vulnType,
      deliverablePath: pathsWithExistence.deliverable,
      queuePath: pathsWithExistence.queue,
      existence
    };

    return {
      ...pathsWithExistence,
      error: new PentestError(
        `${rule.errorMessage} (${vulnType})`,
        'validation',
        rule.retryable,
        details
      )
    };
  }

  return pathsWithExistence;
};
/**
 * Parse queue file content and check that it has a `vulnerabilities` array.
 *
 * @param {string} content - Raw JSON text of the queue file.
 * @returns {Readonly<{valid: boolean, data: *, error: (string|null)}>}
 *   `valid` is always a boolean. `error` holds the parser message only when the
 *   text is not valid JSON; well-formed JSON of the wrong shape (including `null`)
 *   yields `valid: false` with `error: null`.
 */
const validateQueueStructure = (content) => {
  try {
    const parsed = JSON.parse(content);
    return Object.freeze({
      // Array.isArray gives a real boolean; `?.` keeps JSON `null` / primitives
      // from being misreported as a parse failure.
      valid: Array.isArray(parsed?.vulnerabilities),
      data: parsed,
      error: null
    });
  } catch (parseError) {
    return Object.freeze({
      valid: false,
      data: null,
      error: parseError.message
    });
  }
};
// Pipeline step: read the queue file and validate its structure.
// On success the parsed queue is attached as `queueData`; otherwise the returned
// record carries a PentestError (retryable for a malformed queue, not retryable
// when the file cannot be read). Records already carrying an error pass through.
const validateQueueContent = async (pathsWithExistence) => {
  if (pathsWithExistence.error) {
    return pathsWithExistence;
  }

  try {
    const rawQueue = await fs.readFile(pathsWithExistence.queue, 'utf8');
    const structure = validateQueueStructure(rawQueue);

    if (structure.valid) {
      return Object.freeze({
        ...pathsWithExistence,
        queueData: structure.data
      });
    }

    // Rule 6: Both exist, queue invalid
    let message;
    if (structure.error) {
      message = `Queue validation failed for ${pathsWithExistence.vulnType}: Invalid JSON structure. Analysis agent must fix queue format.`;
    } else {
      message = `Queue validation failed for ${pathsWithExistence.vulnType}: Missing or invalid 'vulnerabilities' array. Analysis agent must fix queue structure.`;
    }

    return {
      ...pathsWithExistence,
      error: new PentestError(
        message,
        'validation',
        true, // retryable
        {
          vulnType: pathsWithExistence.vulnType,
          queuePath: pathsWithExistence.queue,
          originalError: structure.error,
          queueStructure: structure.data ? Object.keys(structure.data) : []
        }
      )
    };
  } catch (readError) {
    const context = {
      vulnType: pathsWithExistence.vulnType,
      queuePath: pathsWithExistence.queue,
      originalError: readError.message
    };

    return {
      ...pathsWithExistence,
      error: new PentestError(
        `Failed to read queue file for ${pathsWithExistence.vulnType}: ${readError.message}`,
        'filesystem',
        false,
        context
      )
    };
  }
};
// Final pipeline step: turn validated queue data into the exploitation decision.
// Throws the error carried by the record, if any.
const determineExploitationDecision = (validatedData) => {
  const { error } = validatedData;
  if (error) {
    throw error;
  }

  const vulnerabilityCount = validatedData.queueData.vulnerabilities.length;

  // Rule 4: Both exist, queue valid and populated -> exploit
  // Rule 5: Both exist, queue valid but empty -> nothing to exploit
  return Object.freeze({
    shouldExploit: vulnerabilityCount > 0,
    shouldRetry: false,
    vulnerabilityCount,
    vulnType: validatedData.vulnType
  });
};
// Main validation pipeline for one vulnerability type:
// resolve paths -> check existence -> apply existence rules -> validate queue -> decide.
// Resolves to the exploitation decision; rejects with the first error a step recorded.
export const validateQueueAndDeliverable = async (vulnType, sourceDir) => {
  const paths = createPaths(vulnType, sourceDir);
  const withExistence = await checkFileExistence(paths);
  const afterRules = validateExistenceRules(withExistence);
  const withQueue = await validateQueueContent(afterRules);
  return determineExploitationDecision(withQueue);
};
// Non-throwing wrapper around validateQueueAndDeliverable:
// resolves to { success: true, data } or { success: false, error }.
export const safeValidateQueueAndDeliverable = async (vulnType, sourceDir) => {
  let outcome;
  try {
    const data = await validateQueueAndDeliverable(vulnType, sourceDir);
    outcome = { success: true, data };
  } catch (error) {
    outcome = { success: false, error };
  }
  return outcome;
};
+722
View File
@@ -0,0 +1,722 @@
import { fs, path } from 'zx';
import chalk from 'chalk';
import crypto from 'crypto';
import { PentestError } from './error-handling.js';
// Build the log folder path of a session:
// <cwd>/agent-logs/<hostname>_<first 8 hex chars of md5(webUrl)>_<first 8 chars of sessionId>
// Characters of the hostname outside [a-zA-Z0-9-] are replaced by '-'.
export const generateSessionLogPath = (webUrl, sessionId) => {
  // The hash only disambiguates URLs that share a hostname
  const urlDigest = crypto.createHash('md5').update(webUrl).digest('hex');
  const { hostname } = new URL(webUrl);

  const folderName = [
    hostname.replace(/[^a-zA-Z0-9-]/g, '-'),
    urlDigest.substring(0, 8),
    sessionId.substring(0, 8)
  ].join('_');

  return path.join(process.cwd(), 'agent-logs', folderName);
};
/**
 * Per-session async mutex that serializes session store updates.
 *
 * Waiters are chained: each `lock()` call queues behind the previous call for the
 * same session, so exactly one holder runs at a time and holders are served in
 * call order. (Letting every waiter await the same promise would wake all of them
 * together when the holder releases.)
 */
class SessionMutex {
  constructor() {
    // sessionId -> promise that settles once the most recently queued holder has released
    this.locks = new Map();
  }

  /**
   * Acquire the lock for a session.
   *
   * @param {string} sessionId
   * @returns {Promise<Function>} Resolves, once the lock is held, to a function
   *   that releases it. Call it exactly once, typically in a `finally` block.
   */
  async lock(sessionId) {
    const previousTail = this.locks.get(sessionId) ?? Promise.resolve();

    let release;
    const released = new Promise((resolve) => {
      release = resolve;
    });

    // Register our place in the queue synchronously, before awaiting, so that
    // callers arriving in the meantime line up behind us rather than beside us.
    const tail = previousTail.then(() => released);
    this.locks.set(sessionId, tail);

    await previousTail;

    return () => {
      // Only drop the entry when nobody queued behind us
      if (this.locks.get(sessionId) === tail) {
        this.locks.delete(sessionId);
      }
      release();
    };
  }
}

const sessionMutex = new SessionMutex();
// Agent definitions according to PRD.
// Rows are [name, displayName, phase, prerequisites] in execution order; `order`
// is the 1-based position of the row in this table.
export const AGENTS = Object.freeze(
  Object.fromEntries(
    [
      // Phase 1 - Pre-reconnaissance
      ['pre-recon', 'Pre-recon agent', 'pre-reconnaissance', []],

      // Phase 2 - Reconnaissance
      ['recon', 'Recon agent', 'reconnaissance', ['pre-recon']],

      // Phase 3 - Vulnerability Analysis
      ['injection-vuln', 'Injection vuln agent', 'vulnerability-analysis', ['recon']],
      ['xss-vuln', 'XSS vuln agent', 'vulnerability-analysis', ['recon']],
      ['auth-vuln', 'Auth vuln agent', 'vulnerability-analysis', ['recon']],
      ['ssrf-vuln', 'SSRF vuln agent', 'vulnerability-analysis', ['recon']],
      ['authz-vuln', 'Authz vuln agent', 'vulnerability-analysis', ['recon']],

      // Phase 4 - Exploitation
      ['injection-exploit', 'Injection exploit agent', 'exploitation', ['injection-vuln']],
      ['xss-exploit', 'XSS exploit agent', 'exploitation', ['xss-vuln']],
      ['auth-exploit', 'Auth exploit agent', 'exploitation', ['auth-vuln']],
      ['ssrf-exploit', 'SSRF exploit agent', 'exploitation', ['ssrf-vuln']],
      ['authz-exploit', 'Authz exploit agent', 'exploitation', ['authz-vuln']],

      // Phase 5 - Reporting
      ['report', 'Report agent', 'reporting', ['authz-exploit']]
    ].map(([name, displayName, phase, prerequisites], index) => [
      name,
      { name, displayName, phase, order: index + 1, prerequisites }
    ])
  )
);
// Phase definitions: phase name -> agent names that run in that phase.
// The analysis and exploitation phases have one agent per vulnerability class.
export const PHASES = Object.freeze((() => {
  const vulnClasses = ['injection', 'xss', 'auth', 'ssrf', 'authz'];
  const agentsFor = (suffix) => vulnClasses.map((vulnClass) => `${vulnClass}-${suffix}`);

  return {
    'pre-reconnaissance': ['pre-recon'],
    'reconnaissance': ['recon'],
    'vulnerability-analysis': agentsFor('vuln'),
    'exploitation': agentsFor('exploit'),
    'reporting': ['report']
  };
})());
// Session store file: `.shannon-store.json` in the working directory at module load time
const STORE_FILE = path.resolve(process.cwd(), '.shannon-store.json');
// Load the session store from STORE_FILE.
// Never throws: a missing, unreadable or malformed store yields a fresh
// `{ sessions: {} }` (with a console warning, except when the file simply does not exist).
const loadSessions = async () => {
  const emptyStore = () => ({ sessions: {} });

  try {
    const storeExists = await fs.pathExists(STORE_FILE);
    if (!storeExists) {
      return emptyStore();
    }

    const parsed = JSON.parse(await fs.readFile(STORE_FILE, 'utf8'));

    // Accept only an object that has a `sessions` entry
    const looksValid = parsed && typeof parsed === 'object' && parsed.sessions;
    if (looksValid) {
      return parsed;
    }

    console.log(chalk.yellow('⚠️ Invalid session store format, creating new store'));
    return emptyStore();
  } catch (error) {
    console.log(chalk.yellow(`⚠️ Failed to load session store: ${error.message}, creating new store`));
    return emptyStore();
  }
};
// Persist the session store: write to `<STORE_FILE>.tmp` first, then move it over
// STORE_FILE so readers do not observe a half-written file.
// Any failure is rethrown as a non-retryable filesystem PentestError.
const saveSessions = async (store) => {
  const stagingFile = `${STORE_FILE}.tmp`;

  try {
    await fs.writeJSON(stagingFile, store, { spaces: 2 });
    await fs.move(stagingFile, STORE_FILE, { overwrite: true });
  } catch (error) {
    const context = { storeFile: STORE_FILE, originalError: error.message };
    throw new PentestError(
      `Failed to save session store: ${error.message}`,
      'filesystem',
      false,
      context
    );
  }
};
// Find the first stored session with the same web URL and the same repository path
// (paths are compared after path.resolve). Returns undefined when there is none.
const findExistingSession = async (webUrl, targetRepo) => {
  const { sessions } = await loadSessions();
  const wantedRepo = path.resolve(targetRepo);

  for (const candidate of Object.values(sessions)) {
    // Stored sessions may carry `targetRepo` or only `repoPath`
    const candidateRepo = path.resolve(candidate.targetRepo || candidate.repoPath);
    if (candidate.webUrl === webUrl && candidateRepo === wantedRepo) {
      return candidate;
    }
  }

  return undefined;
};
// Session IDs are random UUIDs; every call yields a new one
const generateSessionId = () => crypto.randomUUID();
/**
 * Create a session for a target, or reuse the unfinished one that already exists
 * for the same web URL and repository.
 *
 * @param {string} webUrl - Target web URL.
 * @param {string} repoPath - Repository path stored on the session.
 * @param {string|null} [configFile=null] - Config file recorded on the session.
 * @param {string|null} [targetRepo=null] - Repository used for matching existing
 *   sessions; defaults to `repoPath`.
 * @returns {Promise<object>} The new session, or the reused session as persisted
 *   (with its refreshed `lastActivity`).
 * @throws {PentestError} When the session store cannot be saved.
 */
export const createSession = async (webUrl, repoPath, configFile = null, targetRepo = null) => {
  // Use targetRepo if provided, otherwise use repoPath
  const resolvedTargetRepo = targetRepo || repoPath;

  // Check for existing session first
  const existingSession = await findExistingSession(webUrl, resolvedTargetRepo);

  if (existingSession) {
    // If session is not completed, reuse it
    if (existingSession.status !== 'completed') {
      console.log(chalk.blue(`📝 Reusing existing session: ${existingSession.id.substring(0, 8)}...`));
      console.log(chalk.gray(` Progress: ${existingSession.completedAgents.length}/${Object.keys(AGENTS).length} agents completed`));

      // Refresh the activity timestamp and hand back the persisted record, so the
      // caller does not keep working with the stale pre-update object.
      return await updateSession(existingSession.id, { lastActivity: new Date().toISOString() });
    }

    // If completed, create a new session (allows re-running after completion)
    console.log(chalk.gray(`Previous session was completed, creating new session...`));
  }

  const sessionId = generateSessionId();
  const createdAt = new Date().toISOString();

  const session = {
    id: sessionId,
    webUrl,
    repoPath,
    configFile,
    targetRepo: resolvedTargetRepo,
    status: 'in-progress',
    completedAgents: [],
    failedAgents: [],
    checkpoints: {},
    createdAt,
    lastActivity: createdAt
  };

  const store = await loadSessions();
  store.sessions[sessionId] = session;
  await saveSessions(store);

  return session;
};
// Look up a session by ID in a freshly loaded store; null when it does not exist
export const getSession = async (sessionId) => {
  const { sessions } = await loadSessions();
  const found = sessions[sessionId];
  return found || null;
};
// Merge `updates` into a stored session, stamp `lastActivity` with the current
// time (overriding any value in `updates`), persist the store and return the
// merged session. Throws a validation PentestError for an unknown session ID.
export const updateSession = async (sessionId, updates) => {
  const store = await loadSessions();
  const current = store.sessions[sessionId];

  if (!current) {
    throw new PentestError(
      `Session ${sessionId} not found`,
      'validation',
      false,
      { sessionId }
    );
  }

  const merged = {
    ...current,
    ...updates,
    lastActivity: new Date().toISOString()
  };
  store.sessions[sessionId] = merged;

  await saveSessions(store);
  return merged;
};
// Return every stored session as an array
const listSessions = async () => {
  const { sessions } = await loadSessions();
  return Object.values(sessions);
};
/**
 * Pick a stored session, asking on the terminal when there is more than one.
 *
 * With a single stored session it is returned directly. Otherwise the sessions are
 * listed and the user enters a 1-based index on stdin.
 *
 * @returns {Promise<object>} The selected session.
 * @throws {PentestError} When no sessions exist, or (as a rejection) when the
 *   answer is not a number between 1 and the number of sessions.
 */
export const selectSession = async () => {
  const sessions = await listSessions();

  if (sessions.length === 0) {
    throw new PentestError(
      'No pentest sessions found. Run a normal pentest first to create a session.',
      'validation',
      false
    );
  }

  if (sessions.length === 1) {
    return sessions[0];
  }

  // Display session options
  console.log(chalk.cyan('\nMultiple pentest sessions found:\n'));

  const totalAgents = Object.keys(AGENTS).length;

  sessions.forEach((session, index) => {
    const completedCount = session.completedAgents.length;
    const timeAgo = getTimeAgo(session.lastActivity);

    // Use dynamic status calculation instead of stored status
    const { status } = getSessionStatus(session);
    const statusColor = status === 'completed' ? chalk.green : chalk.blue;

    console.log(statusColor(`${index + 1}) ${new URL(session.webUrl).hostname} + ${path.basename(session.repoPath)} [${status}]`));
    console.log(chalk.gray(` Last activity: ${timeAgo}, Completed: ${completedCount}/${totalAgents} agents`));
    console.log(chalk.gray(` Session ID: ${session.id}`));

    if (session.configFile) {
      console.log(chalk.gray(` Config: ${session.configFile}`));
    }

    console.log(); // Empty line between sessions
  });

  // Get user selection
  const { createInterface } = await import('readline');
  const readline = createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise((resolve, reject) => {
    readline.question(chalk.cyan(`Select session (1-${sessions.length}): `), (answer) => {
      readline.close();

      // Explicit radix: the answer is always read as a decimal number
      const choice = Number.parseInt(answer, 10);
      const isValidChoice = !Number.isNaN(choice) && choice >= 1 && choice <= sessions.length;

      if (!isValidChoice) {
        reject(new PentestError(
          `Invalid selection. Please enter a number between 1 and ${sessions.length}`,
          'validation',
          false,
          { choice: answer }
        ));
        return;
      }

      resolve(sessions[choice - 1]);
    });
  });
};
/**
 * Check that `agentName` is a defined agent and return its definition.
 *
 * Only own keys of AGENTS count, so inherited names such as `constructor` or
 * `toString` are rejected instead of slipping through as "valid".
 *
 * @param {string} agentName
 * @returns {object} The matching entry of AGENTS.
 * @throws {PentestError} When the name is not a known agent.
 */
export const validateAgent = (agentName) => {
  if (!Object.hasOwn(AGENTS, agentName)) {
    throw new PentestError(
      `Agent '${agentName}' not recognized. Use --list-agents to see valid names.`,
      'validation',
      false,
      { agentName, validAgents: Object.keys(AGENTS) }
    );
  }

  return AGENTS[agentName];
};
// Validate a start/end agent pair and return every agent whose order lies in
// [start, end], sorted by order. The end agent must come strictly after the start.
export const validateAgentRange = (startAgent, endAgent) => {
  const { order: startOrder } = validateAgent(startAgent);
  const { order: endOrder } = validateAgent(endAgent);

  if (startOrder >= endOrder) {
    throw new PentestError(
      `End agent '${endAgent}' must come after start agent '${startAgent}' in sequence.`,
      'validation',
      false,
      { startAgent, endAgent, startOrder, endOrder }
    );
  }

  const isInRange = ({ order }) => order >= startOrder && order <= endOrder;
  const byOrder = (left, right) => left.order - right.order;

  return Object.values(AGENTS).filter(isInRange).sort(byOrder);
};
/**
 * Check that `phaseName` is a defined phase and return the agents that belong to it.
 *
 * Only own keys of PHASES count; an inherited name such as `constructor` is
 * rejected with the regular validation error instead of failing later on `.map`.
 *
 * @param {string} phaseName
 * @returns {object[]} The AGENTS entries of the phase, in phase order.
 * @throws {PentestError} When the name is not a known phase.
 */
export const validatePhase = (phaseName) => {
  if (!Object.hasOwn(PHASES, phaseName)) {
    throw new PentestError(
      `Phase '${phaseName}' not recognized. Valid phases: ${Object.keys(PHASES).join(', ')}`,
      'validation',
      false,
      { phaseName, validPhases: Object.keys(PHASES) }
    );
  }

  return PHASES[phaseName].map(agentName => AGENTS[agentName]);
};
// Ensure every prerequisite of `agentName` is listed in session.completedAgents.
// Returns true when they are; throws a validation PentestError naming the missing ones.
export const checkPrerequisites = (session, agentName) => {
  const { prerequisites } = validateAgent(agentName);

  const missingPrereqs = [];
  for (const prerequisite of prerequisites) {
    if (!session.completedAgents.includes(prerequisite)) {
      missingPrereqs.push(prerequisite);
    }
  }

  if (missingPrereqs.length === 0) {
    return true;
  }

  throw new PentestError(
    `Cannot run '${agentName}': prerequisite agent(s) not completed: ${missingPrereqs.join(', ')}`,
    'validation',
    false,
    { agentName, missingPrerequisites: missingPrereqs, completedAgents: session.completedAgents }
  );
};
/**
 * Suggest the next agent to run for a session.
 *
 * @param {object} session - Session with a `completedAgents` array.
 * @returns {object|undefined} The lowest-order agent that is not completed yet and
 *   whose prerequisites are all completed; undefined when there is none. Failed
 *   agents are not excluded, so a failed agent can be suggested again.
 */
export const getNextAgent = (session) => {
  const completed = new Set(session.completedAgents);

  return Object.values(AGENTS)
    .sort((a, b) => a.order - b.order)
    .find(agent =>
      !completed.has(agent.name) &&
      agent.prerequisites.every(prereq => completed.has(prereq))
    );
};
// Record an agent as completed on a session, together with its checkpoint commit.
// Optionally stores timing, cost (added to the running total) and, for `-vuln`
// agents, validation data. Sets the session status to 'completed' once every agent
// is done. Runs under the per-session mutex because agents may finish in parallel.
export const markAgentCompleted = async (sessionId, agentName, checkpointCommit, timingData = null, costData = null, validationData = null) => {
  const release = await sessionMutex.lock(sessionId);

  try {
    // Read the session only after the lock is held, so the data is fresh
    const current = await getSession(sessionId);
    if (!current) {
      throw new PentestError(`Session ${sessionId} not found`, 'validation', false);
    }

    validateAgent(agentName);

    const changes = {
      completedAgents: [...new Set(current.completedAgents).add(agentName)],
      failedAgents: current.failedAgents.filter((name) => name !== agentName),
      checkpoints: { ...current.checkpoints, [agentName]: checkpointCommit }
    };

    if (timingData) {
      const agentTimings = { ...current.timingBreakdown?.agents, [agentName]: timingData };
      changes.timingBreakdown = { ...current.timingBreakdown, agents: agentTimings };
    }

    if (costData) {
      const previousTotal = current.costBreakdown?.total || 0;
      changes.costBreakdown = {
        total: previousTotal + costData,
        agents: { ...current.costBreakdown?.agents, [agentName]: costData }
      };
    }

    // Validation data is only kept for vulnerability analysis agents
    const isVulnAgent = agentName.includes('-vuln');
    if (validationData && isVulnAgent) {
      changes.validationResults = { ...current.validationResults, [agentName]: validationData };
    }

    // The session is complete once every defined agent has completed
    if (changes.completedAgents.length === Object.keys(AGENTS).length) {
      changes.status = 'completed';
    }

    return await updateSession(sessionId, changes);
  } finally {
    // Release the lock on success and on failure alike
    release();
  }
};
/**
 * Record an agent as failed on a session and drop it from the completed list.
 *
 * Runs under the per-session mutex, like markAgentCompleted: the session is read,
 * changed and written back, and agents finishing in parallel would otherwise be
 * able to overwrite each other's updates.
 *
 * @param {string} sessionId
 * @param {string} agentName
 * @returns {Promise<object>} The updated session.
 * @throws {PentestError} When the session or the agent name is unknown.
 */
export const markAgentFailed = async (sessionId, agentName) => {
  const unlock = await sessionMutex.lock(sessionId);

  try {
    // Get fresh session data under lock
    const session = await getSession(sessionId);
    if (!session) {
      throw new PentestError(`Session ${sessionId} not found`, 'validation', false);
    }

    validateAgent(agentName);

    const updates = {
      failedAgents: [...new Set([...session.failedAgents, agentName])],
      completedAgents: session.completedAgents.filter(agent => agent !== agentName)
    };

    return await updateSession(sessionId, updates);
  } finally {
    // Always release the lock, even if an error occurs
    unlock();
  }
};
// Format the time elapsed since `timestamp` as "<n>m ago" (under an hour),
// "<n>h ago" (under a day) or "<n>d ago"; every unit is rounded down.
const getTimeAgo = (timestamp) => {
  const MS_PER_MINUTE = 1000 * 60;
  const MS_PER_HOUR = MS_PER_MINUTE * 60;
  const MS_PER_DAY = MS_PER_HOUR * 24;

  const elapsedMs = new Date() - new Date(timestamp);

  const minutes = Math.floor(elapsedMs / MS_PER_MINUTE);
  if (minutes < 60) {
    return `${minutes}m ago`;
  }

  const hours = Math.floor(elapsedMs / MS_PER_HOUR);
  if (hours < 24) {
    return `${hours}h ago`;
  }

  return `${Math.floor(elapsedMs / MS_PER_DAY)}d ago`;
};
// Derive a status summary from a session's agent lists.
// 'completed' when every agent completed, otherwise 'failed' when any agent is in
// the failed list, otherwise 'in-progress'.
export const getSessionStatus = (session) => {
  const totalAgents = Object.keys(AGENTS).length;
  const { length: completedCount } = session.completedAgents;
  const { length: failedCount } = session.failedAgents;

  const resolveStatus = () => {
    if (completedCount === totalAgents) return 'completed';
    if (failedCount > 0) return 'failed';
    return 'in-progress';
  };

  return {
    status: resolveStatus(),
    completedCount,
    totalAgents,
    failedCount,
    completionPercentage: Math.round((completedCount / totalAgents) * 100)
  };
};
// Summarize the vulnerability-analysis phase of a session (frozen result):
// completed analyses, total vulnerabilities, analyses with at least one finding,
// their share in percent (`successRate`) and how many stored validation results
// are flagged `shouldExploit`.
export const calculateVulnerabilityAnalysisSummary = (session) => {
  const analysisAgents = PHASES['vulnerability-analysis'];
  const completedAnalyses = session.completedAgents.filter((name) => analysisAgents.includes(name));
  const validationResults = session.validationResults || {};

  // Validation entries of completed analyses that report at least one vulnerability
  const withFindings = completedAnalyses
    .map((name) => validationResults[name])
    .filter((validation) => validation?.vulnerabilityCount > 0);

  const totalVulnerabilities = withFindings.reduce(
    (sum, validation) => sum + validation.vulnerabilityCount,
    0
  );

  const totalAnalyses = completedAnalyses.length;
  const exploitable = Object.values(validationResults).filter((validation) => validation?.shouldExploit);

  return Object.freeze({
    totalAnalyses,
    totalVulnerabilities,
    agentsWithVulnerabilities: withFindings.length,
    successRate: totalAnalyses > 0 ? (withFindings.length / totalAnalyses) * 100 : 0,
    exploitationCandidates: exploitable.length
  });
};
// Summarize the exploitation phase of a session (frozen result).
// An exploit agent is eligible when the validation result of its matching `-vuln`
// agent is flagged `shouldExploit`.
export const calculateExploitationSummary = (session) => {
  const exploitAgents = PHASES['exploitation'];
  const validationResults = session.validationResults || {};

  const attempted = session.completedAgents.filter((name) => exploitAgents.includes(name)).length;

  let eligible = 0;
  for (const exploitAgent of exploitAgents) {
    const analysisAgent = exploitAgent.replace('-exploit', '-vuln');
    if (validationResults[analysisAgent]?.shouldExploit) {
      eligible += 1;
    }
  }

  return Object.freeze({
    totalAttempts: attempted,
    eligibleExploits: eligible,
    skippedExploits: eligible - attempted,
    successRate: eligible > 0 ? (attempted / eligible) * 100 : 0
  });
};
/**
 * Roll a session back to the checkpoint of `targetAgent`.
 *
 * Every agent that comes after the target (by `order`) is removed from the
 * completed and failed lists, the checkpoints, the timing and cost breakdowns
 * (the cost total is recomputed from the remaining agents) and the stored
 * validation results, so summaries no longer count work that was rolled back.
 *
 * @param {string} sessionId
 * @param {string} targetAgent - Agent whose checkpoint becomes the latest state.
 * @returns {Promise<object>} The updated session.
 * @throws {PentestError} When the session or agent is unknown, or the session has
 *   no checkpoint for the agent.
 */
export const rollbackToAgent = async (sessionId, targetAgent) => {
  const session = await getSession(sessionId);
  if (!session) {
    throw new PentestError(`Session ${sessionId} not found`, 'validation', false);
  }

  validateAgent(targetAgent);

  if (!session.checkpoints[targetAgent]) {
    throw new PentestError(
      `No checkpoint found for agent '${targetAgent}' in session history`,
      'validation',
      false,
      { targetAgent, availableCheckpoints: Object.keys(session.checkpoints) }
    );
  }

  // Agents that come after the target are rolled back
  const targetOrder = AGENTS[targetAgent].order;
  const rolledBack = new Set(
    Object.values(AGENTS)
      .filter(agent => agent.order > targetOrder)
      .map(agent => agent.name)
  );

  const isKept = (agentName) => !rolledBack.has(agentName);
  // Copy of an agent-keyed record without the rolled-back agents
  const keepRetained = (record) =>
    Object.fromEntries(Object.entries(record).filter(([agentName]) => isKept(agentName)));

  const updates = {
    completedAgents: session.completedAgents.filter(isKept),
    failedAgents: session.failedAgents.filter(isKept),
    checkpoints: keepRetained(session.checkpoints)
  };

  // Clean up timing data for rolled-back agents
  if (session.timingBreakdown?.agents) {
    updates.timingBreakdown = {
      ...session.timingBreakdown,
      agents: keepRetained(session.timingBreakdown.agents)
    };
  }

  // Clean up cost data for rolled-back agents and recalculate total
  if (session.costBreakdown?.agents) {
    const remainingCosts = keepRetained(session.costBreakdown.agents);
    updates.costBreakdown = {
      total: Object.values(remainingCosts).reduce((sum, cost) => sum + cost, 0),
      agents: remainingCosts
    };
  }

  // Clean up validation results for rolled-back agents; stale entries would
  // otherwise still be counted as exploitation candidates.
  if (session.validationResults) {
    updates.validationResults = keepRetained(session.validationResults);
  }

  return await updateSession(sessionId, updates);
};
/**
 * Remove one session from the session store and persist the store.
 *
 * @param {string} sessionId - Identifier of the session to delete.
 * @returns {Promise<object>} The session record that was removed.
 * @throws {PentestError} When no session with that id exists.
 */
export const deleteSession = async (sessionId) => {
  const store = await loadSessions();
  const existing = store.sessions[sessionId];

  if (existing) {
    delete store.sessions[sessionId];
    await saveSessions(store);
    return existing;
  }

  throw new PentestError(
    `Session ${sessionId} not found`,
    'validation',
    false,
    { sessionId }
  );
};
/**
 * Delete the whole session store file.
 *
 * @returns {Promise<boolean>} `true` when the store file existed and was
 *   removed, `false` when there was no file to remove.
 * @throws {PentestError} When checking for or removing the file fails.
 */
export const deleteAllSessions = async () => {
  try {
    const storeExists = await fs.pathExists(STORE_FILE);
    if (!storeExists) {
      return false; // Nothing on disk to delete
    }
    await fs.remove(STORE_FILE);
    return true;
  } catch (error) {
    throw new PentestError(
      `Failed to delete session storage: ${error.message}`,
      'filesystem',
      false,
      { storeFile: STORE_FILE, originalError: error.message }
    );
  }
};
+136
View File
@@ -0,0 +1,136 @@
import { fs, path, os } from 'zx';
import chalk from 'chalk';
import { PentestError, logError } from '../error-handling.js';
/**
 * Archive a run's deliverables in a permanent per-run directory.
 *
 * The archive lives under `<Documents or home>/pentest-deliverables/` in a
 * directory named `<sanitized host>_<repo basename>_<timestamp>`. It receives
 * a copy of `<sourceDir>/deliverables` (when present), a `metadata.json`
 * describing the session, timing and cost, and a copy of the project's
 * `prompts` directory (when present).
 *
 * Failures are non-fatal: they are reported as a warning and `null` is
 * returned instead of throwing.
 *
 * @param {string} sourceDir - Working directory of the run.
 * @param {string} webUrl - Target URL; its hostname is used in the directory name.
 * @param {string} repoPath - Target repository path; its basename is used in the directory name.
 * @param {object} session - Session record (id, configFile, status, completedAgents, createdAt are read).
 * @param {*} timingBreakdown - Stored verbatim under `timing`.
 * @param {*} costBreakdown - Stored verbatim under `cost`.
 * @returns {Promise<string|null>} Path of the archive directory, or `null` on failure.
 */
export async function savePermanentDeliverables(sourceDir, webUrl, repoPath, session, timingBreakdown, costBreakdown) {
  // Copies `from` over `to` only when `from` exists.
  const copyIfPresent = async (from, to) => {
    if (await fs.pathExists(from)) {
      await fs.copy(from, to, { overwrite: true });
    }
  };

  try {
    // Prefer the user's Documents folder; fall back to the home directory.
    const home = os.homedir();
    const documents = path.join(home, 'Documents');
    const hasDocuments = await fs.pathExists(documents);
    const archiveRoot = path.join(hasDocuments ? documents : home, 'pentest-deliverables');

    // Directory name: <host>_<repo>_<YYYYMMDD-HHMMSS>
    const repoLabel = path.basename(repoPath);
    const hostLabel = new URL(webUrl).hostname.replace(/[^a-zA-Z0-9-]/g, '-');
    const stamp = new Date().toISOString().replace(/[-:]/g, '').replace(/T/, '-').split('.')[0];
    const permanentDir = path.join(archiveRoot, `${hostLabel}_${repoLabel}_${stamp}`);

    await fs.ensureDir(archiveRoot);
    await fs.ensureDir(permanentDir);

    await copyIfPresent(
      path.join(sourceDir, 'deliverables'),
      path.join(permanentDir, 'deliverables')
    );

    // Session metadata stored next to the deliverables
    const sessionInfo = {
      id: session.id,
      webUrl,
      repoPath,
      configFile: session.configFile,
      status: session.status,
      completedAgents: session.completedAgents,
      createdAt: session.createdAt,
      completedAt: new Date().toISOString()
    };
    const metadata = {
      session: sessionInfo,
      timing: timingBreakdown,
      cost: costBreakdown,
      sourceDirectory: sourceDir,
      savedAt: new Date().toISOString()
    };
    await fs.writeJSON(path.join(permanentDir, 'metadata.json'), metadata, { spaces: 2 });

    // Keep the prompts that produced this run, for reproducibility
    await copyIfPresent(
      path.join(import.meta.dirname, '..', '..', 'prompts'),
      path.join(permanentDir, 'prompts')
    );

    console.log(chalk.green(`✅ Deliverables saved to permanent location: ${permanentDir}`));
    return permanentDir;
  } catch (error) {
    // Archiving is best-effort: warn and carry on
    console.log(chalk.yellow(`⚠️ Failed to save permanent deliverables: ${error.message}`));
    return null;
  }
}
/**
 * Write `run-metadata.json` into `sourceDir` for debugging and reproducibility.
 *
 * The file records the timestamp, targets, Node/platform details, selected
 * dependency version specifiers from the project's package.json, the process
 * arguments and two environment variables.
 *
 * @param {string} sourceDir - Directory that receives `run-metadata.json`.
 * @param {string} webUrl - Target URL recorded under `targets`.
 * @param {string} repoPath - Target repository path recorded under `targets`.
 * @returns {Promise<object>} The metadata object that was written.
 * @throws {PentestError} When package.json cannot be read or the metadata
 *   cannot be written; the error is passed to logError before being thrown.
 */
export async function saveRunMetadata(sourceDir, webUrl, repoPath) {
  console.log(chalk.blue('💾 Saving run metadata...'));
  try {
    // package.json supplies the dependency version specifiers
    const packagePath = path.join(import.meta.dirname, '..', '..', 'package.json');
    let manifest;
    try {
      manifest = await fs.readJSON(packagePath);
    } catch (packageError) {
      throw new PentestError(
        `Cannot read package.json: ${packageError.message}`,
        'filesystem',
        false,
        { packagePath, originalError: packageError.message }
      );
    }

    const versionOf = (dependency) => manifest.dependencies?.[dependency] || 'unknown';
    const { version: nodeVersion, platform, arch } = process;

    const metadata = {
      timestamp: new Date().toISOString(),
      targets: { webUrl, repoPath },
      environment: {
        nodeVersion,
        platform,
        arch,
        cwd: process.cwd()
      },
      dependencies: {
        claudeCodeVersion: versionOf('@anthropic-ai/claude-code'),
        zxVersion: versionOf('zx'),
        chalkVersion: versionOf('chalk')
      },
      execution: {
        args: process.argv,
        env: {
          PLAYWRIGHT_HEADLESS: process.env.PLAYWRIGHT_HEADLESS || 'true',
          NODE_ENV: process.env.NODE_ENV
        }
      }
    };

    const metadataPath = path.join(sourceDir, 'run-metadata.json');
    await fs.writeJSON(metadataPath, metadata, { spaces: 2 });
    console.log(chalk.green(`✅ Run metadata saved to: ${metadataPath}`));
    return metadata;
  } catch (error) {
    // PentestErrors pass through unchanged; anything else is wrapped first.
    const failure = error instanceof PentestError
      ? error
      : new PentestError(
        `Run metadata saving failed: ${error.message}`,
        'filesystem',
        false,
        { sourceDir, originalError: error.message }
      );
    await logError(failure, 'Saving run metadata', sourceDir);
    throw failure;
  }
}
+146
View File
@@ -0,0 +1,146 @@
import { $, fs, path } from 'zx';
import chalk from 'chalk';
import { PentestError, logError } from '../error-handling.js';
/**
 * Register five isolated Playwright MCP servers (`playwright-agent1` …
 * `playwright-agent5`) with the `claude` CLI at user scope.
 *
 * Any existing registrations (including a legacy `playwright` server) are
 * removed first. Each instance gets its own screenshot directory under
 * `<sourceDir>/screenshots/` and its own user-data directory under `/tmp`.
 * A failure to register a single instance is only reported; errors outside
 * the per-instance steps are wrapped, logged and thrown.
 *
 * Side effect: sets `process.env.PLAYWRIGHT_HEADLESS` to `'true'`.
 *
 * @param {string} sourceDir - Directory that receives the screenshot folders.
 * @returns {Promise<void>}
 * @throws {PentestError} On failures outside the per-instance registration.
 */
export async function setupMCP(sourceDir) {
  console.log(chalk.blue('🎭 Setting up 5 isolated Playwright MCP instances...'));

  // Headless mode applies to every instance
  process.env.PLAYWRIGHT_HEADLESS = 'true';

  try {
    const agentInstances = [1, 2, 3, 4, 5].map((index) => `playwright-agent${index}`);

    // Start from a clean slate; a missing registration is not an error
    for (const staleInstance of ['playwright', ...agentInstances]) {
      try {
        await $`claude mcp remove ${staleInstance} --scope user 2>/dev/null`;
      } catch {
        // Instance was not registered - nothing to remove
      }
    }

    const screenshotsRoot = path.join(sourceDir, 'screenshots');
    await fs.ensureDir(screenshotsRoot);

    // Registered one at a time to avoid conflicting config writes
    for (const instanceName of agentInstances) {
      const screenshotDir = path.join(screenshotsRoot, instanceName);
      const userDataDir = `/tmp/${instanceName}`;
      await fs.ensureDir(screenshotDir);
      await fs.ensureDir(userDataDir);

      try {
        await $`claude mcp add ${instanceName} --scope user -- npx @playwright/mcp@latest --isolated --user-data-dir ${userDataDir} --output-dir ${screenshotDir}`;
        console.log(chalk.green(`${instanceName} configured`));
      } catch (error) {
        const alreadyRegistered = error.message?.includes('already exists');
        if (alreadyRegistered) {
          console.log(chalk.gray(` ⏭️ ${instanceName} already exists`));
        } else {
          console.log(chalk.yellow(` ⚠️ ${instanceName} failed: ${error.message}, continuing...`));
        }
      }
    }

    console.log(chalk.green('✅ All 5 Playwright MCP instances ready for parallel execution'));
  } catch (error) {
    // Anything that escapes the per-instance handling aborts the setup
    const mcpError = new PentestError(
      `Critical MCP setup failure: ${error.message}. Browser automation required for pentesting.`,
      'tool',
      false,
      { sourceDir, originalError: error.message }
    );
    await logError(mcpError, 'MCP setup failure', sourceDir);
    throw mcpError;
  }
}
/**
 * Unregister the Playwright MCP servers (`playwright` and
 * `playwright-agent1` … `playwright-agent5`) from the `claude` CLI at user
 * scope. Instances that are not registered are skipped silently and any
 * other failure is only reported as a warning; this function never throws.
 *
 * @returns {Promise<void>}
 */
export async function cleanupMCP() {
  console.log(chalk.blue('🧹 Cleaning up Playwright MCP instances...'));
  try {
    // The legacy single-instance name is removed as well, if present
    const registered = ['playwright'];
    for (let index = 1; index <= 5; index += 1) {
      registered.push(`playwright-agent${index}`);
    }

    for (const instance of registered) {
      try {
        await $`claude mcp remove ${instance} --scope user 2>/dev/null`;
        console.log(chalk.gray(` 🗑️ Removed ${instance}`));
      } catch {
        // Not registered - nothing to clean up
      }
    }

    console.log(chalk.green('✅ Playwright MCP cleanup complete'));
  } catch (error) {
    // Cleanup is best-effort
    console.log(chalk.yellow(`⚠️ MCP cleanup warning: ${error.message}`));
  }
}
/**
 * Prepare a local repository for a test run.
 *
 * Steps, in order: resolve the path, set up the Playwright MCP instances
 * (failure aborts), make sure the directory is a git repository with an
 * initial checkpoint commit (failure is only a warning), and copy the
 * standalone TOTP generator into the repository as `generate-totp.mjs`
 * (failure is only a warning).
 *
 * @param {string} repoPath - Path to the repository under test.
 * @returns {Promise<string>} The resolved absolute repository path.
 * @throws {PentestError} When resolving the path or the MCP setup fails.
 */
export async function setupLocalRepo(repoPath) {
  // Initializes git if needed, sets the agent identity and records a checkpoint.
  const prepareGit = async (sourceDir) => {
    const gitDir = path.join(sourceDir, '.git');
    if (!(await fs.pathExists(gitDir))) {
      await $`cd ${sourceDir} && git init`;
      console.log(chalk.blue('✅ Git repository initialized'));
    }
    await $`cd ${sourceDir} && git config user.name "Pentest Agent"`;
    await $`cd ${sourceDir} && git config user.email "agent@localhost"`;
    await $`cd ${sourceDir} && git add -A && git commit -m "Initial checkpoint: Local repository setup" --allow-empty`;
    console.log(chalk.green('✅ Initial checkpoint created'));
  };

  // Copies the TOTP helper into the repository so agents can run it there.
  const installTotpScript = async (sourceDir) => {
    const scriptSource = path.join(import.meta.dirname, '..', '..', 'login_resources', 'generate-totp-standalone.mjs');
    const scriptTarget = path.join(sourceDir, 'generate-totp.mjs');
    if (!(await fs.pathExists(scriptSource))) {
      console.log(chalk.yellow('⚠️ TOTP script not found, authentication may fail if TOTP is required'));
      return;
    }
    await fs.copy(scriptSource, scriptTarget);
    await fs.chmod(scriptTarget, '755'); // executable
    console.log(chalk.green('✅ TOTP generation script (standalone) copied to target repository'));
  };

  try {
    const sourceDir = path.resolve(repoPath);

    // Browser automation is mandatory, so MCP failures propagate
    await setupMCP(sourceDir);

    try {
      await prepareGit(sourceDir);
    } catch (gitError) {
      // The run can continue without git checkpoints
      console.log(chalk.yellow(`⚠️ Git setup warning: ${gitError.message}`));
    }

    try {
      await installTotpScript(sourceDir);
    } catch (totpError) {
      // The run can continue without the TOTP helper
      console.log(chalk.yellow(`⚠️ Failed to copy TOTP script: ${totpError.message}`));
    }

    return sourceDir;
  } catch (error) {
    throw error instanceof PentestError
      ? error
      : new PentestError(
        `Local repository setup failed: ${error.message}`,
        'filesystem',
        false,
        { repoPath, originalError: error.message }
      );
  }
}
+78
View File
@@ -0,0 +1,78 @@
import figlet from 'figlet';
import gradient from 'gradient-string';
import boxen from 'boxen';
import chalk from 'chalk';
import { fs, path } from 'zx';
/**
 * Clear the terminal, print the boxed SHANNON banner with the version from
 * package.json (falling back to `1.0.0`), then show a spinner for two
 * seconds. If anything fails while building the banner, a plain one-line
 * splash and a warning are printed instead.
 *
 * @returns {Promise<void>} Resolves once the spinner has finished (or right
 *   after the fallback splash was printed).
 */
export const displaySplashScreen = async () => {
  try {
    // Version shown under the tagline
    const manifest = await fs.readJSON(path.join(import.meta.dirname, '..', 'package.json'));
    const version = manifest.version || '1.0.0';

    // Gold-gradient ASCII-art title
    const title = figlet.textSync('SHANNON', {
      font: 'ANSI Shadow',
      horizontalLayout: 'default',
      verticalLayout: 'default'
    });
    const goldenTitle = gradient(['#F4C542', '#FFD700'])(title);

    const tagline = chalk.bold.white('AI Penetration Testing Framework');
    const versionInfo = chalk.gray(`v${version}`);

    const bannerLines = [
      goldenTitle,
      '',
      chalk.bold.cyan(' ╔════════════════════════════════════╗'),
      `${chalk.bold.cyan(' ║')} ${tagline} ${chalk.bold.cyan('║')}`,
      chalk.bold.cyan(' ╚════════════════════════════════════╝'),
      '',
      ` ${versionInfo}`,
      '',
      chalk.bold.yellow(' 🔐 DEFENSIVE SECURITY ONLY 🔐'),
      ''
    ];
    const banner = boxen(bannerLines.join('\n'), {
      padding: 1,
      margin: 1,
      borderStyle: 'double',
      borderColor: 'cyan',
      dimBorder: false
    });

    console.clear();
    console.log(banner);

    // Spinner: one frame every 100ms, replaced by a check mark after 2s
    const spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
    let tick = 0;
    return new Promise((done) => {
      const spinner = setInterval(() => {
        process.stdout.write(`\r${chalk.cyan(spinnerFrames[tick])} ${chalk.dim('Initializing systems...')}`);
        tick = (tick + 1) % spinnerFrames.length;
      }, 100);

      setTimeout(() => {
        clearInterval(spinner);
        process.stdout.write(`\r${chalk.green('✓')} ${chalk.dim('Systems initialized. ')}\n\n`);
        done();
      }, 2000);
    });
  } catch (error) {
    // Plain-text splash when the fancy one cannot be built
    console.log(chalk.cyan.bold('\n🚀 SHANNON - AI Penetration Testing Framework\n'));
    console.log(chalk.yellow('⚠️ Could not load full splash screen:', error.message));
    console.log('');
  }
};
+64
View File
@@ -0,0 +1,64 @@
import { $ } from 'zx';
import chalk from 'chalk';
/**
 * Probe the shell for the external tools nmap, subfinder, whatweb and
 * schemathesis using `command -v`, printing one status line per tool.
 *
 * @returns {Promise<Object<string, boolean>>} Map of tool name to whether
 *   the probe succeeded.
 */
export const checkToolAvailability = async () => {
  const availability = {};

  // Stores the probe result and prints the matching status line.
  const record = (tool, found) => {
    availability[tool] = found;
    console.log(
      found
        ? chalk.green(`${tool} - available`)
        : chalk.yellow(` ⚠️ ${tool} - not found`)
    );
  };

  console.log(chalk.blue('🔧 Checking tool availability...'));
  for (const tool of ['nmap', 'subfinder', 'whatweb', 'schemathesis']) {
    try {
      await $`command -v ${tool}`;
      record(tool, true);
    } catch {
      record(tool, false);
    }
  }
  return availability;
};
/**
 * Report unavailable tools to the user, with installation hints for the
 * tools that have one.
 *
 * @param {Object<string, boolean>} toolAvailability - Map of tool name to
 *   availability, as produced by checkToolAvailability.
 * @returns {string[]} Names of the tools whose availability is falsy, in
 *   the map's key order. Nothing is printed when the list is empty.
 */
export const handleMissingTools = (toolAvailability) => {
  const missing = [];
  for (const [tool, available] of Object.entries(toolAvailability)) {
    if (!available) {
      missing.push(tool);
    }
  }

  if (missing.length === 0) {
    return missing;
  }

  console.log(chalk.yellow(`\n⚠️ Missing tools: ${missing.join(', ')}`));
  console.log(chalk.gray('Some functionality will be limited. Install missing tools for full capability.'));

  // How to install each known tool
  const installHints = {
    'nmap': 'brew install nmap (macOS) or apt install nmap (Ubuntu)',
    'subfinder': 'go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@latest',
    'whatweb': 'gem install whatweb',
    'schemathesis': 'pip install schemathesis'
  };

  console.log(chalk.gray('\nInstallation hints:'));
  for (const tool of missing) {
    const hint = installHints[tool];
    if (hint) {
      console.log(chalk.gray(` ${tool}: ${hint}`));
    }
  }
  console.log('');

  return missing;
};
/**
 * Check whether a single tool can be found by the shell (`command -v`).
 *
 * @param {string} toolName - Executable name to look up.
 * @returns {Promise<boolean>} `true` when the lookup succeeds, `false` when it fails.
 */
const isToolAvailable = async (toolName) => {
  let found = true;
  try {
    await $`command -v ${toolName}`;
  } catch {
    found = false;
  }
  return found;
};
+195
View File
@@ -0,0 +1,195 @@
import { $ } from 'zx';
import chalk from 'chalk';
/**
 * Single-slot FIFO lock used to run git operations one at a time, so
 * parallel agents do not collide on git's index.lock.
 *
 * `acquire()` resolves when the caller holds the slot; every successful
 * acquire must be paired with one `release()`.
 */
class GitSemaphore {
  constructor() {
    // Resolvers of callers waiting for the slot, oldest first
    this.queue = [];
    // True while some caller holds the slot
    this.running = false;
  }

  /** @returns {Promise<void>} Resolves once the slot is granted to this caller. */
  async acquire() {
    return new Promise((grant) => {
      this.queue.push(grant);
      this.process();
    });
  }

  /** Give the slot back and hand it to the next waiter, if any. */
  release() {
    this.running = false;
    this.process();
  }

  /** Grant the slot to the oldest waiter when the slot is free. */
  process() {
    if (this.running || this.queue.length === 0) {
      return;
    }
    this.running = true;
    const grantNext = this.queue.shift();
    grantNext();
  }
}

// Shared by every git helper in this module
const gitSemaphore = new GitSemaphore();
/**
 * Run a git command inside `sourceDir` while holding the module's git
 * semaphore, retrying with exponential backoff when git reports an
 * index-lock conflict.
 *
 * @param {string[]|string} commandArgs - Command as an argument array such
 *   as `['git', 'status', '--porcelain']`, or a plain string that is split
 *   on whitespace (no shell-style quoting is interpreted in the string form).
 * @param {string} sourceDir - Directory the command runs in.
 * @param {string} description - Human-readable label used in retry warnings.
 * @param {number} [maxRetries=5] - Maximum number of attempts.
 * @returns {Promise<*>} The result of the zx command.
 * @throws Whatever the command throws when the failure is not a lock
 *   conflict, or when the last attempt also fails.
 */
export const executeGitCommandWithRetry = async (commandArgs, sourceDir, description, maxRetries = 5) => {
  // Substrings of git error output that indicate a lock conflict
  const lockErrorMarkers = [
    'index.lock',
    'unable to lock',
    'Another git process',
    'fatal: Unable to create',
    'fatal: index file'
  ];

  // zx quotes an interpolated string as ONE shell word, so a string command
  // such as 'git status' has to be split into words before interpolation.
  const [cmd, ...args] = Array.isArray(commandArgs)
    ? commandArgs
    : String(commandArgs).trim().split(/\s+/);

  await gitSemaphore.acquire();
  try {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        return await $`cd ${sourceDir} && ${cmd} ${args}`;
      } catch (error) {
        // Not every thrown value carries a string message
        const message = String(error?.message ?? error);
        const isLockError = lockErrorMarkers.some((marker) => message.includes(marker));
        if (!isLockError || attempt >= maxRetries) {
          throw error;
        }
        // Exponential backoff: 1s, 2s, 4s, 8s, ... (no wait after the final attempt)
        const delay = Math.pow(2, attempt - 1) * 1000;
        console.log(chalk.yellow(` ⚠️ Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`));
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
  } finally {
    // The slot is held across retries and always handed back
    gitSemaphore.release();
  }
};
/**
 * Discard all uncommitted work in `sourceDir`: `git reset --hard HEAD`
 * followed by `git clean -fd`, but only when `git status --porcelain`
 * reports changes.
 *
 * Every git command goes through executeGitCommandWithRetry, so this helper
 * is serialized with the module's other git operations and retried on
 * index-lock conflicts.
 *
 * @param {string} sourceDir - Repository to clean.
 * @param {string} [reason='clean start'] - Label used in the progress output.
 * @returns {Promise<{success: true, hadChanges: boolean}|{success: false, error: *}>}
 *   Never throws; failures are reported through the result object.
 */
export const cleanWorkspace = async (sourceDir, reason = 'clean start') => {
  console.log(chalk.blue(` 🧹 Cleaning workspace for ${reason}`));
  try {
    // Check for uncommitted changes
    const status = await executeGitCommandWithRetry(
      ['git', 'status', '--porcelain'], sourceDir, 'status check for workspace cleanup'
    );
    const changes = status.stdout.trim().split('\n').filter((line) => line.length > 0);
    const hasChanges = changes.length > 0;

    if (hasChanges) {
      console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`));
      await executeGitCommandWithRetry(
        ['git', 'reset', '--hard', 'HEAD'], sourceDir, 'hard reset for workspace cleanup'
      );
      await executeGitCommandWithRetry(
        ['git', 'clean', '-fd'], sourceDir, 'cleaning untracked files for workspace cleanup'
      );

      // Show a short preview of what was removed
      console.log(chalk.yellow(` ✅ Rollback completed - removed ${changes.length} contaminated changes:`));
      for (const change of changes.slice(0, 3)) {
        console.log(chalk.gray(` ${change}`));
      }
      if (changes.length > 3) {
        console.log(chalk.gray(` ... and ${changes.length - 3} more files`));
      }
    } else {
      console.log(chalk.blue(` ✅ Workspace already clean (no changes to remove)`));
    }

    return { success: true, hadChanges: hasChanges };
  } catch (error) {
    console.log(chalk.yellow(` ⚠️ Workspace cleanup failed: ${error.message}`));
    return { success: false, error };
  }
};
/**
 * Record a checkpoint commit (staging everything, empty commits allowed)
 * before an agent attempt.
 *
 * On retries (`attempt > 1`) the workspace is cleaned first so the new
 * attempt starts from the last commit; first attempts keep the workspace as
 * is so earlier agents' deliverables survive.
 *
 * @param {string} sourceDir - Repository to checkpoint.
 * @param {string} description - Label used in the commit message and output.
 * @param {number} attempt - 1-based attempt number.
 * @returns {Promise<{success: true}|{success: false, error: *}>} Never throws.
 */
export const createGitCheckpoint = async (sourceDir, description, attempt) => {
  console.log(chalk.blue(` 📍 Creating checkpoint for ${description} (attempt ${attempt})`));

  // Runs one git sub-command through the retrying executor.
  const git = (args, step) => executeGitCommandWithRetry(['git', ...args], sourceDir, step);

  try {
    const isRetry = attempt > 1;
    if (isRetry) {
      const cleanup = await cleanWorkspace(sourceDir, `${description} (retry cleanup)`);
      if (!cleanup.success) {
        console.log(chalk.yellow(` ⚠️ Workspace cleanup failed, continuing anyway: ${cleanup.error.message}`));
      }
    }

    // Remember whether anything was pending before it gets staged
    const status = await git(['status', '--porcelain'], 'status check');
    const workspaceDirty = status.stdout.trim().length > 0;

    await git(['add', '-A'], 'staging changes');
    await git(
      ['commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
      'creating commit'
    );

    const outcome = workspaceDirty
      ? ` ✅ Checkpoint created with uncommitted changes staged`
      : ` ✅ Empty checkpoint created (no workspace changes)`;
    console.log(chalk.blue(outcome));

    return { success: true };
  } catch (error) {
    console.log(chalk.yellow(` ⚠️ Checkpoint creation failed after retries: ${error.message}`));
    return { success: false, error };
  }
};
/**
 * Commit an agent's results after it finished successfully: stage
 * everything and create a commit (empty commits allowed), then print a short
 * preview of the committed changes.
 *
 * @param {string} sourceDir - Repository to commit in.
 * @param {string} description - Label used in the commit message and output.
 * @returns {Promise<{success: true}|{success: false, error: *}>} Never throws.
 */
export const commitGitSuccess = async (sourceDir, description) => {
  console.log(chalk.green(` 💾 Committing successful results for ${description}`));

  // Runs one git sub-command through the retrying executor.
  const git = (args, step) => executeGitCommandWithRetry(['git', ...args], sourceDir, step);

  try {
    // Capture the pending changes before they are staged and committed
    const status = await git(['status', '--porcelain'], 'status check for success commit');
    const pending = status.stdout.trim().split('\n').filter((entry) => entry.length > 0);

    await git(['add', '-A'], 'staging changes for success commit');
    await git(
      ['commit', '-m', `${description}: completed successfully`, '--allow-empty'],
      'creating success commit'
    );

    if (pending.length === 0) {
      console.log(chalk.green(` ✅ Empty success commit created (agent made no file changes)`));
    } else {
      console.log(chalk.green(` ✅ Success commit created with ${pending.length} file changes:`));
      for (const entry of pending.slice(0, 5)) {
        console.log(chalk.gray(` ${entry}`));
      }
      if (pending.length > 5) {
        console.log(chalk.gray(` ... and ${pending.length - 5} more files`));
      }
    }

    return { success: true };
  } catch (error) {
    console.log(chalk.yellow(` ⚠️ Success commit failed after retries: ${error.message}`));
    return { success: false, error };
  }
};
/**
 * Throw away all uncommitted work in `sourceDir` (`git reset --hard HEAD`
 * plus `git clean -fd`) and print a short preview of what was discarded.
 * The reset and clean always run, even when the status check reports no
 * changes.
 *
 * @param {string} sourceDir - Repository to roll back.
 * @param {string} [reason='retry preparation'] - Label used in the output.
 * @returns {Promise<{success: true}|{success: false, error: *}>} Never throws.
 */
export const rollbackGitWorkspace = async (sourceDir, reason = 'retry preparation') => {
  console.log(chalk.yellow(` 🔄 Rolling back workspace for ${reason}`));

  // Runs one git sub-command through the retrying executor.
  const git = (args, step) => executeGitCommandWithRetry(['git', ...args], sourceDir, step);

  try {
    // Capture what is about to be discarded
    const status = await git(['status', '--porcelain'], 'status check for rollback');
    const discarded = status.stdout.trim().split('\n').filter((entry) => entry.length > 0);

    await git(['reset', '--hard', 'HEAD'], 'hard reset for rollback');
    await git(['clean', '-fd'], 'cleaning untracked files for rollback');

    if (discarded.length === 0) {
      console.log(chalk.yellow(` ✅ Rollback completed - no changes to remove`));
    } else {
      console.log(chalk.yellow(` ✅ Rollback completed - removed ${discarded.length} contaminated changes:`));
      for (const entry of discarded.slice(0, 3)) {
        console.log(chalk.gray(` ${entry}`));
      }
      if (discarded.length > 3) {
        console.log(chalk.gray(` ... and ${discarded.length - 3} more files`));
      }
    }

    return { success: true };
  } catch (error) {
    console.log(chalk.red(` ❌ Rollback failed after retries: ${error.message}`));
    return { success: false, error };
  }
};
+104
View File
@@ -0,0 +1,104 @@
import chalk from 'chalk';
// Timing utilities

/**
 * Render a millisecond duration for display.
 *
 * Below one second the raw value is shown (`250ms`), below one minute the
 * value is shown in seconds with one decimal (`1.5s`), otherwise as whole
 * minutes and seconds (`2m 5s`).
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration.
 */
export const formatDuration = (ms) => {
  const MS_PER_SECOND = 1000;
  const MS_PER_MINUTE = 60000;

  if (ms < MS_PER_SECOND) {
    return `${ms}ms`;
  }
  if (ms < MS_PER_MINUTE) {
    const seconds = ms / MS_PER_SECOND;
    return `${seconds.toFixed(1)}s`;
  }

  const wholeMinutes = Math.floor(ms / MS_PER_MINUTE);
  const leftoverSeconds = Math.floor((ms % MS_PER_MINUTE) / MS_PER_SECOND);
  return `${wholeMinutes}m ${leftoverSeconds}s`;
};
/**
 * Simple wall-clock stopwatch that starts when it is constructed.
 */
export class Timer {
  /**
   * @param {string} name - Label for whatever is being timed.
   */
  constructor(name) {
    Object.assign(this, {
      name,
      startTime: Date.now(),
      endTime: null
    });
  }

  /**
   * Freeze the timer at the current time.
   * @returns {number} Elapsed milliseconds between start and stop.
   */
  stop() {
    this.endTime = Date.now();
    return this.duration();
  }

  /**
   * @returns {number} Elapsed milliseconds up to the stop time, or up to
   *   now while the timer has not been stopped.
   */
  duration() {
    const finishedAt = this.endTime ? this.endTime : Date.now();
    return finishedAt - this.startTime;
  }
}
// Global timing and cost trackers.
// Both objects are exported, module-level and mutable; displayTimingSummary
// reads them when it prints the end-of-run report.

// timingResults:
//   total    - starts as null; displayTimingSummary calls `.stop()` on it, so
//              it must be replaced by an object exposing stop() (e.g. a Timer)
//              before the summary is displayed.
//   phases   - phase name   -> duration (rendered with formatDuration)
//   commands - command name -> duration (rendered with formatDuration)
//   agents   - agent name   -> duration (rendered with formatDuration)
export const timingResults = {
total: null,
phases: {},
commands: {},
agents: {}
};
// costResults:
//   agents - agent name -> numeric cost (printed with toFixed(4))
//   total  - numeric overall cost (printed with toFixed(4))
export const costResults = {
agents: {},
total: 0
};
/**
 * Print the end-of-run timing and cost report from the module-level
 * `timingResults` and `costResults` trackers.
 *
 * The total timer is stopped to obtain the overall duration; when no total
 * timer was ever started (`timingResults.total` is still null) the total is
 * reported as 0 instead of throwing. Percentages are relative to the total
 * duration and are shown as `0.0` when the total is 0. Sections without
 * entries are omitted.
 *
 * @returns {void}
 */
export const displayTimingSummary = () => {
  const totalDuration = timingResults.total ? timingResults.total.stop() : 0;

  // Share of the total run time, as a percentage with one decimal.
  const share = (duration) =>
    (totalDuration > 0 ? ((duration / totalDuration) * 100).toFixed(1) : '0.0');

  // Prints one "<heading> / rows / section total" block in the given color.
  const printSection = (heading, paint, entries, totalLabel, toLabel = (name) => name) => {
    console.log(paint.bold(heading));
    let sectionTotal = 0;
    for (const [name, duration] of entries) {
      console.log(paint(` ${toLabel(name).padEnd(20)} ${formatDuration(duration).padStart(8)} (${share(duration)}%)`));
      sectionTotal += duration;
    }
    console.log(chalk.gray(` ${totalLabel.padEnd(20)} ${formatDuration(sectionTotal).padStart(8)} (${share(sectionTotal)}%)`));
  };

  // Agent names read better with spaces than with dashes
  const spaced = (agentName) => agentName.replace(/-/g, ' ');

  console.log(chalk.cyan.bold('\n⏱️ TIMING SUMMARY'));
  console.log(chalk.gray('─'.repeat(60)));

  // Total execution time
  console.log(chalk.cyan(`📊 Total Execution Time: ${formatDuration(totalDuration)}`));
  console.log();

  // Phase breakdown
  const phaseEntries = Object.entries(timingResults.phases);
  if (phaseEntries.length > 0) {
    printSection('🔍 Phase Breakdown:', chalk.yellow, phaseEntries, 'Phases Total');
    console.log();
  }

  // Command breakdown
  const commandEntries = Object.entries(timingResults.commands);
  if (commandEntries.length > 0) {
    printSection('🖥️ Command Breakdown:', chalk.blue, commandEntries, 'Commands Total');
    console.log();
  }

  // Agent breakdown
  const agentEntries = Object.entries(timingResults.agents);
  if (agentEntries.length > 0) {
    printSection('🤖 Agent Breakdown:', chalk.magenta, agentEntries, 'Agents Total', spaced);
  }

  // Cost breakdown
  const costEntries = Object.entries(costResults.agents);
  if (costEntries.length > 0) {
    console.log(chalk.green.bold('\n💰 Cost Breakdown:'));
    for (const [agent, cost] of costEntries) {
      console.log(chalk.green(` ${spaced(agent).padEnd(20)} $${cost.toFixed(4).padStart(8)}`));
    }
    console.log(chalk.gray(` ${'Total Cost'.padEnd(20)} $${costResults.total.toFixed(4).padStart(8)}`));
  }

  console.log(chalk.gray('─'.repeat(60)));
};
+238
View File
@@ -0,0 +1,238 @@
import { AGENTS } from '../session-manager.js';
/**
 * Reduce a URL to something short enough for a status line: its hostname
 * when the URL parses and has one, otherwise the first 30 characters of the
 * input.
 */
function extractDomain(url) {
  const shortened = () => url.slice(0, 30);

  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not a parseable URL - show the raw text instead
    return shortened();
  }
  return hostname || shortened();
}
/**
 * Condense a TodoWrite payload into a single progress line.
 *
 * The most recently listed completed todo wins; when nothing is completed
 * the first in-progress todo is shown. Returns null when the payload has no
 * todo array or nothing worth reporting.
 */
function summarizeTodoUpdate(input) {
  const todos = input?.todos;
  if (!Array.isArray(todos)) {
    return null;
  }

  const withStatus = (status) => todos.filter((todo) => todo.status === status);
  const completed = withStatus('completed');
  const inProgress = withStatus('in_progress');

  // Latest completed task takes priority
  if (completed.length > 0) {
    return `${completed.at(-1).content}`;
  }

  // Otherwise report what is currently being worked on
  if (inProgress.length > 0) {
    return `🔄 ${inProgress[0].content}`;
  }

  return null;
}
/**
 * Pick the bracketed tag shown in front of an agent's output during
 * parallel execution.
 *
 * The description is first matched against the display names of the known
 * vulnerability/exploitation agents in AGENTS; if none matches, a keyword
 * search on the description is used, and `[Agent]` is the final fallback.
 */
export function getAgentPrefix(description) {
  const prefixByCategory = {
    injection: '[SQLi/Cmd]',
    xss: '[XSS]',
    auth: '[Auth]',
    authz: '[Authz]',
    ssrf: '[SSRF]'
  };

  // Exact match on a registered agent's display name (vuln agents first)
  for (const phaseSuffix of ['vuln', 'exploit']) {
    for (const category of Object.keys(prefixByCategory)) {
      const agent = AGENTS[`${category}-${phaseSuffix}`];
      if (agent && description.includes(agent.displayName)) {
        return prefixByCategory[category];
      }
    }
  }

  // Keyword fallback for backwards compatibility; 'authz' must be tested
  // before 'auth' because the latter is a substring of the former.
  const keywordOrder = ['injection', 'xss', 'authz', 'auth', 'ssrf'];
  const keyword = keywordOrder.find((candidate) => description.includes(candidate));
  return keyword ? prefixByCategory[keyword] : '[Agent]';
}
/**
 * Format a Playwright browser tool call into a clean progress indicator.
 *
 * Tool names are expected in the form `mcp__<server>__browser_<action>`,
 * where `<server>` is either `playwright` or one of the per-agent instances
 * `playwright-agent<N>`; both forms produce the same message. Names that do
 * not match, and actions without a dedicated message, fall back to a
 * generic `Browser: <last underscore-separated segment>` line.
 *
 * @param {{name: string, input?: object}} toolCall - Parsed tool_use payload.
 * @returns {string} One-line description of the browser action.
 */
function formatBrowserAction(toolCall) {
  const toolName = toolCall.name;
  const input = toolCall.input || {};

  // Strip the server part so every Playwright instance is handled alike
  const action = /^mcp__playwright(?:-agent\d+)?__browser_(.+)$/.exec(toolName)?.[1];

  switch (action) {
    // Core Browser Operations
    case 'navigate': {
      const domain = extractDomain(input.url || '');
      return `🌐 Navigating to ${domain}`;
    }
    case 'navigate_back':
      return `⬅️ Going back`;

    // Page Interaction
    case 'click': {
      const element = input.element || 'element';
      return `🖱️ Clicking ${element.slice(0, 25)}`;
    }
    case 'hover': {
      const element = input.element || 'element';
      return `👆 Hovering over ${element.slice(0, 20)}`;
    }
    case 'type': {
      const element = input.element || 'field';
      return `⌨️ Typing in ${element.slice(0, 20)}`;
    }
    case 'press_key': {
      const key = input.key || 'key';
      return `⌨️ Pressing ${key}`;
    }

    // Form Handling
    case 'fill_form': {
      const fieldCount = input.fields?.length || 0;
      return `📝 Filling ${fieldCount} form fields`;
    }
    case 'select_option':
      return `📋 Selecting dropdown option`;
    case 'file_upload':
      return `📁 Uploading file`;

    // Page Analysis
    case 'snapshot':
      return `📸 Taking page snapshot`;
    case 'take_screenshot':
      return `📸 Taking screenshot`;
    case 'evaluate':
      return `🔍 Running JavaScript analysis`;

    // Waiting & Monitoring
    case 'wait_for':
      if (input.text) {
        return `⏳ Waiting for "${input.text.slice(0, 20)}"`;
      }
      return `⏳ Waiting for page response`;
    case 'console_messages':
      return `📜 Checking console logs`;
    case 'network_requests':
      return `🌐 Analyzing network traffic`;

    // Tab Management
    case 'tabs': {
      const tabAction = input.action || 'managing';
      return `🗂️ ${tabAction} browser tab`;
    }

    // Dialog Handling
    case 'handle_dialog':
      return `💬 Handling browser dialog`;

    // Fallback for any missed tools
    default: {
      const actionType = toolName.split('_').pop();
      return `🌐 Browser: ${actionType}`;
    }
  }
}
/**
 * Filter out JSON tool calls from content, with special handling for Task,
 * TodoWrite and Playwright browser calls.
 *
 * The content is processed line by line:
 *  - empty lines are dropped;
 *  - lines that are not `{"type":"tool_use"...` JSON are kept as is;
 *  - Task calls become a "Launching <description>" line;
 *  - TodoWrite calls become their summarizeTodoUpdate() line (if any);
 *  - browser calls of the `playwright` server or of a per-agent
 *    `playwright-agent<N>` server become their formatBrowserAction() line;
 *  - every other tool call (Read, Write, Grep, ... or a call without a
 *    string name) is hidden;
 *  - a tool_use line that cannot be parsed or formatted is kept as text.
 *
 * @param {*} content - Raw assistant output.
 * @returns {*} The filtered text, or `content` unchanged when it is empty
 *   or not a string.
 */
export function filterJsonToolCalls(content) {
  if (!content || typeof content !== 'string') {
    return content;
  }

  // Matches browser tools of every Playwright MCP instance
  const browserToolPattern = /^mcp__playwright(?:-agent\d+)?__browser_/;
  const processedLines = [];

  for (const line of content.split('\n')) {
    const trimmed = line.trim();

    // Skip empty lines
    if (trimmed === '') {
      continue;
    }

    // Keep non-JSON lines (assistant text)
    if (!trimmed.startsWith('{"type":"tool_use"')) {
      processedLines.push(line);
      continue;
    }

    try {
      const toolCall = JSON.parse(trimmed);
      const toolName = typeof toolCall.name === 'string' ? toolCall.name : '';

      if (toolName === 'Task') {
        const description = toolCall.input?.description || 'analysis agent';
        processedLines.push(`🚀 Launching ${description}`);
      } else if (toolName === 'TodoWrite') {
        const summary = summarizeTodoUpdate(toolCall.input);
        if (summary) {
          processedLines.push(summary);
        }
      } else if (browserToolPattern.test(toolName)) {
        const browserAction = formatBrowserAction(toolCall);
        if (browserAction) {
          processedLines.push(browserAction);
        }
      }
      // Every other tool call (Read, Write, Grep, etc.) is hidden
    } catch (error) {
      // Unparseable or unformattable payload: treat as regular text
      processedLines.push(line);
    }
  }

  return processedLines.join('\n');
}