refactor: consolidate file layout and break circular dependencies

- Move error-handling, git-manager, prompt-manager, queue-validation, and reporting into src/services/
- Delete src/constants.ts — relocate AGENT_VALIDATORS and MCP_AGENT_MAPPING into session-manager.ts alongside agent definitions
- Delete src/utils/output-formatter.ts — absorb filterJsonToolCalls and getAgentPrefix into ai/output-formatters.ts
- Extract ActivityLogger interface into src/types/activity-logger.ts to break temporal/ → services circular dependency
- Consolidate VulnType, ExploitationDecision into types/agents.ts and SessionMetadata into types/audit.ts
- Remove dead timingResults/costResults globals from utils/metrics.ts and all consumers
This commit is contained in:
ajmallesh
2026-02-16 18:01:37 -08:00
parent 9074149778
commit b208949345
30 changed files with 480 additions and 476 deletions
+4 -4
View File
@@ -21,13 +21,13 @@
* No Temporal dependencies - pure domain logic.
*/
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import { Result, ok, err, isErr } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
import { AGENTS } from '../session-manager.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import { loadPrompt } from './prompt-manager.js';
import {
runClaudePrompt,
validateAgentOutput,
@@ -38,7 +38,7 @@ import {
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
} from './git-manager.js';
import { AuditSession } from '../audit/index.js';
import type { AgentEndResult } from '../types/audit.js';
import type { AgentName } from '../types/agents.js';
+1 -1
View File
@@ -12,7 +12,7 @@
*/
import { parseConfig, distributeConfig } from '../config-parser.js';
import { PentestError } from '../error-handling.js';
import { PentestError } from './error-handling.js';
import { Result, ok, err } from '../types/result.js';
import { ErrorCode } from '../types/errors.js';
import type { DistributedConfig } from '../types/config.js';
+276
View File
@@ -0,0 +1,276 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import {
ErrorCode,
type PentestErrorType,
type PentestErrorContext,
type PromptErrorResult,
} from '../types/errors.js';
import {
matchesBillingApiPattern,
matchesBillingTextPattern,
} from '../utils/billing-detection.js';
// Custom error class for pentest operations
export class PentestError extends Error {
override name = 'PentestError' as const;
type: PentestErrorType;
retryable: boolean;
context: PentestErrorContext;
timestamp: string;
/** Optional specific error code for reliable classification */
code?: ErrorCode;
constructor(
message: string,
type: PentestErrorType,
retryable: boolean = false,
context: PentestErrorContext = {},
code?: ErrorCode
) {
super(message);
this.type = type;
this.retryable = retryable;
this.context = context;
this.timestamp = new Date().toISOString();
if (code !== undefined) {
this.code = code;
}
}
}
// Handle tool execution errors
// Handle prompt loading errors
export function handlePromptError(
promptName: string,
error: Error
): PromptErrorResult {
return {
success: false,
error: new PentestError(
`Failed to load prompt '${promptName}': ${error.message}`,
'prompt',
false,
{ promptName, originalError: error.message }
),
};
}
// Patterns that indicate retryable errors
const RETRYABLE_PATTERNS = [
// Network and connection errors
'network',
'connection',
'timeout',
'econnreset',
'enotfound',
'econnrefused',
// Rate limiting
'rate limit',
'429',
'too many requests',
// Server errors
'server error',
'5xx',
'internal server error',
'service unavailable',
'bad gateway',
// Claude API errors
'mcp server',
'model unavailable',
'service temporarily unavailable',
'api error',
'terminated',
// Max turns
'max turns',
'maximum turns',
];
// Patterns that indicate non-retryable errors (checked before default)
const NON_RETRYABLE_PATTERNS = [
'authentication',
'invalid prompt',
'out of memory',
'permission denied',
'session limit reached',
'invalid api key',
];
// Conservative retry classification - unknown errors don't retry (fail-safe default)
export function isRetryableError(error: Error): boolean {
const message = error.message.toLowerCase();
// Check for explicit non-retryable patterns first
if (NON_RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern))) {
return false;
}
// Check for retryable patterns
return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern));
}
/**
* Classifies errors by ErrorCode for reliable, code-based classification.
* Used when error is a PentestError with a specific ErrorCode.
*/
function classifyByErrorCode(
code: ErrorCode,
retryableFromError: boolean
): { type: string; retryable: boolean } {
switch (code) {
// Billing errors - retryable (wait for cap reset or credits added)
case ErrorCode.SPENDING_CAP_REACHED:
case ErrorCode.INSUFFICIENT_CREDITS:
return { type: 'BillingError', retryable: true };
case ErrorCode.API_RATE_LIMITED:
return { type: 'RateLimitError', retryable: true };
// Config errors - non-retryable (need manual fix)
case ErrorCode.CONFIG_NOT_FOUND:
case ErrorCode.CONFIG_VALIDATION_FAILED:
case ErrorCode.CONFIG_PARSE_ERROR:
return { type: 'ConfigurationError', retryable: false };
// Prompt errors - non-retryable (need manual fix)
case ErrorCode.PROMPT_LOAD_FAILED:
return { type: 'ConfigurationError', retryable: false };
// Git errors - non-retryable (indicates workspace corruption)
case ErrorCode.GIT_CHECKPOINT_FAILED:
case ErrorCode.GIT_ROLLBACK_FAILED:
return { type: 'GitError', retryable: false };
// Validation errors - retryable (agent may succeed on retry)
case ErrorCode.OUTPUT_VALIDATION_FAILED:
case ErrorCode.DELIVERABLE_NOT_FOUND:
return { type: 'OutputValidationError', retryable: true };
// Agent execution - use the retryable flag from the error
case ErrorCode.AGENT_EXECUTION_FAILED:
return { type: 'AgentExecutionError', retryable: retryableFromError };
default:
// Unknown code - fall through to string matching
return { type: 'UnknownError', retryable: retryableFromError };
}
}
/**
* Classifies errors for Temporal workflow retry behavior.
* Returns error type and whether Temporal should retry.
*
* Used by activities to wrap errors in ApplicationFailure:
* - Retryable errors: Temporal retries with configured backoff
* - Non-retryable errors: Temporal fails immediately
*
* Classification priority:
* 1. If error is PentestError with ErrorCode, classify by code (reliable)
* 2. Fall through to string matching for external errors (SDK, network, etc.)
*/
export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
// === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
if (error instanceof PentestError && error.code !== undefined) {
return classifyByErrorCode(error.code, error.retryable);
}
// === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
// === BILLING ERRORS (Retryable with long backoff) ===
// Anthropic returns billing as 400 invalid_request_error
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
// Check both API patterns and text patterns for comprehensive detection
if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
return { type: 'BillingError', retryable: true };
}
// === PERMANENT ERRORS (Non-retryable) ===
// Authentication (401) - bad API key won't fix itself
if (
message.includes('authentication') ||
message.includes('api key') ||
message.includes('401') ||
message.includes('authentication_error')
) {
return { type: 'AuthenticationError', retryable: false };
}
// Permission (403) - access won't be granted
if (
message.includes('permission') ||
message.includes('forbidden') ||
message.includes('403')
) {
return { type: 'PermissionError', retryable: false };
}
// === OUTPUT VALIDATION ERRORS (Retryable) ===
// Agent didn't produce expected deliverables - retry may succeed
// IMPORTANT: Must come BEFORE generic 'validation' check below
if (
message.includes('failed output validation') ||
message.includes('output validation failed')
) {
return { type: 'OutputValidationError', retryable: true };
}
// Invalid Request (400) - malformed request is permanent
// Note: Checked AFTER billing and AFTER output validation
if (
message.includes('invalid_request_error') ||
message.includes('malformed') ||
message.includes('validation')
) {
return { type: 'InvalidRequestError', retryable: false };
}
// Request Too Large (413) - won't fit no matter how many retries
if (
message.includes('request_too_large') ||
message.includes('too large') ||
message.includes('413')
) {
return { type: 'RequestTooLargeError', retryable: false };
}
// Configuration errors - missing files need manual fix
if (
message.includes('enoent') ||
message.includes('no such file') ||
message.includes('cli not installed')
) {
return { type: 'ConfigurationError', retryable: false };
}
// Execution limits - max turns/budget reached
if (
message.includes('max turns') ||
message.includes('budget') ||
message.includes('execution limit') ||
message.includes('error_max_turns') ||
message.includes('error_max_budget')
) {
return { type: 'ExecutionLimitError', retryable: false };
}
// Invalid target URL - bad URL format won't fix itself
if (
message.includes('invalid url') ||
message.includes('invalid target') ||
message.includes('malformed url') ||
message.includes('invalid uri')
) {
return { type: 'InvalidTargetError', retryable: false };
}
// === TRANSIENT ERRORS (Retryable) ===
// Rate limits (429), server errors (5xx), network issues
// Let Temporal retry with configured backoff
return { type: 'TransientError', retryable: true };
}
+2 -2
View File
@@ -17,9 +17,9 @@ import {
validateQueueSafe,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
} from './queue-validation.js';
import { isOk } from '../types/result.js';
import type { ActivityLogger } from '../temporal/activity-logger.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
* Service for checking exploitation queue decisions.
+319
View File
@@ -0,0 +1,319 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { $ } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
/**
* Check if a directory is a git repository.
* Returns true if the directory contains a .git folder or is inside a git repo.
*/
export async function isGitRepository(dir: string): Promise<boolean> {
try {
await $`cd ${dir} && git rev-parse --git-dir`.quiet();
return true;
} catch {
return false;
}
}
interface GitOperationResult {
success: boolean;
hadChanges?: boolean;
error?: Error;
}
/**
* Get list of changed files from git status --porcelain output
*/
async function getChangedFiles(
sourceDir: string,
operationDescription: string
): Promise<string[]> {
const status = await executeGitCommandWithRetry(
['git', 'status', '--porcelain'],
sourceDir,
operationDescription
);
return status.stdout
.trim()
.split('\n')
.filter((line) => line.length > 0);
}
/**
* Log a summary of changed files with truncation for long lists
*/
function logChangeSummary(
changes: string[],
messageWithChanges: string,
messageWithoutChanges: string,
logger: ActivityLogger,
level: 'info' | 'warn' = 'info',
maxToShow: number = 5
): void {
if (changes.length > 0) {
const msg = messageWithChanges.replace('{count}', String(changes.length));
const fileList = changes.slice(0, maxToShow).map((c) => ` ${c}`).join(', ');
const suffix = changes.length > maxToShow
? ` ... and ${changes.length - maxToShow} more files`
: '';
logger[level](`${msg} ${fileList}${suffix}`);
} else {
logger[level](messageWithoutChanges);
}
}
/**
* Convert unknown error to GitOperationResult
*/
function toErrorResult(error: unknown): GitOperationResult {
const errMsg = error instanceof Error ? error.message : String(error);
return {
success: false,
error: error instanceof Error ? error : new Error(errMsg),
};
}
// Serializes git operations to prevent index.lock conflicts during parallel agent execution
class GitSemaphore {
private queue: Array<() => void> = [];
private running: boolean = false;
async acquire(): Promise<void> {
return new Promise((resolve) => {
this.queue.push(resolve);
this.process();
});
}
release(): void {
this.running = false;
this.process();
}
private process(): void {
if (!this.running && this.queue.length > 0) {
this.running = true;
const resolve = this.queue.shift();
resolve!();
}
}
}
const gitSemaphore = new GitSemaphore();
const GIT_LOCK_ERROR_PATTERNS = [
'index.lock',
'unable to lock',
'Another git process',
'fatal: Unable to create',
'fatal: index file',
];
function isGitLockError(errorMessage: string): boolean {
return GIT_LOCK_ERROR_PATTERNS.some((pattern) => errorMessage.includes(pattern));
}
// Retries git commands on lock conflicts with exponential backoff
export async function executeGitCommandWithRetry(
commandArgs: string[],
sourceDir: string,
description: string,
maxRetries: number = 5
): Promise<{ stdout: string; stderr: string }> {
await gitSemaphore.acquire();
try {
for (let attempt = 1; attempt <= maxRetries; attempt++) {
try {
const [cmd, ...args] = commandArgs;
const result = await $`cd ${sourceDir} && ${cmd} ${args}`;
return result;
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
if (isGitLockError(errMsg) && attempt < maxRetries) {
const delay = Math.pow(2, attempt - 1) * 1000;
// executeGitCommandWithRetry is also called outside activity context
// (e.g., from resume logic), so we use console.warn as a fallback here
console.warn(
`Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
);
await new Promise((resolve) => setTimeout(resolve, delay));
continue;
}
throw error;
}
}
throw new PentestError(
`Git command failed after ${maxRetries} retries`,
'filesystem',
true, // Retryable - transient git lock issues
{ maxRetries, description },
ErrorCode.GIT_CHECKPOINT_FAILED
);
} finally {
gitSemaphore.release();
}
}
// Two-phase reset: hard reset (tracked files) + clean (untracked files)
export async function rollbackGitWorkspace(
sourceDir: string,
reason: string = 'retry preparation',
logger: ActivityLogger
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
logger.info('Skipping git rollback (not a git repository)');
return { success: true };
}
logger.info(`Rolling back workspace for ${reason}`);
try {
const changes = await getChangedFiles(sourceDir, 'status check for rollback');
await executeGitCommandWithRetry(
['git', 'reset', '--hard', 'HEAD'],
sourceDir,
'hard reset for rollback'
);
await executeGitCommandWithRetry(
['git', 'clean', '-fd'],
sourceDir,
'cleaning untracked files for rollback'
);
logChangeSummary(
changes,
'Rollback completed - removed {count} contaminated changes:',
'Rollback completed - no changes to remove',
logger,
'info',
3
);
return { success: true };
} catch (error) {
const errMsg = error instanceof Error ? error.message : String(error);
logger.error(`Rollback failed after retries: ${errMsg}`);
return {
success: false,
error: new PentestError(
`Git rollback failed: ${errMsg}`,
'filesystem',
false, // Non-retryable - rollback is best-effort cleanup
{ sourceDir, reason },
ErrorCode.GIT_ROLLBACK_FAILED
),
};
}
}
// Creates checkpoint before each attempt. First attempt preserves workspace; retries clean it.
export async function createGitCheckpoint(
sourceDir: string,
description: string,
attempt: number,
logger: ActivityLogger
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
logger.info('Skipping git checkpoint (not a git repository)');
return { success: true };
}
logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
try {
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
if (attempt > 1) {
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
if (!cleanResult.success) {
logger.warn(`Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`);
}
}
const changes = await getChangedFiles(sourceDir, 'status check');
const hasChanges = changes.length > 0;
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
await executeGitCommandWithRetry(
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
sourceDir,
'creating commit'
);
if (hasChanges) {
logger.info('Checkpoint created with uncommitted changes staged');
} else {
logger.info('Empty checkpoint created (no workspace changes)');
}
return { success: true };
} catch (error) {
const result = toErrorResult(error);
logger.warn(`Checkpoint creation failed after retries: ${result.error?.message}`);
return result;
}
}
export async function commitGitSuccess(
sourceDir: string,
description: string,
logger: ActivityLogger
): Promise<GitOperationResult> {
// Skip git operations if not a git repository
if (!(await isGitRepository(sourceDir))) {
logger.info('Skipping git commit (not a git repository)');
return { success: true };
}
logger.info(`Committing successful results for ${description}`);
try {
const changes = await getChangedFiles(sourceDir, 'status check for success commit');
await executeGitCommandWithRetry(
['git', 'add', '-A'],
sourceDir,
'staging changes for success commit'
);
await executeGitCommandWithRetry(
['git', 'commit', '-m', `${description}: completed successfully`, '--allow-empty'],
sourceDir,
'creating success commit'
);
logChangeSummary(
changes,
'Success commit created with {count} file changes:',
'Empty success commit created (agent made no file changes)',
logger
);
return { success: true };
} catch (error) {
const result = toErrorResult(error);
logger.warn(`Success commit failed after retries: ${result.error?.message}`);
return result;
}
}
/**
* Get current git commit hash.
* Returns null if not a git repository.
*/
export async function getGitCommitHash(sourceDir: string): Promise<string | null> {
if (!(await isGitRepository(sourceDir))) {
return null;
}
try {
const result = await $`cd ${sourceDir} && git rev-parse HEAD`;
return result.stdout.trim();
} catch {
return null;
}
}
+3
View File
@@ -18,3 +18,6 @@ export { ConfigLoaderService } from './config-loader.js';
export { ExploitationCheckerService } from './exploitation-checker.js';
export { AgentExecutionService } from './agent-execution.js';
export type { AgentExecutionInput } from './agent-execution.js';
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
export { loadPrompt } from './prompt-manager.js';
+267
View File
@@ -0,0 +1,267 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError, handlePromptError } from './error-handling.js';
import { MCP_AGENT_MAPPING } from '../session-manager.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface PromptVariables {
webUrl: string;
repoPath: string;
MCP_SERVER?: string;
}
interface IncludeReplacement {
placeholder: string;
content: string;
}
// Pure function: Build complete login instructions from config
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
try {
// Load the login instructions template
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
if (!await fs.pathExists(loginInstructionsPath)) {
throw new PentestError(
'Login instructions template not found',
'filesystem',
false,
{ loginInstructionsPath }
);
}
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
// Helper function to extract sections based on markers
const getSection = (content: string, sectionName: string): string => {
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
const match = regex.exec(content);
return match ? match[1]!.trim() : '';
};
// Extract sections based on login type
const loginType = authentication.login_type?.toUpperCase();
let loginInstructions = '';
// Build instructions with only relevant sections
const commonSection = getSection(fullTemplate, 'COMMON');
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
// Fallback to full template if markers are missing (backward compatibility)
if (!commonSection && !authSection && !verificationSection) {
logger.warn('Section markers not found, using full login instructions template');
loginInstructions = fullTemplate;
} else {
// Combine relevant sections
loginInstructions = [commonSection, authSection, verificationSection]
.filter(section => section) // Remove empty sections
.join('\n\n');
}
// Replace the user instructions placeholder with the login flow from config
let userInstructions = (authentication.login_flow ?? []).join('\n');
// Replace credential placeholders within the user instructions
if (authentication.credentials) {
if (authentication.credentials.username) {
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
}
if (authentication.credentials.password) {
userInstructions = userInstructions.replace(/\$password/g, authentication.credentials.password);
}
if (authentication.credentials.totp_secret) {
userInstructions = userInstructions.replace(/\$totp/g, `generated TOTP code using secret "${authentication.credentials.totp_secret}"`);
}
}
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
// Replace TOTP secret placeholder if present in template
if (authentication.credentials?.totp_secret) {
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
}
return loginInstructions;
} catch (error) {
if (error instanceof PentestError) {
throw error;
}
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Failed to build login instructions: ${errMsg}`,
'config',
false,
{ authentication, originalError: errMsg }
);
}
}
// Pure function: Process @include() directives
async function processIncludes(content: string, baseDir: string): Promise<string> {
const includeRegex = /@include\(([^)]+)\)/g;
// Use a Promise.all to handle all includes concurrently
const replacements: IncludeReplacement[] = await Promise.all(
Array.from(content.matchAll(includeRegex)).map(async (match) => {
const includePath = path.join(baseDir, match[1]!);
const sharedContent = await fs.readFile(includePath, 'utf8');
return {
placeholder: match[0],
content: sharedContent,
};
})
);
for (const replacement of replacements) {
content = content.replace(replacement.placeholder, replacement.content);
}
return content;
}
// Pure function: Variable interpolation
async function interpolateVariables(
template: string,
variables: PromptVariables,
config: DistributedConfig | null = null,
logger: ActivityLogger
): Promise<string> {
try {
if (!template || typeof template !== 'string') {
throw new PentestError(
'Template must be a non-empty string',
'validation',
false,
{ templateType: typeof template, templateLength: template?.length }
);
}
if (!variables || !variables.webUrl || !variables.repoPath) {
throw new PentestError(
'Variables must include webUrl and repoPath',
'validation',
false,
{ variables: Object.keys(variables || {}) }
);
}
let result = template
.replace(/{{WEB_URL}}/g, variables.webUrl)
.replace(/{{REPO_PATH}}/g, variables.repoPath)
.replace(/{{MCP_SERVER}}/g, variables.MCP_SERVER || 'playwright-agent1');
if (config) {
// Handle rules section - if both are empty, use cleaner messaging
const hasAvoidRules = config.avoid && config.avoid.length > 0;
const hasFocusRules = config.focus && config.focus.length > 0;
if (!hasAvoidRules && !hasFocusRules) {
// Replace the entire rules section with a clean message
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
} else {
const avoidRules = hasAvoidRules ? config.avoid!.map(r => `- ${r.description}`).join('\n') : 'None';
const focusRules = hasFocusRules ? config.focus!.map(r => `- ${r.description}`).join('\n') : 'None';
result = result
.replace(/{{RULES_AVOID}}/g, avoidRules)
.replace(/{{RULES_FOCUS}}/g, focusRules);
}
// Extract and inject login instructions from config
if (config.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication, logger);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
} else {
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
}
} else {
// Replace the entire rules section with a clean message when no config provided
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
}
// Validate that all placeholders have been replaced (excluding instructional text)
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
if (remainingPlaceholders) {
logger.warn(`Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`);
}
return result;
} catch (error) {
if (error instanceof PentestError) {
throw error;
}
const errMsg = error instanceof Error ? error.message : String(error);
throw new PentestError(
`Variable interpolation failed: ${errMsg}`,
'prompt',
false,
{ originalError: errMsg }
);
}
}
// Pure function: Load and interpolate prompt template
export async function loadPrompt(
promptName: string,
variables: PromptVariables,
config: DistributedConfig | null = null,
pipelineTestingMode: boolean = false,
logger: ActivityLogger
): Promise<string> {
try {
// Use pipeline testing prompts if pipeline testing mode is enabled
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
const promptPath = path.join(promptsDir, `${promptName}.txt`);
// Debug message for pipeline testing mode
if (pipelineTestingMode) {
logger.info(`Using pipeline testing prompt: ${promptPath}`);
}
// Check if file exists first
if (!await fs.pathExists(promptPath)) {
throw new PentestError(
`Prompt file not found: ${promptPath}`,
'prompt',
false,
{ promptName, promptPath }
);
}
// Add MCP server assignment to variables
const enhancedVariables: PromptVariables = { ...variables };
// Assign MCP server based on prompt name (agent name)
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
if (mcpServer) {
enhancedVariables.MCP_SERVER = mcpServer;
logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
} else {
// Fallback for unknown agents
enhancedVariables.MCP_SERVER = 'playwright-agent1';
logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
}
let template = await fs.readFile(promptPath, 'utf8');
// Pre-process the template to handle @include directives
template = await processIncludes(template, promptsDir);
return await interpolateVariables(template, enhancedVariables, config, logger);
} catch (error) {
if (error instanceof PentestError) {
throw error;
}
const promptError = handlePromptError(promptName, error as Error);
throw promptError.error;
}
}
+325
View File
@@ -0,0 +1,325 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { asyncPipe } from '../utils/functional.js';
import type { VulnType, ExploitationDecision } from '../types/agents.js';
export type { VulnType, ExploitationDecision } from '../types/agents.js';
interface VulnTypeConfigItem {
deliverable: string;
queue: string;
}
type VulnTypeConfig = Record<VulnType, VulnTypeConfigItem>;
type ErrorMessageResolver = string | ((existence: FileExistence) => string);
interface ValidationRule {
predicate: (existence: FileExistence) => boolean;
errorMessage: ErrorMessageResolver;
retryable: boolean;
}
interface FileExistence {
deliverableExists: boolean;
queueExists: boolean;
}
interface PathsBase {
vulnType: VulnType;
deliverable: string;
queue: string;
sourceDir: string;
}
interface PathsWithExistence extends PathsBase {
existence: FileExistence;
}
interface PathsWithQueue extends PathsWithExistence {
queueData: QueueData;
}
interface PathsWithError {
error: PentestError;
}
interface QueueData {
vulnerabilities: unknown[];
[key: string]: unknown;
}
interface QueueValidationResult {
valid: boolean;
data: QueueData | null;
error: string | null;
}
/**
* Result type for safe validation - explicit error handling.
*/
export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
// Vulnerability type configuration as immutable data
const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
injection: Object.freeze({
deliverable: 'injection_analysis_deliverable.md',
queue: 'injection_exploitation_queue.json',
}),
xss: Object.freeze({
deliverable: 'xss_analysis_deliverable.md',
queue: 'xss_exploitation_queue.json',
}),
auth: Object.freeze({
deliverable: 'auth_analysis_deliverable.md',
queue: 'auth_exploitation_queue.json',
}),
ssrf: Object.freeze({
deliverable: 'ssrf_analysis_deliverable.md',
queue: 'ssrf_exploitation_queue.json',
}),
authz: Object.freeze({
deliverable: 'authz_analysis_deliverable.md',
queue: 'authz_exploitation_queue.json',
}),
}) as VulnTypeConfig;
// Pure function to create validation rule
function createValidationRule(
predicate: (existence: FileExistence) => boolean,
errorMessage: ErrorMessageResolver,
retryable: boolean = true
): ValidationRule {
return Object.freeze({ predicate, errorMessage, retryable });
}
// Symmetric deliverable rules: queue and deliverable must exist together (prevents partial analysis from triggering exploitation)
const fileExistenceRules: readonly ValidationRule[] = Object.freeze([
createValidationRule(
({ deliverableExists, queueExists }) => deliverableExists && queueExists,
getExistenceErrorMessage
),
]);
// Generate appropriate error message based on which files are missing
function getExistenceErrorMessage(existence: FileExistence): string {
const { deliverableExists, queueExists } = existence;
if (!deliverableExists && !queueExists) {
return 'Analysis failed: Neither deliverable nor queue file exists. Analysis agent must create both files.';
}
if (!queueExists) {
return 'Analysis incomplete: Deliverable exists but queue file missing. Analysis agent must create both files.';
}
return 'Analysis incomplete: Queue exists but deliverable file missing. Analysis agent must create both files.';
}
// Pure function to create file paths
const createPaths = (
vulnType: VulnType,
sourceDir: string
): PathsBase | PathsWithError => {
const config = VULN_TYPE_CONFIG[vulnType];
if (!config) {
return {
error: new PentestError(
`Unknown vulnerability type: ${vulnType}`,
'validation',
false,
{ vulnType }
),
};
}
return Object.freeze({
vulnType,
deliverable: path.join(sourceDir, 'deliverables', config.deliverable),
queue: path.join(sourceDir, 'deliverables', config.queue),
sourceDir,
});
};
// Pure function to check file existence
const checkFileExistence = async (
paths: PathsBase | PathsWithError
): Promise<PathsWithExistence | PathsWithError> => {
if ('error' in paths) return paths;
const [deliverableExists, queueExists] = await Promise.all([
fs.pathExists(paths.deliverable),
fs.pathExists(paths.queue),
]);
return Object.freeze({
...paths,
existence: Object.freeze({ deliverableExists, queueExists }),
});
};
// Validates deliverable/queue symmetry - both must exist or neither
const validateExistenceRules = (
pathsWithExistence: PathsWithExistence | PathsWithError
): PathsWithExistence | PathsWithError => {
if ('error' in pathsWithExistence) return pathsWithExistence;
const { existence, vulnType } = pathsWithExistence;
// Find the first rule that fails
const failedRule = fileExistenceRules.find((rule) => !rule.predicate(existence));
if (failedRule) {
const message =
typeof failedRule.errorMessage === 'function'
? failedRule.errorMessage(existence)
: failedRule.errorMessage;
return {
error: new PentestError(
`${message} (${vulnType})`,
'validation',
failedRule.retryable,
{
vulnType,
deliverablePath: pathsWithExistence.deliverable,
queuePath: pathsWithExistence.queue,
existence,
},
ErrorCode.DELIVERABLE_NOT_FOUND
),
};
}
return pathsWithExistence;
};
// Pure function to validate queue structure
const validateQueueStructure = (content: string): QueueValidationResult => {
try {
const parsed = JSON.parse(content) as unknown;
const isValid =
typeof parsed === 'object' &&
parsed !== null &&
'vulnerabilities' in parsed &&
Array.isArray((parsed as QueueData).vulnerabilities);
return Object.freeze({
valid: isValid,
data: isValid ? (parsed as QueueData) : null,
error: null,
});
} catch (parseError) {
return Object.freeze({
valid: false,
data: null,
error: parseError instanceof Error ? parseError.message : String(parseError),
});
}
};
// Queue parse failures are retryable - agent can fix malformed JSON on retry
const validateQueueContent = async (
pathsWithExistence: PathsWithExistence | PathsWithError
): Promise<PathsWithQueue | PathsWithError> => {
if ('error' in pathsWithExistence) return pathsWithExistence;
try {
const queueContent = await fs.readFile(pathsWithExistence.queue, 'utf8');
const queueValidation = validateQueueStructure(queueContent);
if (!queueValidation.valid) {
// Rule 6: Both exist, queue invalid
return {
error: new PentestError(
queueValidation.error
? `Queue validation failed for ${pathsWithExistence.vulnType}: Invalid JSON structure. Analysis agent must fix queue format.`
: `Queue validation failed for ${pathsWithExistence.vulnType}: Missing or invalid 'vulnerabilities' array. Analysis agent must fix queue structure.`,
'validation',
true, // retryable
{
vulnType: pathsWithExistence.vulnType,
queuePath: pathsWithExistence.queue,
originalError: queueValidation.error,
queueStructure: queueValidation.data ? Object.keys(queueValidation.data) : [],
}
),
};
}
return Object.freeze({
...pathsWithExistence,
queueData: queueValidation.data!,
});
} catch (readError) {
return {
error: new PentestError(
`Failed to read queue file for ${pathsWithExistence.vulnType}: ${readError instanceof Error ? readError.message : String(readError)}`,
'filesystem',
false,
{
vulnType: pathsWithExistence.vulnType,
queuePath: pathsWithExistence.queue,
originalError: readError instanceof Error ? readError.message : String(readError),
}
),
};
}
};
// Final decision: skip if queue says no vulns, proceed if vulns found, error otherwise
const determineExploitationDecision = (
validatedData: PathsWithQueue | PathsWithError
): ExploitationDecision => {
if ('error' in validatedData) {
throw validatedData.error;
}
const hasVulnerabilities = validatedData.queueData.vulnerabilities.length > 0;
// Rule 4: Both exist, queue valid and populated
// Rule 5: Both exist, queue valid but empty
return Object.freeze({
shouldExploit: hasVulnerabilities,
shouldRetry: false,
vulnerabilityCount: validatedData.queueData.vulnerabilities.length,
vulnType: validatedData.vulnType,
});
};
// Main functional validation pipeline
export async function validateQueueAndDeliverable(
vulnType: VulnType,
sourceDir: string
): Promise<ExploitationDecision> {
return asyncPipe<ExploitationDecision>(
createPaths(vulnType, sourceDir),
checkFileExistence,
validateExistenceRules,
validateQueueContent,
determineExploitationDecision
);
}
/**
* Safely validate queue and deliverable files.
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
*/
export async function validateQueueSafe(
vulnType: VulnType,
sourceDir: string
): Promise<SafeValidationResult> {
try {
const result = await validateQueueAndDeliverable(vulnType, sourceDir);
return ok(result);
} catch (error) {
return err(error as PentestError);
}
}
+162
View File
@@ -0,0 +1,162 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { fs, path } from 'zx';
import { PentestError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { ActivityLogger } from '../types/activity-logger.js';
interface DeliverableFile {
name: string;
path: string;
required: boolean;
}
// Pure function: Assemble final report from specialist deliverables
export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise<string> {
const deliverableFiles: DeliverableFile[] = [
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }
];
const sections: string[] = [];
for (const file of deliverableFiles) {
const filePath = path.join(sourceDir, 'deliverables', file.path);
try {
if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8');
sections.push(content);
logger.info(`Added ${file.name} findings`);
} else if (file.required) {
throw new PentestError(
`Required deliverable file not found: ${file.path}`,
'filesystem',
false,
{ deliverableFile: file.path, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND
);
} else {
logger.info(`No ${file.name} deliverable found`);
}
} catch (error) {
if (file.required) {
throw error;
}
const err = error as Error;
logger.warn(`Could not read ${file.path}: ${err.message}`);
}
}
const finalContent = sections.join('\n\n');
const deliverablesDir = path.join(sourceDir, 'deliverables');
const finalReportPath = path.join(deliverablesDir, 'comprehensive_security_assessment_report.md');
try {
// Ensure deliverables directory exists
await fs.ensureDir(deliverablesDir);
await fs.writeFile(finalReportPath, finalContent);
logger.info(`Final report assembled at ${finalReportPath}`);
} catch (error) {
const err = error as Error;
throw new PentestError(
`Failed to write final report: ${err.message}`,
'filesystem',
false,
{ finalReportPath, originalError: err.message }
);
}
return finalContent;
}
/**
* Inject model information into the final security report.
* Reads session.json to get the model(s) used, then injects a "Model:" line
* into the Executive Summary section of the report.
*/
export async function injectModelIntoReport(
repoPath: string,
outputPath: string,
logger: ActivityLogger
): Promise<void> {
// 1. Read session.json to get model information
const sessionJsonPath = path.join(outputPath, 'session.json');
if (!(await fs.pathExists(sessionJsonPath))) {
logger.warn('session.json not found, skipping model injection');
return;
}
interface SessionData {
metrics: {
agents: Record<string, { model?: string }>;
};
}
const sessionData: SessionData = await fs.readJson(sessionJsonPath);
// 2. Extract unique models from all agents
const models = new Set<string>();
for (const agent of Object.values(sessionData.metrics.agents)) {
if (agent.model) {
models.add(agent.model);
}
}
if (models.size === 0) {
logger.warn('No model information found in session.json');
return;
}
const modelStr = Array.from(models).join(', ');
logger.info(`Injecting model info into report: ${modelStr}`);
// 3. Read the final report
const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
if (!(await fs.pathExists(reportPath))) {
logger.warn('Final report not found, skipping model injection');
return;
}
let reportContent = await fs.readFile(reportPath, 'utf8');
// 4. Find and inject model line after "Assessment Date" in Executive Summary
// Pattern: "- Assessment Date: <date>" followed by a newline
const assessmentDatePattern = /^(- Assessment Date: .+)$/m;
const match = reportContent.match(assessmentDatePattern);
if (match) {
// Inject model line after Assessment Date
const modelLine = `- Model: ${modelStr}`;
reportContent = reportContent.replace(
assessmentDatePattern,
`$1\n${modelLine}`
);
logger.info('Model info injected into Executive Summary');
} else {
// If no Assessment Date line found, try to add after Executive Summary header
const execSummaryPattern = /^## Executive Summary$/m;
if (reportContent.match(execSummaryPattern)) {
// Add model as first item in Executive Summary
reportContent = reportContent.replace(
execSummaryPattern,
`## Executive Summary\n- Model: ${modelStr}`
);
logger.info('Model info added to Executive Summary header');
} else {
logger.warn('Could not find Executive Summary section');
return;
}
}
// 5. Write modified report back
await fs.writeFile(reportPath, reportContent);
}