refactor: consolidate file layout and break circular dependencies
- Move error-handling, git-manager, prompt-manager, queue-validation, and reporting into src/services/ - Delete src/constants.ts — relocate AGENT_VALIDATORS and MCP_AGENT_MAPPING into session-manager.ts alongside agent definitions - Delete src/utils/output-formatter.ts — absorb filterJsonToolCalls and getAgentPrefix into ai/output-formatters.ts - Extract ActivityLogger interface into src/types/activity-logger.ts to break temporal/ → services circular dependency - Consolidate VulnType, ExploitationDecision into types/agents.ts and SessionMetadata into types/audit.ts - Remove dead timingResults/costResults globals from utils/metrics.ts and all consumers
This commit is contained in:
@@ -21,13 +21,13 @@
|
||||
* No Temporal dependencies - pure domain logic.
|
||||
*/
|
||||
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import { Result, ok, err, isErr } from '../types/result.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { isSpendingCapBehavior } from '../utils/billing-detection.js';
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import { loadPrompt } from '../prompts/prompt-manager.js';
|
||||
import { loadPrompt } from './prompt-manager.js';
|
||||
import {
|
||||
runClaudePrompt,
|
||||
validateAgentOutput,
|
||||
@@ -38,7 +38,7 @@ import {
|
||||
commitGitSuccess,
|
||||
rollbackGitWorkspace,
|
||||
getGitCommitHash,
|
||||
} from '../utils/git-manager.js';
|
||||
} from './git-manager.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { AgentEndResult } from '../types/audit.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
*/
|
||||
|
||||
import { parseConfig, distributeConfig } from '../config-parser.js';
|
||||
import { PentestError } from '../error-handling.js';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { Result, ok, err } from '../types/result.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
|
||||
@@ -0,0 +1,276 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import {
|
||||
ErrorCode,
|
||||
type PentestErrorType,
|
||||
type PentestErrorContext,
|
||||
type PromptErrorResult,
|
||||
} from '../types/errors.js';
|
||||
import {
|
||||
matchesBillingApiPattern,
|
||||
matchesBillingTextPattern,
|
||||
} from '../utils/billing-detection.js';
|
||||
|
||||
// Custom error class for pentest operations
|
||||
export class PentestError extends Error {
|
||||
override name = 'PentestError' as const;
|
||||
type: PentestErrorType;
|
||||
retryable: boolean;
|
||||
context: PentestErrorContext;
|
||||
timestamp: string;
|
||||
/** Optional specific error code for reliable classification */
|
||||
code?: ErrorCode;
|
||||
|
||||
constructor(
|
||||
message: string,
|
||||
type: PentestErrorType,
|
||||
retryable: boolean = false,
|
||||
context: PentestErrorContext = {},
|
||||
code?: ErrorCode
|
||||
) {
|
||||
super(message);
|
||||
this.type = type;
|
||||
this.retryable = retryable;
|
||||
this.context = context;
|
||||
this.timestamp = new Date().toISOString();
|
||||
if (code !== undefined) {
|
||||
this.code = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle tool execution errors
|
||||
// Handle prompt loading errors
|
||||
export function handlePromptError(
|
||||
promptName: string,
|
||||
error: Error
|
||||
): PromptErrorResult {
|
||||
return {
|
||||
success: false,
|
||||
error: new PentestError(
|
||||
`Failed to load prompt '${promptName}': ${error.message}`,
|
||||
'prompt',
|
||||
false,
|
||||
{ promptName, originalError: error.message }
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
// Patterns that indicate retryable errors
|
||||
const RETRYABLE_PATTERNS = [
|
||||
// Network and connection errors
|
||||
'network',
|
||||
'connection',
|
||||
'timeout',
|
||||
'econnreset',
|
||||
'enotfound',
|
||||
'econnrefused',
|
||||
// Rate limiting
|
||||
'rate limit',
|
||||
'429',
|
||||
'too many requests',
|
||||
// Server errors
|
||||
'server error',
|
||||
'5xx',
|
||||
'internal server error',
|
||||
'service unavailable',
|
||||
'bad gateway',
|
||||
// Claude API errors
|
||||
'mcp server',
|
||||
'model unavailable',
|
||||
'service temporarily unavailable',
|
||||
'api error',
|
||||
'terminated',
|
||||
// Max turns
|
||||
'max turns',
|
||||
'maximum turns',
|
||||
];
|
||||
|
||||
// Patterns that indicate non-retryable errors (checked before default)
|
||||
const NON_RETRYABLE_PATTERNS = [
|
||||
'authentication',
|
||||
'invalid prompt',
|
||||
'out of memory',
|
||||
'permission denied',
|
||||
'session limit reached',
|
||||
'invalid api key',
|
||||
];
|
||||
|
||||
// Conservative retry classification - unknown errors don't retry (fail-safe default)
|
||||
export function isRetryableError(error: Error): boolean {
|
||||
const message = error.message.toLowerCase();
|
||||
|
||||
// Check for explicit non-retryable patterns first
|
||||
if (NON_RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for retryable patterns
|
||||
return RETRYABLE_PATTERNS.some((pattern) => message.includes(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies errors by ErrorCode for reliable, code-based classification.
|
||||
* Used when error is a PentestError with a specific ErrorCode.
|
||||
*/
|
||||
function classifyByErrorCode(
|
||||
code: ErrorCode,
|
||||
retryableFromError: boolean
|
||||
): { type: string; retryable: boolean } {
|
||||
switch (code) {
|
||||
// Billing errors - retryable (wait for cap reset or credits added)
|
||||
case ErrorCode.SPENDING_CAP_REACHED:
|
||||
case ErrorCode.INSUFFICIENT_CREDITS:
|
||||
return { type: 'BillingError', retryable: true };
|
||||
|
||||
case ErrorCode.API_RATE_LIMITED:
|
||||
return { type: 'RateLimitError', retryable: true };
|
||||
|
||||
// Config errors - non-retryable (need manual fix)
|
||||
case ErrorCode.CONFIG_NOT_FOUND:
|
||||
case ErrorCode.CONFIG_VALIDATION_FAILED:
|
||||
case ErrorCode.CONFIG_PARSE_ERROR:
|
||||
return { type: 'ConfigurationError', retryable: false };
|
||||
|
||||
// Prompt errors - non-retryable (need manual fix)
|
||||
case ErrorCode.PROMPT_LOAD_FAILED:
|
||||
return { type: 'ConfigurationError', retryable: false };
|
||||
|
||||
// Git errors - non-retryable (indicates workspace corruption)
|
||||
case ErrorCode.GIT_CHECKPOINT_FAILED:
|
||||
case ErrorCode.GIT_ROLLBACK_FAILED:
|
||||
return { type: 'GitError', retryable: false };
|
||||
|
||||
// Validation errors - retryable (agent may succeed on retry)
|
||||
case ErrorCode.OUTPUT_VALIDATION_FAILED:
|
||||
case ErrorCode.DELIVERABLE_NOT_FOUND:
|
||||
return { type: 'OutputValidationError', retryable: true };
|
||||
|
||||
// Agent execution - use the retryable flag from the error
|
||||
case ErrorCode.AGENT_EXECUTION_FAILED:
|
||||
return { type: 'AgentExecutionError', retryable: retryableFromError };
|
||||
|
||||
default:
|
||||
// Unknown code - fall through to string matching
|
||||
return { type: 'UnknownError', retryable: retryableFromError };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies errors for Temporal workflow retry behavior.
|
||||
* Returns error type and whether Temporal should retry.
|
||||
*
|
||||
* Used by activities to wrap errors in ApplicationFailure:
|
||||
* - Retryable errors: Temporal retries with configured backoff
|
||||
* - Non-retryable errors: Temporal fails immediately
|
||||
*
|
||||
* Classification priority:
|
||||
* 1. If error is PentestError with ErrorCode, classify by code (reliable)
|
||||
* 2. Fall through to string matching for external errors (SDK, network, etc.)
|
||||
*/
|
||||
export function classifyErrorForTemporal(error: unknown): { type: string; retryable: boolean } {
|
||||
// === CODE-BASED CLASSIFICATION (Preferred for internal errors) ===
|
||||
if (error instanceof PentestError && error.code !== undefined) {
|
||||
return classifyByErrorCode(error.code, error.retryable);
|
||||
}
|
||||
|
||||
// === STRING-BASED CLASSIFICATION (Fallback for external errors) ===
|
||||
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
|
||||
|
||||
// === BILLING ERRORS (Retryable with long backoff) ===
|
||||
// Anthropic returns billing as 400 invalid_request_error
|
||||
// Human can add credits OR wait for spending cap to reset (5-30 min backoff)
|
||||
// Check both API patterns and text patterns for comprehensive detection
|
||||
if (matchesBillingApiPattern(message) || matchesBillingTextPattern(message)) {
|
||||
return { type: 'BillingError', retryable: true };
|
||||
}
|
||||
|
||||
// === PERMANENT ERRORS (Non-retryable) ===
|
||||
|
||||
// Authentication (401) - bad API key won't fix itself
|
||||
if (
|
||||
message.includes('authentication') ||
|
||||
message.includes('api key') ||
|
||||
message.includes('401') ||
|
||||
message.includes('authentication_error')
|
||||
) {
|
||||
return { type: 'AuthenticationError', retryable: false };
|
||||
}
|
||||
|
||||
// Permission (403) - access won't be granted
|
||||
if (
|
||||
message.includes('permission') ||
|
||||
message.includes('forbidden') ||
|
||||
message.includes('403')
|
||||
) {
|
||||
return { type: 'PermissionError', retryable: false };
|
||||
}
|
||||
|
||||
// === OUTPUT VALIDATION ERRORS (Retryable) ===
|
||||
// Agent didn't produce expected deliverables - retry may succeed
|
||||
// IMPORTANT: Must come BEFORE generic 'validation' check below
|
||||
if (
|
||||
message.includes('failed output validation') ||
|
||||
message.includes('output validation failed')
|
||||
) {
|
||||
return { type: 'OutputValidationError', retryable: true };
|
||||
}
|
||||
|
||||
// Invalid Request (400) - malformed request is permanent
|
||||
// Note: Checked AFTER billing and AFTER output validation
|
||||
if (
|
||||
message.includes('invalid_request_error') ||
|
||||
message.includes('malformed') ||
|
||||
message.includes('validation')
|
||||
) {
|
||||
return { type: 'InvalidRequestError', retryable: false };
|
||||
}
|
||||
|
||||
// Request Too Large (413) - won't fit no matter how many retries
|
||||
if (
|
||||
message.includes('request_too_large') ||
|
||||
message.includes('too large') ||
|
||||
message.includes('413')
|
||||
) {
|
||||
return { type: 'RequestTooLargeError', retryable: false };
|
||||
}
|
||||
|
||||
// Configuration errors - missing files need manual fix
|
||||
if (
|
||||
message.includes('enoent') ||
|
||||
message.includes('no such file') ||
|
||||
message.includes('cli not installed')
|
||||
) {
|
||||
return { type: 'ConfigurationError', retryable: false };
|
||||
}
|
||||
|
||||
// Execution limits - max turns/budget reached
|
||||
if (
|
||||
message.includes('max turns') ||
|
||||
message.includes('budget') ||
|
||||
message.includes('execution limit') ||
|
||||
message.includes('error_max_turns') ||
|
||||
message.includes('error_max_budget')
|
||||
) {
|
||||
return { type: 'ExecutionLimitError', retryable: false };
|
||||
}
|
||||
|
||||
// Invalid target URL - bad URL format won't fix itself
|
||||
if (
|
||||
message.includes('invalid url') ||
|
||||
message.includes('invalid target') ||
|
||||
message.includes('malformed url') ||
|
||||
message.includes('invalid uri')
|
||||
) {
|
||||
return { type: 'InvalidTargetError', retryable: false };
|
||||
}
|
||||
|
||||
// === TRANSIENT ERRORS (Retryable) ===
|
||||
// Rate limits (429), server errors (5xx), network issues
|
||||
// Let Temporal retry with configured backoff
|
||||
return { type: 'TransientError', retryable: true };
|
||||
}
|
||||
@@ -17,9 +17,9 @@ import {
|
||||
validateQueueSafe,
|
||||
type VulnType,
|
||||
type ExploitationDecision,
|
||||
} from '../queue-validation.js';
|
||||
} from './queue-validation.js';
|
||||
import { isOk } from '../types/result.js';
|
||||
import type { ActivityLogger } from '../temporal/activity-logger.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
/**
|
||||
* Service for checking exploitation queue decisions.
|
||||
|
||||
@@ -0,0 +1,319 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { $ } from 'zx';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
/**
|
||||
* Check if a directory is a git repository.
|
||||
* Returns true if the directory contains a .git folder or is inside a git repo.
|
||||
*/
|
||||
export async function isGitRepository(dir: string): Promise<boolean> {
|
||||
try {
|
||||
await $`cd ${dir} && git rev-parse --git-dir`.quiet();
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
interface GitOperationResult {
|
||||
success: boolean;
|
||||
hadChanges?: boolean;
|
||||
error?: Error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get list of changed files from git status --porcelain output
|
||||
*/
|
||||
async function getChangedFiles(
|
||||
sourceDir: string,
|
||||
operationDescription: string
|
||||
): Promise<string[]> {
|
||||
const status = await executeGitCommandWithRetry(
|
||||
['git', 'status', '--porcelain'],
|
||||
sourceDir,
|
||||
operationDescription
|
||||
);
|
||||
return status.stdout
|
||||
.trim()
|
||||
.split('\n')
|
||||
.filter((line) => line.length > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log a summary of changed files with truncation for long lists
|
||||
*/
|
||||
function logChangeSummary(
|
||||
changes: string[],
|
||||
messageWithChanges: string,
|
||||
messageWithoutChanges: string,
|
||||
logger: ActivityLogger,
|
||||
level: 'info' | 'warn' = 'info',
|
||||
maxToShow: number = 5
|
||||
): void {
|
||||
if (changes.length > 0) {
|
||||
const msg = messageWithChanges.replace('{count}', String(changes.length));
|
||||
const fileList = changes.slice(0, maxToShow).map((c) => ` ${c}`).join(', ');
|
||||
const suffix = changes.length > maxToShow
|
||||
? ` ... and ${changes.length - maxToShow} more files`
|
||||
: '';
|
||||
logger[level](`${msg} ${fileList}${suffix}`);
|
||||
} else {
|
||||
logger[level](messageWithoutChanges);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert unknown error to GitOperationResult
|
||||
*/
|
||||
function toErrorResult(error: unknown): GitOperationResult {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
return {
|
||||
success: false,
|
||||
error: error instanceof Error ? error : new Error(errMsg),
|
||||
};
|
||||
}
|
||||
|
||||
// Serializes git operations to prevent index.lock conflicts during parallel agent execution
|
||||
class GitSemaphore {
|
||||
private queue: Array<() => void> = [];
|
||||
private running: boolean = false;
|
||||
|
||||
async acquire(): Promise<void> {
|
||||
return new Promise((resolve) => {
|
||||
this.queue.push(resolve);
|
||||
this.process();
|
||||
});
|
||||
}
|
||||
|
||||
release(): void {
|
||||
this.running = false;
|
||||
this.process();
|
||||
}
|
||||
|
||||
private process(): void {
|
||||
if (!this.running && this.queue.length > 0) {
|
||||
this.running = true;
|
||||
const resolve = this.queue.shift();
|
||||
resolve!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const gitSemaphore = new GitSemaphore();
|
||||
|
||||
const GIT_LOCK_ERROR_PATTERNS = [
|
||||
'index.lock',
|
||||
'unable to lock',
|
||||
'Another git process',
|
||||
'fatal: Unable to create',
|
||||
'fatal: index file',
|
||||
];
|
||||
|
||||
function isGitLockError(errorMessage: string): boolean {
|
||||
return GIT_LOCK_ERROR_PATTERNS.some((pattern) => errorMessage.includes(pattern));
|
||||
}
|
||||
|
||||
// Retries git commands on lock conflicts with exponential backoff
|
||||
export async function executeGitCommandWithRetry(
|
||||
commandArgs: string[],
|
||||
sourceDir: string,
|
||||
description: string,
|
||||
maxRetries: number = 5
|
||||
): Promise<{ stdout: string; stderr: string }> {
|
||||
await gitSemaphore.acquire();
|
||||
|
||||
try {
|
||||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const [cmd, ...args] = commandArgs;
|
||||
const result = await $`cd ${sourceDir} && ${cmd} ${args}`;
|
||||
return result;
|
||||
} catch (error) {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
|
||||
if (isGitLockError(errMsg) && attempt < maxRetries) {
|
||||
const delay = Math.pow(2, attempt - 1) * 1000;
|
||||
// executeGitCommandWithRetry is also called outside activity context
|
||||
// (e.g., from resume logic), so we use console.warn as a fallback here
|
||||
console.warn(
|
||||
`Git lock conflict during ${description} (attempt ${attempt}/${maxRetries}). Retrying in ${delay}ms...`
|
||||
);
|
||||
await new Promise((resolve) => setTimeout(resolve, delay));
|
||||
continue;
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
throw new PentestError(
|
||||
`Git command failed after ${maxRetries} retries`,
|
||||
'filesystem',
|
||||
true, // Retryable - transient git lock issues
|
||||
{ maxRetries, description },
|
||||
ErrorCode.GIT_CHECKPOINT_FAILED
|
||||
);
|
||||
} finally {
|
||||
gitSemaphore.release();
|
||||
}
|
||||
}
|
||||
|
||||
// Two-phase reset: hard reset (tracked files) + clean (untracked files)
|
||||
export async function rollbackGitWorkspace(
|
||||
sourceDir: string,
|
||||
reason: string = 'retry preparation',
|
||||
logger: ActivityLogger
|
||||
): Promise<GitOperationResult> {
|
||||
// Skip git operations if not a git repository
|
||||
if (!(await isGitRepository(sourceDir))) {
|
||||
logger.info('Skipping git rollback (not a git repository)');
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
logger.info(`Rolling back workspace for ${reason}`);
|
||||
try {
|
||||
const changes = await getChangedFiles(sourceDir, 'status check for rollback');
|
||||
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'reset', '--hard', 'HEAD'],
|
||||
sourceDir,
|
||||
'hard reset for rollback'
|
||||
);
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'clean', '-fd'],
|
||||
sourceDir,
|
||||
'cleaning untracked files for rollback'
|
||||
);
|
||||
|
||||
logChangeSummary(
|
||||
changes,
|
||||
'Rollback completed - removed {count} contaminated changes:',
|
||||
'Rollback completed - no changes to remove',
|
||||
logger,
|
||||
'info',
|
||||
3
|
||||
);
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Rollback failed after retries: ${errMsg}`);
|
||||
return {
|
||||
success: false,
|
||||
error: new PentestError(
|
||||
`Git rollback failed: ${errMsg}`,
|
||||
'filesystem',
|
||||
false, // Non-retryable - rollback is best-effort cleanup
|
||||
{ sourceDir, reason },
|
||||
ErrorCode.GIT_ROLLBACK_FAILED
|
||||
),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Creates checkpoint before each attempt. First attempt preserves workspace; retries clean it.
|
||||
export async function createGitCheckpoint(
|
||||
sourceDir: string,
|
||||
description: string,
|
||||
attempt: number,
|
||||
logger: ActivityLogger
|
||||
): Promise<GitOperationResult> {
|
||||
// Skip git operations if not a git repository
|
||||
if (!(await isGitRepository(sourceDir))) {
|
||||
logger.info('Skipping git checkpoint (not a git repository)');
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
logger.info(`Creating checkpoint for ${description} (attempt ${attempt})`);
|
||||
try {
|
||||
// First attempt: preserve existing deliverables. Retries: clean workspace to prevent pollution
|
||||
if (attempt > 1) {
|
||||
const cleanResult = await rollbackGitWorkspace(sourceDir, `${description} (retry cleanup)`, logger);
|
||||
if (!cleanResult.success) {
|
||||
logger.warn(`Workspace cleanup failed, continuing anyway: ${cleanResult.error?.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const changes = await getChangedFiles(sourceDir, 'status check');
|
||||
const hasChanges = changes.length > 0;
|
||||
|
||||
await executeGitCommandWithRetry(['git', 'add', '-A'], sourceDir, 'staging changes');
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'commit', '-m', `📍 Checkpoint: ${description} (attempt ${attempt})`, '--allow-empty'],
|
||||
sourceDir,
|
||||
'creating commit'
|
||||
);
|
||||
|
||||
if (hasChanges) {
|
||||
logger.info('Checkpoint created with uncommitted changes staged');
|
||||
} else {
|
||||
logger.info('Empty checkpoint created (no workspace changes)');
|
||||
}
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
const result = toErrorResult(error);
|
||||
logger.warn(`Checkpoint creation failed after retries: ${result.error?.message}`);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
export async function commitGitSuccess(
|
||||
sourceDir: string,
|
||||
description: string,
|
||||
logger: ActivityLogger
|
||||
): Promise<GitOperationResult> {
|
||||
// Skip git operations if not a git repository
|
||||
if (!(await isGitRepository(sourceDir))) {
|
||||
logger.info('Skipping git commit (not a git repository)');
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
logger.info(`Committing successful results for ${description}`);
|
||||
try {
|
||||
const changes = await getChangedFiles(sourceDir, 'status check for success commit');
|
||||
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'add', '-A'],
|
||||
sourceDir,
|
||||
'staging changes for success commit'
|
||||
);
|
||||
await executeGitCommandWithRetry(
|
||||
['git', 'commit', '-m', `✅ ${description}: completed successfully`, '--allow-empty'],
|
||||
sourceDir,
|
||||
'creating success commit'
|
||||
);
|
||||
|
||||
logChangeSummary(
|
||||
changes,
|
||||
'Success commit created with {count} file changes:',
|
||||
'Empty success commit created (agent made no file changes)',
|
||||
logger
|
||||
);
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
const result = toErrorResult(error);
|
||||
logger.warn(`Success commit failed after retries: ${result.error?.message}`);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current git commit hash.
|
||||
* Returns null if not a git repository.
|
||||
*/
|
||||
export async function getGitCommitHash(sourceDir: string): Promise<string | null> {
|
||||
if (!(await isGitRepository(sourceDir))) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
const result = await $`cd ${sourceDir} && git rev-parse HEAD`;
|
||||
return result.stdout.trim();
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -18,3 +18,6 @@ export { ConfigLoaderService } from './config-loader.js';
|
||||
export { ExploitationCheckerService } from './exploitation-checker.js';
|
||||
export { AgentExecutionService } from './agent-execution.js';
|
||||
export type { AgentExecutionInput } from './agent-execution.js';
|
||||
|
||||
export { assembleFinalReport, injectModelIntoReport } from './reporting.js';
|
||||
export { loadPrompt } from './prompt-manager.js';
|
||||
|
||||
@@ -0,0 +1,267 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError, handlePromptError } from './error-handling.js';
|
||||
import { MCP_AGENT_MAPPING } from '../session-manager.js';
|
||||
import type { Authentication, DistributedConfig } from '../types/config.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
interface PromptVariables {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
MCP_SERVER?: string;
|
||||
}
|
||||
|
||||
interface IncludeReplacement {
|
||||
placeholder: string;
|
||||
content: string;
|
||||
}
|
||||
|
||||
// Pure function: Build complete login instructions from config
|
||||
async function buildLoginInstructions(authentication: Authentication, logger: ActivityLogger): Promise<string> {
|
||||
try {
|
||||
// Load the login instructions template
|
||||
const loginInstructionsPath = path.join(import.meta.dirname, '..', '..', 'prompts', 'shared', 'login-instructions.txt');
|
||||
|
||||
if (!await fs.pathExists(loginInstructionsPath)) {
|
||||
throw new PentestError(
|
||||
'Login instructions template not found',
|
||||
'filesystem',
|
||||
false,
|
||||
{ loginInstructionsPath }
|
||||
);
|
||||
}
|
||||
|
||||
const fullTemplate = await fs.readFile(loginInstructionsPath, 'utf8');
|
||||
|
||||
// Helper function to extract sections based on markers
|
||||
const getSection = (content: string, sectionName: string): string => {
|
||||
const regex = new RegExp(`<!-- BEGIN:${sectionName} -->([\\s\\S]*?)<!-- END:${sectionName} -->`, 'g');
|
||||
const match = regex.exec(content);
|
||||
return match ? match[1]!.trim() : '';
|
||||
};
|
||||
|
||||
// Extract sections based on login type
|
||||
const loginType = authentication.login_type?.toUpperCase();
|
||||
let loginInstructions = '';
|
||||
|
||||
// Build instructions with only relevant sections
|
||||
const commonSection = getSection(fullTemplate, 'COMMON');
|
||||
const authSection = loginType ? getSection(fullTemplate, loginType) : ''; // FORM or SSO
|
||||
const verificationSection = getSection(fullTemplate, 'VERIFICATION');
|
||||
|
||||
// Fallback to full template if markers are missing (backward compatibility)
|
||||
if (!commonSection && !authSection && !verificationSection) {
|
||||
logger.warn('Section markers not found, using full login instructions template');
|
||||
loginInstructions = fullTemplate;
|
||||
} else {
|
||||
// Combine relevant sections
|
||||
loginInstructions = [commonSection, authSection, verificationSection]
|
||||
.filter(section => section) // Remove empty sections
|
||||
.join('\n\n');
|
||||
}
|
||||
|
||||
// Replace the user instructions placeholder with the login flow from config
|
||||
let userInstructions = (authentication.login_flow ?? []).join('\n');
|
||||
|
||||
// Replace credential placeholders within the user instructions
|
||||
if (authentication.credentials) {
|
||||
if (authentication.credentials.username) {
|
||||
userInstructions = userInstructions.replace(/\$username/g, authentication.credentials.username);
|
||||
}
|
||||
if (authentication.credentials.password) {
|
||||
userInstructions = userInstructions.replace(/\$password/g, authentication.credentials.password);
|
||||
}
|
||||
if (authentication.credentials.totp_secret) {
|
||||
userInstructions = userInstructions.replace(/\$totp/g, `generated TOTP code using secret "${authentication.credentials.totp_secret}"`);
|
||||
}
|
||||
}
|
||||
|
||||
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
|
||||
|
||||
// Replace TOTP secret placeholder if present in template
|
||||
if (authentication.credentials?.totp_secret) {
|
||||
loginInstructions = loginInstructions.replace(/{{totp_secret}}/g, authentication.credentials.totp_secret);
|
||||
}
|
||||
|
||||
return loginInstructions;
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
throw error;
|
||||
}
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
throw new PentestError(
|
||||
`Failed to build login instructions: ${errMsg}`,
|
||||
'config',
|
||||
false,
|
||||
{ authentication, originalError: errMsg }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Pure function: Process @include() directives
|
||||
async function processIncludes(content: string, baseDir: string): Promise<string> {
|
||||
const includeRegex = /@include\(([^)]+)\)/g;
|
||||
// Use a Promise.all to handle all includes concurrently
|
||||
const replacements: IncludeReplacement[] = await Promise.all(
|
||||
Array.from(content.matchAll(includeRegex)).map(async (match) => {
|
||||
const includePath = path.join(baseDir, match[1]!);
|
||||
const sharedContent = await fs.readFile(includePath, 'utf8');
|
||||
return {
|
||||
placeholder: match[0],
|
||||
content: sharedContent,
|
||||
};
|
||||
})
|
||||
);
|
||||
|
||||
for (const replacement of replacements) {
|
||||
content = content.replace(replacement.placeholder, replacement.content);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
// Pure function: Variable interpolation
|
||||
async function interpolateVariables(
|
||||
template: string,
|
||||
variables: PromptVariables,
|
||||
config: DistributedConfig | null = null,
|
||||
logger: ActivityLogger
|
||||
): Promise<string> {
|
||||
try {
|
||||
if (!template || typeof template !== 'string') {
|
||||
throw new PentestError(
|
||||
'Template must be a non-empty string',
|
||||
'validation',
|
||||
false,
|
||||
{ templateType: typeof template, templateLength: template?.length }
|
||||
);
|
||||
}
|
||||
|
||||
if (!variables || !variables.webUrl || !variables.repoPath) {
|
||||
throw new PentestError(
|
||||
'Variables must include webUrl and repoPath',
|
||||
'validation',
|
||||
false,
|
||||
{ variables: Object.keys(variables || {}) }
|
||||
);
|
||||
}
|
||||
|
||||
let result = template
|
||||
.replace(/{{WEB_URL}}/g, variables.webUrl)
|
||||
.replace(/{{REPO_PATH}}/g, variables.repoPath)
|
||||
.replace(/{{MCP_SERVER}}/g, variables.MCP_SERVER || 'playwright-agent1');
|
||||
|
||||
if (config) {
|
||||
// Handle rules section - if both are empty, use cleaner messaging
|
||||
const hasAvoidRules = config.avoid && config.avoid.length > 0;
|
||||
const hasFocusRules = config.focus && config.focus.length > 0;
|
||||
|
||||
if (!hasAvoidRules && !hasFocusRules) {
|
||||
// Replace the entire rules section with a clean message
|
||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||
} else {
|
||||
const avoidRules = hasAvoidRules ? config.avoid!.map(r => `- ${r.description}`).join('\n') : 'None';
|
||||
const focusRules = hasFocusRules ? config.focus!.map(r => `- ${r.description}`).join('\n') : 'None';
|
||||
|
||||
result = result
|
||||
.replace(/{{RULES_AVOID}}/g, avoidRules)
|
||||
.replace(/{{RULES_FOCUS}}/g, focusRules);
|
||||
}
|
||||
|
||||
// Extract and inject login instructions from config
|
||||
if (config.authentication?.login_flow) {
|
||||
const loginInstructions = await buildLoginInstructions(config.authentication, logger);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
|
||||
} else {
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
|
||||
}
|
||||
} else {
|
||||
// Replace the entire rules section with a clean message when no config provided
|
||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
|
||||
}
|
||||
|
||||
// Validate that all placeholders have been replaced (excluding instructional text)
|
||||
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
|
||||
if (remainingPlaceholders) {
|
||||
logger.warn(`Found unresolved placeholders in prompt: ${remainingPlaceholders.join(', ')}`);
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
throw error;
|
||||
}
|
||||
const errMsg = error instanceof Error ? error.message : String(error);
|
||||
throw new PentestError(
|
||||
`Variable interpolation failed: ${errMsg}`,
|
||||
'prompt',
|
||||
false,
|
||||
{ originalError: errMsg }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Pure function: Load and interpolate prompt template
|
||||
export async function loadPrompt(
|
||||
promptName: string,
|
||||
variables: PromptVariables,
|
||||
config: DistributedConfig | null = null,
|
||||
pipelineTestingMode: boolean = false,
|
||||
logger: ActivityLogger
|
||||
): Promise<string> {
|
||||
try {
|
||||
// Use pipeline testing prompts if pipeline testing mode is enabled
|
||||
const baseDir = pipelineTestingMode ? 'prompts/pipeline-testing' : 'prompts';
|
||||
const promptsDir = path.join(import.meta.dirname, '..', '..', baseDir);
|
||||
const promptPath = path.join(promptsDir, `${promptName}.txt`);
|
||||
|
||||
// Debug message for pipeline testing mode
|
||||
if (pipelineTestingMode) {
|
||||
logger.info(`Using pipeline testing prompt: ${promptPath}`);
|
||||
}
|
||||
|
||||
// Check if file exists first
|
||||
if (!await fs.pathExists(promptPath)) {
|
||||
throw new PentestError(
|
||||
`Prompt file not found: ${promptPath}`,
|
||||
'prompt',
|
||||
false,
|
||||
{ promptName, promptPath }
|
||||
);
|
||||
}
|
||||
|
||||
// Add MCP server assignment to variables
|
||||
const enhancedVariables: PromptVariables = { ...variables };
|
||||
|
||||
// Assign MCP server based on prompt name (agent name)
|
||||
const mcpServer = MCP_AGENT_MAPPING[promptName as keyof typeof MCP_AGENT_MAPPING];
|
||||
if (mcpServer) {
|
||||
enhancedVariables.MCP_SERVER = mcpServer;
|
||||
logger.info(`Assigned ${promptName} -> ${enhancedVariables.MCP_SERVER}`);
|
||||
} else {
|
||||
// Fallback for unknown agents
|
||||
enhancedVariables.MCP_SERVER = 'playwright-agent1';
|
||||
logger.warn(`Unknown agent ${promptName}, using fallback -> ${enhancedVariables.MCP_SERVER}`);
|
||||
}
|
||||
|
||||
let template = await fs.readFile(promptPath, 'utf8');
|
||||
|
||||
// Pre-process the template to handle @include directives
|
||||
template = await processIncludes(template, promptsDir);
|
||||
|
||||
return await interpolateVariables(template, enhancedVariables, config, logger);
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
throw error;
|
||||
}
|
||||
const promptError = handlePromptError(promptName, error as Error);
|
||||
throw promptError.error;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,325 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { type Result, ok, err } from '../types/result.js';
|
||||
import { asyncPipe } from '../utils/functional.js';
|
||||
import type { VulnType, ExploitationDecision } from '../types/agents.js';
|
||||
|
||||
export type { VulnType, ExploitationDecision } from '../types/agents.js';
|
||||
|
||||
interface VulnTypeConfigItem {
|
||||
deliverable: string;
|
||||
queue: string;
|
||||
}
|
||||
|
||||
type VulnTypeConfig = Record<VulnType, VulnTypeConfigItem>;
|
||||
|
||||
type ErrorMessageResolver = string | ((existence: FileExistence) => string);
|
||||
|
||||
interface ValidationRule {
|
||||
predicate: (existence: FileExistence) => boolean;
|
||||
errorMessage: ErrorMessageResolver;
|
||||
retryable: boolean;
|
||||
}
|
||||
|
||||
interface FileExistence {
|
||||
deliverableExists: boolean;
|
||||
queueExists: boolean;
|
||||
}
|
||||
|
||||
interface PathsBase {
|
||||
vulnType: VulnType;
|
||||
deliverable: string;
|
||||
queue: string;
|
||||
sourceDir: string;
|
||||
}
|
||||
|
||||
interface PathsWithExistence extends PathsBase {
|
||||
existence: FileExistence;
|
||||
}
|
||||
|
||||
interface PathsWithQueue extends PathsWithExistence {
|
||||
queueData: QueueData;
|
||||
}
|
||||
|
||||
interface PathsWithError {
|
||||
error: PentestError;
|
||||
}
|
||||
|
||||
interface QueueData {
|
||||
vulnerabilities: unknown[];
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
interface QueueValidationResult {
|
||||
valid: boolean;
|
||||
data: QueueData | null;
|
||||
error: string | null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Result type for safe validation - explicit error handling.
|
||||
*/
|
||||
export type SafeValidationResult = Result<ExploitationDecision, PentestError>;
|
||||
|
||||
// Vulnerability type configuration as immutable data
|
||||
const VULN_TYPE_CONFIG: VulnTypeConfig = Object.freeze({
|
||||
injection: Object.freeze({
|
||||
deliverable: 'injection_analysis_deliverable.md',
|
||||
queue: 'injection_exploitation_queue.json',
|
||||
}),
|
||||
xss: Object.freeze({
|
||||
deliverable: 'xss_analysis_deliverable.md',
|
||||
queue: 'xss_exploitation_queue.json',
|
||||
}),
|
||||
auth: Object.freeze({
|
||||
deliverable: 'auth_analysis_deliverable.md',
|
||||
queue: 'auth_exploitation_queue.json',
|
||||
}),
|
||||
ssrf: Object.freeze({
|
||||
deliverable: 'ssrf_analysis_deliverable.md',
|
||||
queue: 'ssrf_exploitation_queue.json',
|
||||
}),
|
||||
authz: Object.freeze({
|
||||
deliverable: 'authz_analysis_deliverable.md',
|
||||
queue: 'authz_exploitation_queue.json',
|
||||
}),
|
||||
}) as VulnTypeConfig;
|
||||
|
||||
// Pure function to create validation rule
|
||||
function createValidationRule(
|
||||
predicate: (existence: FileExistence) => boolean,
|
||||
errorMessage: ErrorMessageResolver,
|
||||
retryable: boolean = true
|
||||
): ValidationRule {
|
||||
return Object.freeze({ predicate, errorMessage, retryable });
|
||||
}
|
||||
|
||||
// Symmetric deliverable rules: queue and deliverable must exist together (prevents partial analysis from triggering exploitation)
|
||||
const fileExistenceRules: readonly ValidationRule[] = Object.freeze([
|
||||
createValidationRule(
|
||||
({ deliverableExists, queueExists }) => deliverableExists && queueExists,
|
||||
getExistenceErrorMessage
|
||||
),
|
||||
]);
|
||||
|
||||
// Generate appropriate error message based on which files are missing
|
||||
function getExistenceErrorMessage(existence: FileExistence): string {
|
||||
const { deliverableExists, queueExists } = existence;
|
||||
|
||||
if (!deliverableExists && !queueExists) {
|
||||
return 'Analysis failed: Neither deliverable nor queue file exists. Analysis agent must create both files.';
|
||||
}
|
||||
if (!queueExists) {
|
||||
return 'Analysis incomplete: Deliverable exists but queue file missing. Analysis agent must create both files.';
|
||||
}
|
||||
return 'Analysis incomplete: Queue exists but deliverable file missing. Analysis agent must create both files.';
|
||||
}
|
||||
|
||||
// Pure function to create file paths
|
||||
const createPaths = (
|
||||
vulnType: VulnType,
|
||||
sourceDir: string
|
||||
): PathsBase | PathsWithError => {
|
||||
const config = VULN_TYPE_CONFIG[vulnType];
|
||||
if (!config) {
|
||||
return {
|
||||
error: new PentestError(
|
||||
`Unknown vulnerability type: ${vulnType}`,
|
||||
'validation',
|
||||
false,
|
||||
{ vulnType }
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
return Object.freeze({
|
||||
vulnType,
|
||||
deliverable: path.join(sourceDir, 'deliverables', config.deliverable),
|
||||
queue: path.join(sourceDir, 'deliverables', config.queue),
|
||||
sourceDir,
|
||||
});
|
||||
};
|
||||
|
||||
// Pure function to check file existence
|
||||
const checkFileExistence = async (
|
||||
paths: PathsBase | PathsWithError
|
||||
): Promise<PathsWithExistence | PathsWithError> => {
|
||||
if ('error' in paths) return paths;
|
||||
|
||||
const [deliverableExists, queueExists] = await Promise.all([
|
||||
fs.pathExists(paths.deliverable),
|
||||
fs.pathExists(paths.queue),
|
||||
]);
|
||||
|
||||
return Object.freeze({
|
||||
...paths,
|
||||
existence: Object.freeze({ deliverableExists, queueExists }),
|
||||
});
|
||||
};
|
||||
|
||||
// Validates deliverable/queue symmetry - both must exist or neither
|
||||
const validateExistenceRules = (
|
||||
pathsWithExistence: PathsWithExistence | PathsWithError
|
||||
): PathsWithExistence | PathsWithError => {
|
||||
if ('error' in pathsWithExistence) return pathsWithExistence;
|
||||
|
||||
const { existence, vulnType } = pathsWithExistence;
|
||||
|
||||
// Find the first rule that fails
|
||||
const failedRule = fileExistenceRules.find((rule) => !rule.predicate(existence));
|
||||
|
||||
if (failedRule) {
|
||||
const message =
|
||||
typeof failedRule.errorMessage === 'function'
|
||||
? failedRule.errorMessage(existence)
|
||||
: failedRule.errorMessage;
|
||||
|
||||
return {
|
||||
error: new PentestError(
|
||||
`${message} (${vulnType})`,
|
||||
'validation',
|
||||
failedRule.retryable,
|
||||
{
|
||||
vulnType,
|
||||
deliverablePath: pathsWithExistence.deliverable,
|
||||
queuePath: pathsWithExistence.queue,
|
||||
existence,
|
||||
},
|
||||
ErrorCode.DELIVERABLE_NOT_FOUND
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
return pathsWithExistence;
|
||||
};
|
||||
|
||||
// Pure function to validate queue structure
|
||||
const validateQueueStructure = (content: string): QueueValidationResult => {
|
||||
try {
|
||||
const parsed = JSON.parse(content) as unknown;
|
||||
const isValid =
|
||||
typeof parsed === 'object' &&
|
||||
parsed !== null &&
|
||||
'vulnerabilities' in parsed &&
|
||||
Array.isArray((parsed as QueueData).vulnerabilities);
|
||||
|
||||
return Object.freeze({
|
||||
valid: isValid,
|
||||
data: isValid ? (parsed as QueueData) : null,
|
||||
error: null,
|
||||
});
|
||||
} catch (parseError) {
|
||||
return Object.freeze({
|
||||
valid: false,
|
||||
data: null,
|
||||
error: parseError instanceof Error ? parseError.message : String(parseError),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Queue parse failures are retryable - agent can fix malformed JSON on retry
|
||||
const validateQueueContent = async (
|
||||
pathsWithExistence: PathsWithExistence | PathsWithError
|
||||
): Promise<PathsWithQueue | PathsWithError> => {
|
||||
if ('error' in pathsWithExistence) return pathsWithExistence;
|
||||
|
||||
try {
|
||||
const queueContent = await fs.readFile(pathsWithExistence.queue, 'utf8');
|
||||
const queueValidation = validateQueueStructure(queueContent);
|
||||
|
||||
if (!queueValidation.valid) {
|
||||
// Rule 6: Both exist, queue invalid
|
||||
return {
|
||||
error: new PentestError(
|
||||
queueValidation.error
|
||||
? `Queue validation failed for ${pathsWithExistence.vulnType}: Invalid JSON structure. Analysis agent must fix queue format.`
|
||||
: `Queue validation failed for ${pathsWithExistence.vulnType}: Missing or invalid 'vulnerabilities' array. Analysis agent must fix queue structure.`,
|
||||
'validation',
|
||||
true, // retryable
|
||||
{
|
||||
vulnType: pathsWithExistence.vulnType,
|
||||
queuePath: pathsWithExistence.queue,
|
||||
originalError: queueValidation.error,
|
||||
queueStructure: queueValidation.data ? Object.keys(queueValidation.data) : [],
|
||||
}
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
return Object.freeze({
|
||||
...pathsWithExistence,
|
||||
queueData: queueValidation.data!,
|
||||
});
|
||||
} catch (readError) {
|
||||
return {
|
||||
error: new PentestError(
|
||||
`Failed to read queue file for ${pathsWithExistence.vulnType}: ${readError instanceof Error ? readError.message : String(readError)}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{
|
||||
vulnType: pathsWithExistence.vulnType,
|
||||
queuePath: pathsWithExistence.queue,
|
||||
originalError: readError instanceof Error ? readError.message : String(readError),
|
||||
}
|
||||
),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Final decision: skip if queue says no vulns, proceed if vulns found, error otherwise
|
||||
const determineExploitationDecision = (
|
||||
validatedData: PathsWithQueue | PathsWithError
|
||||
): ExploitationDecision => {
|
||||
if ('error' in validatedData) {
|
||||
throw validatedData.error;
|
||||
}
|
||||
|
||||
const hasVulnerabilities = validatedData.queueData.vulnerabilities.length > 0;
|
||||
|
||||
// Rule 4: Both exist, queue valid and populated
|
||||
// Rule 5: Both exist, queue valid but empty
|
||||
return Object.freeze({
|
||||
shouldExploit: hasVulnerabilities,
|
||||
shouldRetry: false,
|
||||
vulnerabilityCount: validatedData.queueData.vulnerabilities.length,
|
||||
vulnType: validatedData.vulnType,
|
||||
});
|
||||
};
|
||||
|
||||
// Main functional validation pipeline
|
||||
export async function validateQueueAndDeliverable(
|
||||
vulnType: VulnType,
|
||||
sourceDir: string
|
||||
): Promise<ExploitationDecision> {
|
||||
return asyncPipe<ExploitationDecision>(
|
||||
createPaths(vulnType, sourceDir),
|
||||
checkFileExistence,
|
||||
validateExistenceRules,
|
||||
validateQueueContent,
|
||||
determineExploitationDecision
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Safely validate queue and deliverable files.
|
||||
* Returns Result<ExploitationDecision, PentestError> for explicit error handling.
|
||||
*/
|
||||
export async function validateQueueSafe(
|
||||
vulnType: VulnType,
|
||||
sourceDir: string
|
||||
): Promise<SafeValidationResult> {
|
||||
try {
|
||||
const result = await validateQueueAndDeliverable(vulnType, sourceDir);
|
||||
return ok(result);
|
||||
} catch (error) {
|
||||
return err(error as PentestError);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import { PentestError } from './error-handling.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
|
||||
interface DeliverableFile {
|
||||
name: string;
|
||||
path: string;
|
||||
required: boolean;
|
||||
}
|
||||
|
||||
// Pure function: Assemble final report from specialist deliverables
|
||||
export async function assembleFinalReport(sourceDir: string, logger: ActivityLogger): Promise<string> {
|
||||
const deliverableFiles: DeliverableFile[] = [
|
||||
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
|
||||
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
|
||||
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
|
||||
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
|
||||
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false }
|
||||
];
|
||||
|
||||
const sections: string[] = [];
|
||||
|
||||
for (const file of deliverableFiles) {
|
||||
const filePath = path.join(sourceDir, 'deliverables', file.path);
|
||||
try {
|
||||
if (await fs.pathExists(filePath)) {
|
||||
const content = await fs.readFile(filePath, 'utf8');
|
||||
sections.push(content);
|
||||
logger.info(`Added ${file.name} findings`);
|
||||
} else if (file.required) {
|
||||
throw new PentestError(
|
||||
`Required deliverable file not found: ${file.path}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ deliverableFile: file.path, sourceDir },
|
||||
ErrorCode.DELIVERABLE_NOT_FOUND
|
||||
);
|
||||
} else {
|
||||
logger.info(`No ${file.name} deliverable found`);
|
||||
}
|
||||
} catch (error) {
|
||||
if (file.required) {
|
||||
throw error;
|
||||
}
|
||||
const err = error as Error;
|
||||
logger.warn(`Could not read ${file.path}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const finalContent = sections.join('\n\n');
|
||||
const deliverablesDir = path.join(sourceDir, 'deliverables');
|
||||
const finalReportPath = path.join(deliverablesDir, 'comprehensive_security_assessment_report.md');
|
||||
|
||||
try {
|
||||
// Ensure deliverables directory exists
|
||||
await fs.ensureDir(deliverablesDir);
|
||||
await fs.writeFile(finalReportPath, finalContent);
|
||||
logger.info(`Final report assembled at ${finalReportPath}`);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
throw new PentestError(
|
||||
`Failed to write final report: ${err.message}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ finalReportPath, originalError: err.message }
|
||||
);
|
||||
}
|
||||
|
||||
return finalContent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inject model information into the final security report.
|
||||
* Reads session.json to get the model(s) used, then injects a "Model:" line
|
||||
* into the Executive Summary section of the report.
|
||||
*/
|
||||
export async function injectModelIntoReport(
|
||||
repoPath: string,
|
||||
outputPath: string,
|
||||
logger: ActivityLogger
|
||||
): Promise<void> {
|
||||
// 1. Read session.json to get model information
|
||||
const sessionJsonPath = path.join(outputPath, 'session.json');
|
||||
|
||||
if (!(await fs.pathExists(sessionJsonPath))) {
|
||||
logger.warn('session.json not found, skipping model injection');
|
||||
return;
|
||||
}
|
||||
|
||||
interface SessionData {
|
||||
metrics: {
|
||||
agents: Record<string, { model?: string }>;
|
||||
};
|
||||
}
|
||||
|
||||
const sessionData: SessionData = await fs.readJson(sessionJsonPath);
|
||||
|
||||
// 2. Extract unique models from all agents
|
||||
const models = new Set<string>();
|
||||
for (const agent of Object.values(sessionData.metrics.agents)) {
|
||||
if (agent.model) {
|
||||
models.add(agent.model);
|
||||
}
|
||||
}
|
||||
|
||||
if (models.size === 0) {
|
||||
logger.warn('No model information found in session.json');
|
||||
return;
|
||||
}
|
||||
|
||||
const modelStr = Array.from(models).join(', ');
|
||||
logger.info(`Injecting model info into report: ${modelStr}`);
|
||||
|
||||
// 3. Read the final report
|
||||
const reportPath = path.join(repoPath, 'deliverables', 'comprehensive_security_assessment_report.md');
|
||||
|
||||
if (!(await fs.pathExists(reportPath))) {
|
||||
logger.warn('Final report not found, skipping model injection');
|
||||
return;
|
||||
}
|
||||
|
||||
let reportContent = await fs.readFile(reportPath, 'utf8');
|
||||
|
||||
// 4. Find and inject model line after "Assessment Date" in Executive Summary
|
||||
// Pattern: "- Assessment Date: <date>" followed by a newline
|
||||
const assessmentDatePattern = /^(- Assessment Date: .+)$/m;
|
||||
const match = reportContent.match(assessmentDatePattern);
|
||||
|
||||
if (match) {
|
||||
// Inject model line after Assessment Date
|
||||
const modelLine = `- Model: ${modelStr}`;
|
||||
reportContent = reportContent.replace(
|
||||
assessmentDatePattern,
|
||||
`$1\n${modelLine}`
|
||||
);
|
||||
logger.info('Model info injected into Executive Summary');
|
||||
} else {
|
||||
// If no Assessment Date line found, try to add after Executive Summary header
|
||||
const execSummaryPattern = /^## Executive Summary$/m;
|
||||
if (reportContent.match(execSummaryPattern)) {
|
||||
// Add model as first item in Executive Summary
|
||||
reportContent = reportContent.replace(
|
||||
execSummaryPattern,
|
||||
`## Executive Summary\n- Model: ${modelStr}`
|
||||
);
|
||||
logger.info('Model info added to Executive Summary header');
|
||||
} else {
|
||||
logger.warn('Could not find Executive Summary section');
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Write modified report back
|
||||
await fs.writeFile(reportPath, reportContent);
|
||||
}
|
||||
Reference in New Issue
Block a user