1 Commits

Author SHA1 Message Date
Chris Farhood 47a6e4933a feat: backport auth-validation preflight + email_login credentials
CI / Type-check & lint (pull_request) Successful in 16s
CI / Build & push worker image (pull_request) Has been skipped
CI / Build & push API image (pull_request) Has been skipped
Backport upstream Shannon PR #335:
- Add credential validation activity that drives a real browser login
  before the full pipeline, catching bad credentials early
- New email_login credentials type for magic-link and email-OTP flows
- Make credentials.password optional for passwordless flows
- Playwright stealth config (chrome.runtime, plugin simulation, UA)
- Centralize prompt directory resolution into resolvePromptDir helper
- New AUTH_LOGIN_FAILED error code with non-retryable classification
- Remove dangerous-pattern validation on credentials.password
- Pipeline-testing stub for auth validation (returns success)
- Auth validation timeout of 10 minutes for browser-based login
- .playwright directory workspace overlay for CLI/Docker

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-05-20 00:59:27 +00:00
40 changed files with 679 additions and 1233 deletions
+2 -1
View File
@@ -65,7 +65,7 @@ export async function start(args: StartArgs): Promise<void> {
const workspacePath = path.join(workspacesDir, workspace);
fs.mkdirSync(workspacePath, { recursive: true });
fs.chmodSync(workspacePath, 0o777);
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli', '.playwright']) {
const dirPath = path.join(workspacePath, dir);
fs.mkdirSync(dirPath, { recursive: true });
fs.chmodSync(dirPath, 0o777);
@@ -76,6 +76,7 @@ export async function start(args: StartArgs): Promise<void> {
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
fs.mkdirSync(path.join(shannonDir, dir), { recursive: true });
}
fs.mkdirSync(path.join(repo.hostPath, '.playwright'), { recursive: true });
const credentialsPath = getCredentialsPath();
const hasCredentials = fs.existsSync(credentialsPath);
+2 -1
View File
@@ -186,11 +186,12 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
args.push('-v', `${opts.workspacesDir}:/app/workspaces`);
args.push('-v', `${opts.repo.hostPath}:${opts.repo.containerPath}:ro`);
// Writable overlays: shadow .shannon/ inside the :ro repo with workspace-backed dirs
// Writable overlays: shadow .shannon/ and .playwright/ inside the :ro repo with workspace-backed dirs
const workspacePath = path.join(opts.workspacesDir, opts.workspace);
args.push('-v', `${path.join(workspacePath, 'deliverables')}:${opts.repo.containerPath}/.shannon/deliverables`);
args.push('-v', `${path.join(workspacePath, 'scratchpad')}:${opts.repo.containerPath}/.shannon/scratchpad`);
args.push('-v', `${path.join(workspacePath, '.playwright-cli')}:${opts.repo.containerPath}/.shannon/.playwright-cli`);
args.push('-v', `${path.join(workspacePath, '.playwright')}:${opts.repo.containerPath}/.playwright`);
// Local mode: mount prompts for live editing
if (opts.promptsDir) {
+31 -56
View File
@@ -39,9 +39,33 @@
"type": "string",
"pattern": "^[A-Za-z2-7]+=*$",
"description": "TOTP secret for two-factor authentication (Base32 encoded, case insensitive)"
},
"email_login": {
"type": "object",
"description": "Email-based login credentials for magic-link and email-OTP flows",
"properties": {
"address": {
"type": "string",
"format": "email",
"description": "Email address for authentication"
},
"password": {
"type": "string",
"minLength": 1,
"maxLength": 255,
"description": "Password for the email account"
},
"totp_secret": {
"type": "string",
"pattern": "^[A-Za-z2-7]+=*$",
"description": "TOTP secret for email 2FA (Base32 encoded)"
}
},
"required": ["address", "password"],
"additionalProperties": false
}
},
"required": ["username", "password"],
"required": ["username"],
"additionalProperties": false
},
"login_flow": {
@@ -118,51 +142,6 @@
},
"additionalProperties": false
},
"vuln_classes": {
"type": "array",
"description": "Vulnerability classes to test. When omitted, all five classes run. When set, only listed classes run; their vuln+exploit agents and report sections are included.",
"items": {
"type": "string",
"enum": ["injection", "xss", "auth", "authz", "ssrf"]
},
"minItems": 1,
"maxItems": 5,
"uniqueItems": true
},
"exploit": {
"type": "string",
"enum": ["true", "false"],
"description": "Whether to run the exploitation phase (default true). Set false to run only analysis."
},
"report": {
"type": "object",
"description": "Report filtering and guidance applied by the report agent.",
"properties": {
"min_severity": {
"type": "string",
"enum": ["low", "medium", "high", "critical"],
"description": "Minimum severity threshold; findings below are dropped by the report agent."
},
"min_confidence": {
"type": "string",
"enum": ["low", "medium", "high"],
"description": "Minimum confidence threshold; findings below are dropped by the report agent."
},
"guidance": {
"type": "string",
"minLength": 1,
"maxLength": 500,
"description": "Free-text guidance to the report agent (e.g., 'Drop findings about missing security headers')."
}
},
"additionalProperties": false
},
"rules_of_engagement": {
"type": "string",
"minLength": 1,
"maxLength": 1000,
"description": "Free-text instructions to the agent that render into every prompt."
},
"login": {
"type": "object",
"description": "Deprecated: Use 'authentication' section instead",
@@ -180,11 +159,7 @@
{ "required": ["authentication"] },
{ "required": ["rules"] },
{ "required": ["authentication", "rules"] },
{ "required": ["description"] },
{ "required": ["vuln_classes"] },
{ "required": ["exploit"] },
{ "required": ["report"] },
{ "required": ["rules_of_engagement"] }
{ "required": ["description"] }
],
"additionalProperties": false,
"$defs": {
@@ -200,17 +175,17 @@
},
"type": {
"type": "string",
"enum": ["url_path", "subdomain", "domain", "method", "header", "parameter", "code_path"],
"description": "Type of rule (what aspect of requests or source code to match against)"
"enum": ["path", "subdomain", "domain", "method", "header", "parameter"],
"description": "Type of rule (what aspect of requests to match against)"
},
"value": {
"url_path": {
"type": "string",
"minLength": 1,
"maxLength": 1000,
"description": "Value to match"
"description": "URL path pattern or value to match"
}
},
"required": ["description", "type", "value"],
"required": ["description", "type", "url_path"],
"additionalProperties": false
}
}
+26 -56
View File
@@ -4,27 +4,6 @@
# Description of the target environment (optional, max 500 chars)
description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production."
# Limit which vulnerability classes run end-to-end (optional, default: all five)
# vuln_classes: [injection, xss, auth, authz, ssrf]
# Skip the exploitation phase (optional, default: "true")
# exploit: "false"
# Free-form engagement rules applied to analysis and exploitation agents (optional).
# Example below is illustrative; edit, remove, or add sections as needed.
# rules_of_engagement: |
# Forbidden techniques:
# - No password brute-force or credential stuffing. Cap login attempts at 5 per account.
# - ...
#
# Operational:
# - Throttle to under 5 requests per second per endpoint. Back off 60 seconds on any 429 response.
# - ...
#
# Data handling:
# - Do not include actual values in deliverables — use placeholders like [order_id] or [user_email].
# - ...
authentication:
login_type: form # Options: 'form' or 'sso'
login_url: "https://example.com/login"
@@ -46,55 +25,46 @@ authentication:
value: "/dashboard"
rules:
# Supported types: url_path, subdomain, domain, method, header, parameter, code_path
avoid:
- description: "Do not test the marketing site subdomain"
type: subdomain
value: "www"
url_path: "www"
- description: "Skip logout functionality"
type: url_path
value: "/logout"
type: path
url_path: "/logout"
- description: "No DELETE operations on user API"
type: url_path
value: "/api/v1/users/*"
type: path
url_path: "/api/v1/users/*"
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "test/**").
# - description: "Test fixtures and specs (not production code)"
# type: code_path
# value: "test/**"
#
# - description: "Generated migrations"
# type: code_path
# value: "db/migrations/**"
focus:
- description: "Prioritize beta admin panel subdomain"
type: subdomain
value: "beta-admin"
url_path: "beta-admin"
- description: "Focus on user profile updates"
type: url_path
value: "/api/v2/user-profile"
type: path
url_path: "/api/v2/user-profile"
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "routes/*.ts").
# - description: "Express route handlers"
# type: code_path
# value: "routes/*.ts"
#
# - description: "Sequelize ORM model definitions"
# type: code_path
# value: "models/*.ts"
# Report filters applied by the report agent when assembling the final report (optional).
# Example below is illustrative; edit, remove, or add sections as needed.
# report:
# min_severity: low
# min_confidence: low
# guidance: |
# Drop findings about missing security headers and rate-limit gaps.
# ...
# Email-based login (for magic-link / email-OTP flows)
# authentication:
# login_type: form
# login_url: "https://example.com/login"
# credentials:
# username: "testuser"
# email_login:
# address: "testuser@example.com"
# password: "email-password"
# totp_secret: "JBSWY3DPEHPK3PXP" # Optional TOTP for email 2FA
# login_flow:
# - "Type $username into the username field"
# - "Type $email_address into the email field"
# - "Type $email_password into the email password field"
# - "Enter $email_totp in the verification code field"
# success_condition:
# type: url_contains
# value: "/dashboard"
# Pipeline execution settings (optional)
# pipeline:
-2
View File
@@ -105,8 +105,6 @@ If you only confirmed a vulnerability without attempting to weaponize it, you ha
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/auth_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
-2
View File
@@ -92,8 +92,6 @@ After exhaustive bypass attempts, determine:
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/authz_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
@@ -69,8 +69,6 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/injection_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
-2
View File
@@ -92,8 +92,6 @@ After exhaustive bypass attempts, determine:
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/ssrf_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
-2
View File
@@ -90,8 +90,6 @@ After exhaustive bypass attempts, determine:
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<starting_context>
- Your **actionable exploitation queue** is located at `.shannon/deliverables/xss_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
@@ -0,0 +1 @@
Return the structured verdict `{ "login_success": true }` and stop.
-8
View File
@@ -31,14 +31,6 @@ Read `.gitignore` and run `git ls-files --others --ignored --exclude-standard --
{{DESCRIPTION}}
<scope>
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}.
</scope>
@include(shared/_code-path-rules.txt)
@include(shared/_rules-of-engagement.txt)
<system_architecture>
**PENTESTING WORKFLOW - YOUR POSITION:**
-8
View File
@@ -20,10 +20,6 @@ Filesystem:
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
</target>
<scope>
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}. Map only what supports these classes.
</scope>
<rules>
Rules to Avoid:
{{RULES_AVOID}}
@@ -32,14 +28,10 @@ Areas to Focus On:
{{RULES_FOCUS}}
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<scope_boundaries>
# Penetration Test Scope & Boundaries
+25 -28
View File
@@ -9,11 +9,11 @@ Technical leadership (CTOs, CISOs, Engineering VPs) who need both technical accu
</audience>
<objective>
The orchestrator has already concatenated all per-class deliverables into `comprehensive_security_assessment_report.md`. Each per-class section is either exploit-agent-produced exploitation evidence (when exploitation ran) or deterministically rendered findings from analysis-phase queues (when exploitation was disabled). The cleanup rules below apply uniformly to either source.
The orchestrator has already concatenated all specialist reports into `comprehensive_security_assessment_report.md`.
Your task is to:
1. Read this existing concatenated report
2. Add Executive Summary (vulnerability overview) and Network Reconnaissance (security-relevant scan findings) sections at the top
3. Clean up ALL per-class report sections by removing extraneous content
2. Add an Executive Summary (vulnerability overview) at the top
3. Clean up ALL exploitation evidence sections by removing hallucinated content
4. Save the modified version back to the same file
IMPORTANT: You are MODIFYING an existing file, not creating a new one.
@@ -32,13 +32,8 @@ Filesystem:
<context>
Authentication Context:
{{AUTH_CONTEXT}}
Vulnerability classes tested in this run: {{VULN_CLASSES_TESTED}}
Exploitation: {{EXPLOITATION}}
</context>
{{REPORT_FILTERS_BLOCK}}
<input_files>
You will analyze the following deliverable files:
- `.shannon/deliverables/pre_recon_deliverable.md` - Initial reconnaissance and technology stack
@@ -56,31 +51,34 @@ MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessmen
## Executive Summary
- Target: {{WEB_URL}}
- Assessment Date: {current date}
- Scope: {{VULN_CLASSES_TESTED}}
- Exploitation: {{EXPLOITATION}}
- Scope: Authentication, XSS, SQL and Command Injection, SSRF, Authorization testing
## Summary by Vulnerability Type
{For each vulnerability type below, examine the comprehensive_security_assessment_report.md sections and provide a summary. If no section exists for that type or no vulnerabilities are listed, explicitly state that none were found.}
{{VULN_SUMMARY_SUBSECTIONS}}
**Authentication Vulnerabilities:**
{Check for "Authentication Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authentication vulnerabilities were found."}
## Network Reconnaissance
{Extract security-relevant findings from automated scanning tools:
- Open ports and exposed services from nmap
- Subdomain discoveries from subfinder that could expand attack surface
- Security headers or misconfigurations detected by whatweb
- Any other security-relevant findings from the automated tools
SKIP stack details - technical leaders know their infrastructure}
**Authorization Vulnerabilities:**
{Check for "Authorization Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authorization vulnerabilities were found."}
2. KEEPING the existing per-class report sections but CLEANING them according to the rules below
**Cross-Site Scripting (XSS) Vulnerabilities:**
{Check for "XSS Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No XSS vulnerabilities were found."}
**SQL/Command Injection Vulnerabilities:**
{Check for "Injection Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SQL or command injection vulnerabilities were found."}
**Server-Side Request Forgery (SSRF) Vulnerabilities:**
{Check for "SSRF Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SSRF vulnerabilities were found."}
2. KEEPING the existing exploitation evidence sections but CLEANING them according to the rules below
3. The final structure should be:
- Executive Summary (new)
- Network Reconnaissance (new)
- All existing per-class report sections (cleaned)
- All existing exploitation evidence sections (cleaned)
IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
</deliverable_instructions>
@@ -93,12 +91,11 @@ IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the e
2. Create the Executive Summary content:
- Executive Summary: Technical overview with actionable findings for engineering leaders
3. Clean the per-class report sections in `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
3. Clean the exploitation evidence sections from `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
- KEEP these specific section headings:
NOTE: these sections will contain vulnerability lists with IDs matching pattern `### [TYPE]-VULN-[NUMBER]`
* `# [Type] {{REPORT_VULN_HEADING}}`
* `## {{REPORT_VULN_SUBHEADING}}`
{{REPORT_FILTER_RULES}}
* `# [Type] Exploitation Evidence`
* `## Successfully Exploited Vulnerabilities`
- REMOVE ANY OTHER SECTIONS (even if they contain vulnerability IDs), such as:
* `## Potential Vulnerabilities (Validation Blocked)` (All agents)
* Standalone "Recommendations" sections
@@ -110,11 +107,11 @@ IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the e
* False positives sections
* any intros in the sections
* any counts in the sections
- Preserve exact vulnerability IDs (`### [TYPE]-VULN-NN:`); if the title after the colon is only a short category label rather than a descriptive phrase, rewrite it to a concise human-readable descriptor derived from the finding's Vulnerable location and Overview.
- Preserve exact vulnerability IDs and formatting
4. Combine the content:
- Place the Executive Summary and Network Reconnaissance sections at the top
- Follow with the cleaned per-class report sections
- Follow with the cleaned exploitation evidence sections
- Save as the modified `.shannon/deliverables/comprehensive_security_assessment_report.md`
CRITICAL: You are modifying the existing concatenated report at `.shannon/deliverables/comprehensive_security_assessment_report.md` IN-PLACE, not creating a separate file.
@@ -1,13 +0,0 @@
<code_path_rules>
Source-code routing. Each rule is tagged `[FILE]` (literal path) or `[GLOB]` (pattern). All paths are repository-relative.
How to apply (focus rules):
- For `[FILE]` entries — delegate analysis to the Task tool.
- For `[GLOB]` entries — invoke the Glob tool to enumerate matches, then delegate analysis of every match to the Task tool.
Avoid — out of scope. Skip entirely; the tool layer will block any access attempts.
{{CODE_RULES_AVOID}}
Focus — priority work assignments. Analyze every entry.
{{CODE_RULES_FOCUS}}
</code_path_rules>
@@ -1,3 +0,0 @@
<rules_of_engagement>
{{RULES_OF_ENGAGEMENT}}
</rules_of_engagement>
@@ -0,0 +1,26 @@
<role>
You are a credential validation agent. Your sole job is to verify that the provided login credentials work against the target application.
</role>
<objective>
Attempt to log in to the target application using the provided credentials. Report whether the login succeeded or failed.
</objective>
<target_authentication>
{{AUTH_CONTEXT}}
</target_authentication>
<cli_tools>
Use playwright-cli with session flag: `-s={{PLAYWRIGHT_SESSION}}`
</cli_tools>
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
<critical>
- Do NOT explore the application beyond verifying the login.
- Do NOT modify any data or settings.
- After verifying, return your structured verdict immediately and stop.
- If login fails, state which step failed and briefly describe what went wrong (mask sensitive values such as passwords).
</critical>
-4
View File
@@ -20,14 +20,10 @@ Success criterion: A complete, code-backed analysis of every potential authentic
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** A flaw in authentication is a catastrophic failure. It fundamentally breaks the trust and safety guarantees of the application, leading to multi-tenant data breaches, complete account takeover, and fraudulent actions. These are the most critical vulnerabilities you can find.
-4
View File
@@ -19,14 +19,10 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** A flaw in authorization is a critical security failure. It allows privilege escalation, horizontal and vertical access control bypass, and unauthorized data access. These vulnerabilities enable attackers to access resources they shouldn't, escalate privileges, and compromise multi-tenant data isolation.
-4
View File
@@ -20,14 +20,10 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** A structural flaw in a backend command, whether an SQL query or a shell command, is one of the most severe classes of vulnerability in a SaaS application. It undermines the foundational trust of the system by creating the *potential* for data exposure (SQLi) or direct server compromise (Command Injection).
-4
View File
@@ -19,14 +19,10 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- **Severity Context:** An SSRF flaw is a breach of network segmentation and trust boundaries. It allows attackers to leverage the application server as a proxy to access internal services, cloud metadata endpoints, or perform reconnaissance of internal networks. These vulnerabilities can lead to data exposure, privilege escalation, and complete network compromise.
-4
View File
@@ -19,14 +19,10 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
@include(shared/_rules.txt)
</rules>
@include(shared/_code-path-rules.txt)
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
@include(shared/_rules-of-engagement.txt)
<critical>
**Your Professional Standard**
- Severity Context: An XSS flaw is a breach of trust between the user and the application. It grants an attacker the ability to execute arbitrary code within a victim's browser, leading to session hijacking, credential theft, and complete compromise of the user's account and data within the application.
@@ -0,0 +1,78 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import fs from 'node:fs/promises';
import path from 'node:path';
export type StealthConfigWriteResult = 'wrote' | 'skipped-existing';
// Browser-side JavaScript kept as a string; writePlaywrightStealthConfig writes it
// verbatim to `.playwright/stealth-init.js`. The snippet:
//   - redefines `navigator.webdriver` with a getter that returns false,
//   - redefines `navigator.plugins` with a getter returning a three-entry array
//     that carries a no-op `refresh` method,
//   - creates `window.chrome` and `window.chrome.runtime` as empty objects when absent.
// `.trim()` removes the leading/trailing newline introduced by the template literal.
// NOTE(review): buildStealthConfig does not reference stealth-init.js — confirm how
// the Playwright CLI is expected to load this script.
const STEALTH_INIT_SCRIPT = `
// Remove webdriver flag
Object.defineProperty(navigator, 'webdriver', { get: () => false });
// Fake plugins array (Chrome PDF Plugin, PDF Viewer, Native Client)
Object.defineProperty(navigator, 'plugins', {
get: () => {
const plugins = [
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
];
plugins.refresh = () => {};
return plugins;
},
});
// Stub window.chrome.runtime
if (!window.chrome) window.chrome = {};
if (!window.chrome.runtime) window.chrome.runtime = {};
`.trim();
/**
 * Assemble the configuration object that is serialized to `cli.config.json`.
 *
 * A fresh object is built on every call: headless Chromium, launch flags that
 * turn off the automation-controlled Blink feature and first-run/default-browser
 * prompts, the `--enable-automation` default argument suppressed, and a
 * 1920x1080 `en-US` context with a desktop Linux Chrome user agent.
 *
 * @returns The configuration as a plain object.
 */
function buildStealthConfig(): object {
  const chromiumFlags = [
    '--disable-blink-features=AutomationControlled',
    '--no-first-run',
    '--no-default-browser-check',
  ];
  const desktopChromeUserAgent =
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';

  const launchOptions = {
    headless: true,
    args: chromiumFlags,
    ignoreDefaultArgs: ['--enable-automation'],
  };
  const contextOptions = {
    viewport: { width: 1920, height: 1080 },
    locale: 'en-US',
    userAgent: desktopChromeUserAgent,
  };

  // Key order matters for the serialized JSON: browser, launchOptions, contextOptions.
  return { browser: 'chromium', launchOptions, contextOptions };
}
/**
 * Ensure `<sourceDir>/.playwright` contains the stealth CLI config and its init script.
 *
 * If `cli.config.json` can already be stat'ed, nothing is written. Otherwise the
 * `.playwright` directory is created (recursively) and `cli.config.json` plus
 * `stealth-init.js` are written concurrently. Failures from `mkdir` or
 * `writeFile` are not caught here and reject the returned promise.
 *
 * @param sourceDir - Directory under which the `.playwright` folder lives.
 * @returns `'skipped-existing'` when the config file was already present, otherwise `'wrote'`.
 */
export async function writePlaywrightStealthConfig(sourceDir: string): Promise<StealthConfigWriteResult> {
  const targetDir = path.join(sourceDir, '.playwright');
  const configFile = path.join(targetDir, 'cli.config.json');
  const initScriptFile = path.join(targetDir, 'stealth-init.js');

  // Any stat failure is treated as "config not present yet".
  let configPresent = true;
  try {
    await fs.stat(configFile);
  } catch {
    configPresent = false;
  }
  if (configPresent) {
    return 'skipped-existing';
  }

  await fs.mkdir(targetDir, { recursive: true });

  const serializedConfig = JSON.stringify(buildStealthConfig(), null, 2);
  await Promise.all([
    fs.writeFile(configFile, serializedConfig, 'utf8'),
    fs.writeFile(initScriptFile, STEALTH_INIT_SCRIPT, 'utf8'),
  ]);

  return 'wrote';
}
+24 -115
View File
@@ -17,26 +17,15 @@ import type { AgentName } from '../types/agents.js';
// === Common Fields ===
const ANALYSIS_NOTES_DESCRIPTION = 'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.';
const baseVulnerability = z.object({
ID: z.string(),
vulnerability_type: z.string(),
externally_exploitable: z.boolean(),
confidence: z.string(),
notes: z.string().optional(),
});
function notesField(exploit: boolean) {
const f = z.string().optional();
return exploit ? f : f.describe(ANALYSIS_NOTES_DESCRIPTION);
}
function makeBase(exploit: boolean) {
return z.object({
ID: z.string(),
vulnerability_type: z.string(),
externally_exploitable: z.boolean(),
confidence: z.string(),
notes: notesField(exploit),
});
}
// === Per-Vuln-Type Schemas (used for type inference; notes description is mode-agnostic for types) ===
const baseVulnerability = makeBase(true);
// === Per-Vuln-Type Schemas ===
const InjectionVulnerability = baseVulnerability.extend({
source: z.string().optional(),
@@ -90,13 +79,13 @@ const AuthzVulnerability = baseVulnerability.extend({
minimal_witness: z.string().optional(),
});
// === Inferred Entry Types (consumed by renderer) ===
// === Queue Wrapper Schemas ===
export type InjectionFinding = z.infer<typeof InjectionVulnerability>;
export type XssFinding = z.infer<typeof XssVulnerability>;
export type AuthFinding = z.infer<typeof AuthVulnerability>;
export type SsrfFinding = z.infer<typeof SsrfVulnerability>;
export type AuthzFinding = z.infer<typeof AuthzVulnerability>;
const InjectionQueueSchema = z.object({ vulnerabilities: z.array(InjectionVulnerability) });
const XssQueueSchema = z.object({ vulnerabilities: z.array(XssVulnerability) });
const AuthQueueSchema = z.object({ vulnerabilities: z.array(AuthVulnerability) });
const SsrfQueueSchema = z.object({ vulnerabilities: z.array(SsrfVulnerability) });
const AuthzQueueSchema = z.object({ vulnerabilities: z.array(AuthzVulnerability) });
// === Convert to JSON Schema for SDK ===
@@ -106,95 +95,15 @@ function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat {
return { type: 'json_schema', schema: z.toJSONSchema(zodSchema, { target: 'draft-07' }) as Record<string, unknown> };
}
// === Per-Mode Output Format Builders ===
// Two maps cached at module load; the only per-mode difference is the
// description on the `notes` field, which steers the LLM's writing.
// === Lookup Maps ===
function buildOutputFormats(exploit: boolean): Partial<Record<AgentName, JsonSchemaOutputFormat>> {
const base = makeBase(exploit);
return {
'injection-vuln': toOutputFormat(
z.object({
vulnerabilities: z.array(
base.extend({
source: z.string().optional(),
combined_sources: z.string().optional(),
path: z.string().optional(),
sink_call: z.string().optional(),
slot_type: z.string().optional(),
sanitization_observed: z.string().optional(),
concat_occurrences: z.string().optional(),
verdict: z.string().optional(),
mismatch_reason: z.string().optional(),
witness_payload: z.string().optional(),
}),
),
}),
),
'xss-vuln': toOutputFormat(
z.object({
vulnerabilities: z.array(
base.extend({
source: z.string().optional(),
source_detail: z.string().optional(),
path: z.string().optional(),
sink_function: z.string().optional(),
render_context: z.string().optional(),
encoding_observed: z.string().optional(),
verdict: z.string().optional(),
mismatch_reason: z.string().optional(),
witness_payload: z.string().optional(),
}),
),
}),
),
'auth-vuln': toOutputFormat(
z.object({
vulnerabilities: z.array(
base.extend({
source_endpoint: z.string().optional(),
vulnerable_code_location: z.string().optional(),
missing_defense: z.string().optional(),
exploitation_hypothesis: z.string().optional(),
suggested_exploit_technique: z.string().optional(),
}),
),
}),
),
'ssrf-vuln': toOutputFormat(
z.object({
vulnerabilities: z.array(
base.extend({
source_endpoint: z.string().optional(),
vulnerable_parameter: z.string().optional(),
vulnerable_code_location: z.string().optional(),
missing_defense: z.string().optional(),
exploitation_hypothesis: z.string().optional(),
suggested_exploit_technique: z.string().optional(),
}),
),
}),
),
'authz-vuln': toOutputFormat(
z.object({
vulnerabilities: z.array(
base.extend({
endpoint: z.string().optional(),
vulnerable_code_location: z.string().optional(),
role_context: z.string().optional(),
guard_evidence: z.string().optional(),
side_effect: z.string().optional(),
reason: z.string().optional(),
minimal_witness: z.string().optional(),
}),
),
}),
),
};
}
// Output-format tables produced by buildOutputFormats, one per value of its `exploit` flag.
const OUTPUT_FORMATS_EXPLOIT = buildOutputFormats(true);
const OUTPUT_FORMATS_ANALYSIS = buildOutputFormats(false);
// Output format per vuln agent, each derived from that class's queue schema via toOutputFormat.
// Agents without an entry have no structured output format in this table.
const VULN_AGENT_OUTPUT_FORMAT: Partial<Record<AgentName, JsonSchemaOutputFormat>> = {
'injection-vuln': toOutputFormat(InjectionQueueSchema),
'xss-vuln': toOutputFormat(XssQueueSchema),
'auth-vuln': toOutputFormat(AuthQueueSchema),
'ssrf-vuln': toOutputFormat(SsrfQueueSchema),
'authz-vuln': toOutputFormat(AuthzQueueSchema),
};
const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
'injection-vuln': 'injection_exploitation_queue.json',
@@ -205,8 +114,8 @@ const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
};
/** Returns the structured output format for a vuln agent, or undefined for non-vuln agents. */
export function getOutputFormat(agentName: AgentName, exploit = true): JsonSchemaOutputFormat | undefined {
return (exploit ? OUTPUT_FORMATS_EXPLOIT : OUTPUT_FORMATS_ANALYSIS)[agentName];
export function getOutputFormat(agentName: AgentName): JsonSchemaOutputFormat | undefined {
return VULN_AGENT_OUTPUT_FORMAT[agentName];
}
/** Returns the queue filename for a vuln agent, or undefined for non-vuln agents. */
-41
View File
@@ -1,41 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Writes ~/.claude/settings.json with permissions.deny rules derived from
* `code_path` avoid patterns. The SDK reads this via `settingSources: ['user']`;
* deny rules fire even in `bypassPermissions` mode.
*/
import os from 'node:os';
import { fs, path } from 'zx';
import type { DistributedConfig } from '../types/config.js';
/** Tool names that receive a deny entry for every avoided code path. */
const FILE_TOOLS = ['Read', 'Edit'] as const;

/**
 * Expands one `code_path` avoid pattern into `permissions.deny` entries, one
 * per tool in `FILE_TOOLS`, formatted as `Tool(./relative/pattern)`.
 *
 * Leading path-navigation segments (`/`, `./`, `../`, possibly repeated) are
 * removed so the argument is always rooted at `./`. A leading dot that is part
 * of a name (`.env`, `.github/...`) is kept; stripping it would make the deny
 * rule point at a different path than the one the operator asked to avoid.
 *
 * @param pattern - File path or glob taken from a `code_path` rule value.
 * @returns Deny entries in `FILE_TOOLS` order.
 */
function denyEntriesFor(pattern: string): string[] {
  // Match whole navigation segments only: up to two dots followed by a slash.
  const relative = pattern.replace(/^(?:\.{0,2}\/)+/, '');
  const arg = `./${relative}`;
  return FILE_TOOLS.map((tool) => `${tool}(${arg})`);
}
/**
 * Synchronizes `~/.claude/settings.json` with the `code_path` avoid rules.
 *
 * With no `code_path` avoid rules (or a null config) the settings file is
 * removed. Otherwise the file is written with an object containing only
 * `permissions.deny`, built by expanding each pattern through `denyEntriesFor`.
 *
 * @param config - Distributed config whose `avoid` rules are inspected; may be null.
 */
export async function writeUserSettingsForCodePathAvoids(config: DistributedConfig | null): Promise<void> {
  const codePathAvoids: string[] = [];
  for (const rule of config?.avoid ?? []) {
    if (rule.type === 'code_path') {
      codePathAvoids.push(rule.value);
    }
  }

  const settingsFile = path.join(os.homedir(), '.claude', 'settings.json');

  if (codePathAvoids.length > 0) {
    const deny = codePathAvoids.flatMap(denyEntriesFor);
    await fs.ensureDir(path.dirname(settingsFile));
    await fs.writeJson(settingsFile, { permissions: { deny } }, { spaces: 2 });
    return;
  }

  // Nothing to deny: remove the settings file.
  await fs.remove(settingsFile);
}
+35 -139
View File
@@ -10,13 +10,7 @@ import type { FormatsPlugin } from 'ajv-formats';
import yaml from 'js-yaml';
import { fs } from 'zx';
import { PentestError } from './services/error-handling.js';
import {
ALL_VULN_CLASSES,
type Authentication,
type Config,
type DistributedConfig,
type Rule,
} from './types/config.js';
import type { Authentication, Config, DistributedConfig, Rule } from './types/config.js';
import { ErrorCode } from './types/errors.js';
// Handle ESM/CJS interop for ajv-formats using require
@@ -312,39 +306,6 @@ export const parseConfigYAML = (yamlContent: string): Config => {
return config as Config;
};
/**
 * Rejects configs that still use renamed rule fields.
 *
 * Scans `rules.avoid` then `rules.focus` and reports, per rule, a `type` of
 * `'path'` (renamed to `'url_path'`) and a `url_path` key present without a
 * `value` key (renamed to `value`). Non-array sections and non-object entries
 * are ignored.
 *
 * @param config - Parsed configuration, inspected as a loose record.
 * @throws PentestError with `CONFIG_VALIDATION_FAILED` listing every deprecated usage found.
 */
function checkDeprecatedFields(config: Config): void {
  const loose = config as Record<string, unknown>;
  const ruleSections = loose.rules as { avoid?: unknown; focus?: unknown } | undefined;
  const findings: string[] = [];

  for (const section of ['avoid', 'focus'] as const) {
    const entries = ruleSections?.[section];
    if (!Array.isArray(entries)) continue;

    for (const [position, entry] of entries.entries()) {
      if (typeof entry !== 'object' || entry === null) continue;
      const fields = entry as Record<string, unknown>;

      if (fields.type === 'path') {
        findings.push(`rules.${section}[${position}].type: 'path' has been renamed to 'url_path'.`);
      }
      if ('url_path' in fields && !('value' in fields)) {
        findings.push(`rules.${section}[${position}]: the rule field 'url_path' has been renamed to 'value'.`);
      }
    }
  }

  if (findings.length === 0) return;

  throw new PentestError(
    `Configuration uses deprecated fields. Please update:\n - ${findings.join('\n - ')}`,
    'config',
    false,
    { deprecatedFields: findings },
    ErrorCode.CONFIG_VALIDATION_FAILED,
  );
}
const validateConfig = (config: Config): void => {
if (!config || typeof config !== 'object') {
throw new PentestError(
@@ -366,8 +327,6 @@ const validateConfig = (config: Config): void => {
);
}
checkDeprecatedFields(config);
const isValid = validateSchema(config);
if (!isValid) {
const errors = validateSchema.errors || [];
@@ -383,16 +342,10 @@ const validateConfig = (config: Config): void => {
performSecurityValidation(config);
const hasAnySteering =
!!config.rules ||
!!config.authentication ||
!!config.description ||
!!config.vuln_classes ||
config.exploit !== undefined ||
!!config.report ||
!!config.rules_of_engagement;
if (!hasAnySteering) {
console.warn('⚠️ Configuration file contains no steering fields. The pentest will run with all defaults.');
if (!config.rules && !config.authentication && !config.description) {
console.warn(
'⚠️ Configuration file contains no rules, authentication, or description. The pentest will run without any scoping restrictions or login capabilities.',
);
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
}
@@ -428,15 +381,6 @@ const performSecurityValidation = (config: Config): void => {
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
if (pattern.test(auth.credentials.password)) {
throw new PentestError(
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
@@ -479,34 +423,6 @@ const performSecurityValidation = (config: Config): void => {
}
}
}
if (config.rules_of_engagement) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(config.rules_of_engagement)) {
throw new PentestError(
`rules_of_engagement contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'rules_of_engagement', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
if (config.report?.guidance) {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(config.report.guidance)) {
throw new PentestError(
`report.guidance contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'report.guidance', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
};
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
@@ -514,12 +430,12 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
rules.forEach((rule, index) => {
for (const pattern of DANGEROUS_PATTERNS) {
if (pattern.test(rule.value)) {
if (pattern.test(rule.url_path)) {
throw new PentestError(
`rules.${ruleType}[${index}].value contains potentially dangerous pattern: ${pattern.source}`,
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: `rules.${ruleType}[${index}].value`, pattern: pattern.source },
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -539,25 +455,13 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
};
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
const field = `rules.${ruleType}[${index}].value`;
const field = `rules.${ruleType}[${index}].url_path`;
switch (rule.type) {
case 'url_path':
if (!rule.value.startsWith('/')) {
case 'path':
if (!rule.url_path.startsWith('/')) {
throw new PentestError(
`${field} for type 'url_path' must start with '/'`,
'config',
false,
{ field, ruleType: rule.type },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
break;
case 'code_path':
if (rule.value.includes('://')) {
throw new PentestError(
`${field} for type 'code_path' must not contain a URL protocol (got '${rule.value}')`,
`${field} for type 'path' must start with '/'`,
'config',
false,
{ field, ruleType: rule.type },
@@ -569,7 +473,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'subdomain':
case 'domain':
// Basic domain validation - no slashes allowed
if (rule.value.includes('/')) {
if (rule.url_path.includes('/')) {
throw new PentestError(
`${field} for type '${rule.type}' cannot contain '/' characters`,
'config',
@@ -579,7 +483,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
);
}
// Must contain at least one dot for domains
if (rule.type === 'domain' && !rule.value.includes('.')) {
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
throw new PentestError(
`${field} for type 'domain' must be a valid domain name`,
'config',
@@ -592,7 +496,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
case 'method': {
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
if (!allowedMethods.includes(rule.value.toUpperCase())) {
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
throw new PentestError(
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
'config',
@@ -605,7 +509,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
}
case 'header':
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new PentestError(
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
'config',
@@ -617,7 +521,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
break;
case 'parameter':
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
throw new PentestError(
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
'config',
@@ -633,13 +537,13 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
const seen = new Set<string>();
rules.forEach((rule, index) => {
const key = `${rule.type}:${rule.value}`;
const key = `${rule.type}:${rule.url_path}`;
if (seen.has(key)) {
throw new PentestError(
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.value}'`,
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
'config',
false,
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, value: rule.value },
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -648,16 +552,16 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
};
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.value}`));
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
focusRules.forEach((rule, index) => {
const key = `${rule.type}:${rule.value}`;
const key = `${rule.type}:${rule.url_path}`;
if (avoidSet.has(key)) {
throw new PentestError(
`Conflicting rule found: rules.focus[${index}] '${rule.value}' also exists in rules.avoid`,
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
'config',
false,
{ field: `rules.focus[${index}]`, value: rule.value },
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
@@ -668,7 +572,7 @@ const sanitizeRule = (rule: Rule): Rule => {
return {
description: rule.description.trim(),
type: rule.type.toLowerCase().trim() as Rule['type'],
value: rule.value.trim(),
url_path: rule.url_path.trim(),
};
};
@@ -678,28 +582,11 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
const authentication = config?.authentication || null;
const description = config?.description?.trim() || '';
const vuln_classes =
config?.vuln_classes && config.vuln_classes.length > 0 ? [...config.vuln_classes] : [...ALL_VULN_CLASSES];
const exploit = config?.exploit !== undefined ? config.exploit === 'true' : true;
const report = {
...(config?.report?.min_severity && { min_severity: config.report.min_severity }),
...(config?.report?.min_confidence && { min_confidence: config.report.min_confidence }),
...(config?.report?.guidance && { guidance: config.report.guidance.trim() }),
};
const rules_of_engagement = config?.rules_of_engagement?.trim() ?? '';
return {
avoid: avoid.map(sanitizeRule),
focus: focus.map(sanitizeRule),
authentication: authentication ? sanitizeAuthentication(authentication) : null,
description,
vuln_classes,
exploit,
report,
rules_of_engagement,
};
};
@@ -709,8 +596,17 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
login_url: auth.login_url.trim(),
credentials: {
username: auth.credentials.username.trim(),
password: auth.credentials.password,
...(auth.credentials.password !== undefined && { password: auth.credentials.password }),
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
...(auth.credentials.email_login && {
email_login: {
address: auth.credentials.email_login.address.trim(),
password: auth.credentials.email_login.password,
...(auth.credentials.email_login.totp_secret && {
totp_secret: auth.credentials.email_login.totp_secret.trim(),
}),
},
}),
},
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
success_condition: {
+1 -1
View File
@@ -161,7 +161,7 @@ export class AgentExecutionService {
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 5. Execute agent
const outputFormat = getOutputFormat(agentName, distributedConfig?.exploit ?? true);
const outputFormat = getOutputFormat(agentName);
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
@@ -138,6 +138,9 @@ function classifyByErrorCode(code: ErrorCode, retryableFromError: boolean): { ty
case ErrorCode.AUTH_FAILED:
return { type: 'AuthenticationError', retryable: false };
case ErrorCode.AUTH_LOGIN_FAILED:
return { type: 'AuthLoginFailedError', retryable: false };
case ErrorCode.BILLING_ERROR:
return { type: 'BillingError', retryable: true };
@@ -1,239 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Deterministic queue-JSON to findings-MD renderer.
*
* Used when exploit=false: the exploit agents didn't run, so there is no
* `*_exploitation_evidence.md` to concatenate into the report. This module
* reads each `*_exploitation_queue.json` (already SDK-validated against the
* schemas in ../ai/queue-schemas.ts) and writes a `*_findings.md` per class
* in the canonical body shape that report-executive.txt's cleanup expects.
*
* No LLM in the loop — every field maps directly from a JSON key.
*/
import { fs, path } from 'zx';
import type { AuthFinding, AuthzFinding, InjectionFinding, SsrfFinding, XssFinding } from '../ai/queue-schemas.js';
import { deliverablesDir } from '../paths.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { VulnClass } from '../types/config.js';
// Markdown blockquote inserted near the top of every rendered findings file,
// stating that the exploitation phase was not run.
const DISCLAIMER = [
'> Exploitation phase was not run for this assessment. Each entry documents a',
'> vulnerability identified through static analysis; live exploitation steps and',
'> proof of impact are not included.',
].join('\n');
/** Rendering settings for one vulnerability class; `T` is the queue entry type passed to `renderEntry`. */
interface ClassConfig<T> {
// Used in the file's top-level heading and as the prefix of log messages.
readonly heading: string;
// Inserted into the "No ... vulnerabilities were identified." sentence.
readonly noneFoundLabel: string;
// Queue JSON file name, resolved inside the deliverables directory.
readonly queueFile: string;
// Markdown file name written inside the deliverables directory.
readonly findingsFile: string;
// Renders one queue entry as a markdown section.
readonly renderEntry: (entry: T) => string;
}
/** Shape read from a queue JSON file; a missing `vulnerabilities` key is treated as an empty list by the reader. */
interface QueueDocument<T> {
vulnerabilities?: T[];
}
// === Common Render Helpers ===
/**
 * Formats one `- **Label:** value` bullet for an entry summary.
 *
 * @param label - Bold label shown before the value.
 * @param value - Value to render; booleans are interpolated as `true`/`false`.
 * @returns The bullet line, or `null` when the value is `null`, `undefined`,
 *   or a string that is empty after trimming.
 */
function summaryRow(label: string, value: string | undefined | null | boolean): string | null {
  const isMissing = value === undefined || value === null;
  const isBlankText = typeof value === 'string' && value.trim() === '';
  return isMissing || isBlankText ? null : `- **${label}:** ${value}`;
}
/**
 * Combines an endpoint and a code location into one display string.
 *
 * @param endpoint - Endpoint of the finding, if any.
 * @param codeLocation - Source location of the finding, if any.
 * @returns `endpoint (codeLocation)` when both are non-empty, otherwise
 *   whichever one is non-empty, otherwise `''`.
 */
function formatLocation(endpoint: string | undefined, codeLocation: string | undefined): string {
  if (endpoint && codeLocation) return `${endpoint} (${codeLocation})`;
  // `||` rather than `??`: an empty-string endpoint must not mask a populated
  // code location (`??` only skips null/undefined).
  return endpoint || codeLocation || '';
}
/**
 * Assembles the markdown section for a single finding.
 *
 * Layout: a `### id: title` heading, a blank line, a `**Summary:**` label, the
 * non-null summary rows, a blank line, and a `**Notes:**` line when `notes`
 * is non-empty after trimming. Trailing whitespace of the whole section is
 * trimmed.
 *
 * @param id - Finding identifier used in the heading.
 * @param title - Heading text following the identifier.
 * @param summaryRows - Pre-formatted bullet lines; `null` entries are skipped.
 * @param notes - Optional free-form notes.
 * @returns The section as a single string without a trailing newline.
 */
function buildEntry(
  id: string,
  title: string,
  summaryRows: ReadonlyArray<string | null>,
  notes: string | undefined,
): string {
  const bullets = summaryRows.filter((row): row is string => row !== null);
  const trimmedNotes = notes ? notes.trim() : '';
  const notesBlock = trimmedNotes !== '' ? [`**Notes:** ${trimmedNotes}`] : [];

  return [`### ${id}: ${title}`, '', '**Summary:**', ...bullets, '', ...notesBlock].join('\n').trimEnd();
}
// === Per-Class Renderers ===
/**
 * Renders one authentication finding as a markdown section.
 *
 * @param e - Queue entry; location comes from `source_endpoint` and
 *   `vulnerable_code_location`, overview from `missing_defense`, impact from
 *   `exploitation_hypothesis`.
 * @returns The section produced by `buildEntry`.
 */
function renderAuthEntry(e: AuthFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/**
 * Renders one SSRF finding as a markdown section.
 *
 * @param e - Queue entry; location comes from `source_endpoint` and
 *   `vulnerable_code_location`, overview from `missing_defense`, impact from
 *   `exploitation_hypothesis`.
 * @returns The section produced by `buildEntry`.
 */
function renderSsrfEntry(e: SsrfFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/**
 * Renders one authorization finding as a markdown section.
 *
 * @param e - Queue entry; location comes from `endpoint` and
 *   `vulnerable_code_location`, overview from `guard_evidence`, impact from
 *   `side_effect`.
 * @returns The section produced by `buildEntry`.
 */
function renderAuthzEntry(e: AuthzFinding): string {
  const location = formatLocation(e.endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.guard_evidence),
    summaryRow('Impact', e.side_effect),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
/**
 * Renders one injection finding as a markdown section.
 *
 * The location is `sink_call (path: path)` when both fields are non-empty,
 * otherwise whichever of the two is non-empty. The overview comes from
 * `mismatch_reason`.
 *
 * @param e - Injection queue entry.
 * @returns The section produced by `buildEntry`.
 */
function renderInjectionEntry(e: InjectionFinding): string {
  // `||` rather than `??`: an empty-string sink_call must fall through to
  // `path` instead of producing a blank location that summaryRow then drops.
  const location = e.path && e.sink_call ? `${e.sink_call} (path: ${e.path})` : e.sink_call || e.path;
  return buildEntry(
    e.ID,
    e.vulnerability_type,
    [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)],
    e.notes,
  );
}
/**
 * Renders one XSS finding as a markdown section.
 *
 * The location is `sink_function (path: path)` when both fields are
 * non-empty, otherwise whichever of the two is non-empty. The overview comes
 * from `mismatch_reason`.
 *
 * @param e - XSS queue entry.
 * @returns The section produced by `buildEntry`.
 */
function renderXssEntry(e: XssFinding): string {
  // `||` rather than `??`: an empty-string sink_function must fall through to
  // `path` instead of producing a blank location that summaryRow then drops.
  const location = e.path && e.sink_function ? `${e.sink_function} (path: ${e.path})` : e.sink_function || e.path;
  return buildEntry(
    e.ID,
    e.vulnerability_type,
    [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)],
    e.notes,
  );
}
// === Class Registry ===
// Per-class rendering settings, keyed by vulnerability class.
// Entries are typed ClassConfig<unknown>, so each renderEntry casts the entry
// to its class's finding type before delegating to the matching renderer.
const CLASSES: Record<VulnClass, ClassConfig<unknown>> = {
auth: {
heading: 'Authentication',
noneFoundLabel: 'authentication',
queueFile: 'auth_exploitation_queue.json',
findingsFile: 'auth_findings.md',
renderEntry: (e) => renderAuthEntry(e as AuthFinding),
},
authz: {
heading: 'Authorization',
noneFoundLabel: 'authorization',
queueFile: 'authz_exploitation_queue.json',
findingsFile: 'authz_findings.md',
renderEntry: (e) => renderAuthzEntry(e as AuthzFinding),
},
injection: {
heading: 'Injection',
noneFoundLabel: 'injection',
queueFile: 'injection_exploitation_queue.json',
findingsFile: 'injection_findings.md',
renderEntry: (e) => renderInjectionEntry(e as InjectionFinding),
},
xss: {
heading: 'XSS',
noneFoundLabel: 'XSS',
queueFile: 'xss_exploitation_queue.json',
findingsFile: 'xss_findings.md',
renderEntry: (e) => renderXssEntry(e as XssFinding),
},
ssrf: {
heading: 'SSRF',
noneFoundLabel: 'SSRF',
queueFile: 'ssrf_exploitation_queue.json',
findingsFile: 'ssrf_findings.md',
renderEntry: (e) => renderSsrfEntry(e as SsrfFinding),
},
};
// === Class File Assembly ===
/**
 * Produces the full markdown document for one vulnerability class.
 *
 * The document has a `# <heading> Findings` title, the disclaimer, and an
 * `## Identified Vulnerabilities` section holding either one rendered section
 * per entry or a single "none identified" sentence.
 *
 * @param config - Class settings supplying heading, label, and entry renderer.
 * @param entries - Queue entries to render, in output order.
 * @returns The document text, ending with exactly one newline.
 */
function renderClassFile(config: ClassConfig<unknown>, entries: readonly unknown[]): string {
  const body =
    entries.length === 0
      ? [`No ${config.noneFoundLabel} vulnerabilities were identified.`, '']
      : entries.flatMap((entry) => [config.renderEntry(entry), '']);

  const lines = [`# ${config.heading} Findings`, '', DISCLAIMER, '', '## Identified Vulnerabilities', '', ...body];

  return `${lines.join('\n').trimEnd()}\n`;
}
// === Public Entry Point ===
/**
 * Writes a `*_findings.md` file for each class that has a `*_exploitation_queue.json`.
 *
 * For every class in `CLASSES`, in order:
 * - an existing findings file is left untouched (the class is skipped);
 * - a missing queue file means the class is skipped;
 * - otherwise the queue is read, rendered, and written.
 * A failure while reading, rendering, or writing one class is logged as a
 * warning and does not stop the remaining classes.
 *
 * @param sourceDir - Passed to `deliverablesDir` to locate the deliverables directory.
 * @param deliverablesSubdir - Optional subdirectory, also passed to `deliverablesDir`.
 * @param logger - Receives one info or warn message per class.
 */
export async function renderFindingsFromQueues(
  sourceDir: string,
  deliverablesSubdir: string | undefined,
  logger: ActivityLogger,
): Promise<void> {
  const outputDir = deliverablesDir(sourceDir, deliverablesSubdir);

  for (const cls of Object.values(CLASSES)) {
    const { heading, queueFile, findingsFile } = cls;
    const queueLocation = path.join(outputDir, queueFile);
    const findingsLocation = path.join(outputDir, findingsFile);

    // Checked first: an existing findings file wins even if the queue is gone.
    const alreadyRendered = await fs.pathExists(findingsLocation);
    if (alreadyRendered) {
      logger.info(`${heading}: ${findingsFile} already exists, skipping`);
      continue;
    }

    const queueAvailable = await fs.pathExists(queueLocation);
    if (!queueAvailable) {
      logger.info(`${heading}: no queue file (class out of scope), skipping`);
      continue;
    }

    try {
      const queue = (await fs.readJson(queueLocation)) as QueueDocument<unknown>;
      const findings = queue.vulnerabilities ?? [];
      await fs.writeFile(findingsLocation, renderClassFile(cls, findings));
      logger.info(`${heading}: rendered ${findings.length} finding(s) to ${findingsFile}`);
    } catch (error) {
      logger.warn(`${heading}: failed to render findings from ${queueFile}: ${(error as Error).message}`);
    }
  }
}
+9 -91
View File
@@ -14,9 +14,8 @@
* Checks run sequentially, cheapest first:
* 1. Repository path exists and contains .git
* 2. Config file parses and validates (if provided)
* 3. code_path rules match real entries in the repo (filesystem only)
* 4. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
* 5. Target URL is reachable from the container (DNS + HTTP)
* 3. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
* 4. Target URL is reachable from the container (DNS + HTTP)
*/
import { lookup } from 'node:dns/promises';
@@ -25,11 +24,9 @@ import http from 'node:http';
import https from 'node:https';
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { glob } from 'zx';
import { resolveModel } from '../ai/models.js';
import { parseConfig } from '../config-parser.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { Config, Rule } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { err, ok, type Result } from '../types/result.js';
import { isRetryableError, PentestError } from './error-handling.js';
@@ -111,13 +108,13 @@ async function validateRepo(
// === Config Validation ===
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<Config, PentestError>> {
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
logger.info('Validating configuration file...', { configPath });
try {
const config = await parseConfig(configPath);
await parseConfig(configPath);
logger.info('Configuration file OK');
return ok(config);
return ok(undefined);
} catch (error) {
if (error instanceof PentestError) {
return err(error);
@@ -135,73 +132,6 @@ async function validateConfig(configPath: string, logger: ActivityLogger): Promi
}
}
// === code_path Existence Validation ===
/** Glob patterns excluded from code_path matching. */
const CODE_PATH_IGNORE = ['.git/**', '.shannon/**'];

/**
 * Reports whether a glob pattern matches at least one entry under the repo.
 *
 * Matching is relative to `repoPath`, includes dotfiles and directories, does
 * not follow symbolic links, and skips `CODE_PATH_IGNORE`. Iteration stops at
 * the first match.
 *
 * @param repoPath - Directory used as the glob working directory.
 * @param pattern - Glob pattern or plain path to look for.
 * @returns `true` on the first match, `false` if the stream ends without one.
 */
async function patternMatchesAny(repoPath: string, pattern: string): Promise<boolean> {
  const matches = glob.globbyStream(pattern, {
    cwd: repoPath,
    dot: true,
    onlyFiles: false,
    followSymbolicLinks: false,
    ignore: CODE_PATH_IGNORE,
  });

  // Returning from inside for-await ends iteration after the first hit.
  for await (const _firstMatch of matches) {
    return true;
  }
  return false;
}
/** Which rules list a rule came from. */
type RuleKind = 'avoid' | 'focus';
/** A code_path rule whose pattern matched nothing in the repo; reported in the validation error. */
interface MissingCodePath {
kind: RuleKind;
value: string;
description: string;
}
/**
 * Verifies that every `code_path` rule matches something in the repository.
 *
 * Collects `code_path` rules from `rules.avoid` then `rules.focus` and checks
 * each pattern, one at a time, with `patternMatchesAny`. The only property
 * enforced is "at least one match".
 *
 * @param config - Parsed configuration holding the rules.
 * @param repoPath - Repository root the patterns are matched against.
 * @param logger - Receives progress messages.
 * @returns `ok` when there are no `code_path` rules or all of them match;
 *   otherwise `err` with a `CONFIG_VALIDATION_FAILED` PentestError listing
 *   every unmatched rule.
 */
async function validateCodePathsExist(
  config: Config,
  repoPath: string,
  logger: ActivityLogger,
): Promise<Result<void, PentestError>> {
  const candidates: Array<{ kind: RuleKind; rule: Rule }> = [];
  for (const kind of ['avoid', 'focus'] as const) {
    for (const rule of config.rules?.[kind] ?? []) {
      if (rule.type === 'code_path') {
        candidates.push({ kind, rule });
      }
    }
  }

  if (candidates.length === 0) {
    return ok(undefined);
  }

  logger.info(`Validating ${candidates.length} code_path rule(s) against repo...`);

  const unmatched: MissingCodePath[] = [];
  for (const { kind, rule } of candidates) {
    const found = await patternMatchesAny(repoPath, rule.value);
    if (!found) {
      unmatched.push({ kind, value: rule.value, description: rule.description });
    }
  }

  if (unmatched.length === 0) {
    logger.info('All code_path rules matched');
    return ok(undefined);
  }

  const lines = unmatched.map((m) => `[${m.kind}] '${m.value}' — ${m.description}`);
  return err(
    new PentestError(
      `code_path rules don't match any file or directory in the repo:\n - ${lines.join('\n - ')}\n` +
        `Fix the patterns or remove the rules.`,
      'config',
      false,
      { missing: unmatched },
      ErrorCode.CONFIG_VALIDATION_FAILED,
    ),
  );
}
// === Credential Validation ===
/** Map SDK error type to a human-readable preflight PentestError. */
@@ -543,9 +473,8 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
*
* 1. Repository path exists and contains .git
* 2. Config file parses and validates (if configPath provided)
* 3. code_path rules match at least one entry in the repo (skipped without config)
* 4. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
* 5. Target URL is reachable from the container
* 3. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
* 4. Target URL is reachable from the container
*
* Returns on first failure.
*/
@@ -565,31 +494,20 @@ export async function runPreflightChecks(
}
// 2. Config check (free — filesystem + CPU)
let parsedConfig: Config | null = null;
if (configPath) {
const configResult = await validateConfig(configPath, logger);
if (!configResult.ok) {
return configResult;
}
parsedConfig = configResult.value;
}
// 3. code_path rules must match real entries in the repo (filesystem only).
// Runs after both repo and config are valid, before any network round-trip.
if (parsedConfig) {
const codePathResult = await validateCodePathsExist(parsedConfig, repoPath, logger);
if (!codePathResult.ok) {
return codePathResult;
}
}
// 4. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
// 3. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
const credResult = await validateCredentials(logger, apiKey, providerConfig);
if (!credResult.ok) {
return credResult;
}
// 5. Target URL reachability check (cheap — 1 HTTP round-trip)
// 4. Target URL reachability check (cheap — 1 HTTP round-trip)
const urlResult = await validateTargetUrl(targetUrl, logger);
if (!urlResult.ok) {
return urlResult;
+45 -155
View File
@@ -8,113 +8,9 @@ import { fs, path } from 'zx';
import { PROMPTS_DIR } from '../paths.js';
import { PLAYWRIGHT_SESSION_MAPPING } from '../session-manager.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { Authentication, DistributedConfig, ReportConfig, Rule, VulnClass } from '../types/config.js';
import { isGlobPattern } from '../utils/glob.js';
import type { Authentication, DistributedConfig } from '../types/config.js';
import { handlePromptError, PentestError } from './error-handling.js';
/**
 * Renders the `code_path` rules of a list as prompt bullet lines.
 *
 * Each line is `- <value> [GLOB]<description>` or `- <value> [FILE]<description>`,
 * chosen by `isGlobPattern(value)`. No separator is emitted between the tag
 * and the description.
 *
 * @param rules - Rules of any type; entries that are not `code_path` are ignored.
 * @returns The lines joined by newlines, or `'None'` when no rule qualifies.
 */
function renderCodePathRules(rules: Rule[]): string {
  const lines: string[] = [];
  for (const rule of rules) {
    if (rule.type !== 'code_path') continue;
    const tag = isGlobPattern(rule.value) ? '[GLOB]' : '[FILE]';
    lines.push(`- ${rule.value} ${tag}${rule.description}`);
  }
  return lines.length === 0 ? 'None' : lines.join('\n');
}
/** Text fragments used to build one vulnerability-class subsection of the summary template. */
interface VulnSummarySpec {
// Bold subsection heading.
readonly heading: string;
// Name of the evidence section the template instruction refers to.
readonly evidenceSection: string;
// Inserted into the "No ... vulnerabilities were found." fallback sentence.
readonly noneFoundLabel: string;
}
// Summary text fragments for each vulnerability class.
const VULN_SUMMARY_SPECS: Record<VulnClass, VulnSummarySpec> = {
auth: {
heading: 'Authentication Vulnerabilities',
evidenceSection: 'Authentication Exploitation Evidence',
noneFoundLabel: 'authentication',
},
authz: {
heading: 'Authorization Vulnerabilities',
evidenceSection: 'Authorization Exploitation Evidence',
noneFoundLabel: 'authorization',
},
xss: {
heading: 'Cross-Site Scripting (XSS) Vulnerabilities',
evidenceSection: 'XSS Exploitation Evidence',
noneFoundLabel: 'XSS',
},
injection: {
heading: 'SQL/Command Injection Vulnerabilities',
evidenceSection: 'Injection Exploitation Evidence',
noneFoundLabel: 'SQL or command injection',
},
ssrf: {
heading: 'Server-Side Request Forgery (SSRF) Vulnerabilities',
evidenceSection: 'SSRF Exploitation Evidence',
noneFoundLabel: 'SSRF',
},
};
/**
 * Builds the per-class summary subsections of the report template.
 *
 * @param selected - Classes to include, in output order. An empty list means
 *   every class in `VULN_SUMMARY_SPECS`, in that table's key order.
 * @returns One templated subsection per class, separated by blank lines.
 */
function renderVulnSummarySubsections(selected: readonly VulnClass[]): string {
  const order = selected.length > 0 ? selected : (Object.keys(VULN_SUMMARY_SPECS) as VulnClass[]);

  const subsections: string[] = [];
  for (const cls of order) {
    const { heading, evidenceSection, noneFoundLabel } = VULN_SUMMARY_SPECS[cls];
    subsections.push(
      `**${heading}:**\n{Check for "${evidenceSection}" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No ${noneFoundLabel} vulnerabilities were found."}`,
    );
  }

  return subsections.join('\n\n');
}
/**
 * Builds the top-level `<report_filters>` block.
 *
 * Returns an empty string when `report` is undefined or none of
 * `min_severity`, `min_confidence`, and (trimmed) `guidance` is set. Each
 * configured filter contributes its own lines; unset filters add nothing.
 *
 * @param report - Report settings from the configuration, if any.
 * @returns The block text, or `''` when there is nothing to render.
 */
function renderReportFiltersBlock(report: ReportConfig | undefined): string {
  if (!report) return '';

  const { min_severity: minSeverity, min_confidence: minConfidence } = report;
  const guidance = report.guidance?.trim();
  if (!(minSeverity || minConfidence || guidance)) return '';

  const severityLines = minSeverity
    ? [
        `- Minimum severity: ${minSeverity} — keep only findings rated this severity or higher (scale: low < medium < high < critical).`,
      ]
    : [];
  const confidenceLines = minConfidence
    ? [
        `- Minimum confidence: ${minConfidence} — keep only findings rated this confidence or higher (scale: low < medium < high).`,
      ]
    : [];
  const guidanceLines = guidance
    ? ['', 'User guidance — apply throughout the report as binding directives for finding selection:', guidance]
    : [];

  return [
    '<report_filters>',
    'The filters below are user-supplied and binding for this assessment. Honor each strictly when assembling the final report.',
    '',
    ...severityLines,
    ...confidenceLines,
    ...guidanceLines,
    '</report_filters>',
  ].join('\n');
}
/**
 * Builds the per-finding DROP rule list used in the cleanup step.
 *
 * Severity and confidence appear as concrete thresholds. Guidance is only
 * referred to, not quoted; its text is emitted by `renderReportFiltersBlock`.
 *
 * @param report - Report settings from the configuration, if any.
 * @returns A header line followed by one condition per configured filter, or
 *   `''` when no filter is configured.
 */
function renderReportFilterRules(report: ReportConfig | undefined): string {
  const conditions = [
    report?.min_severity ? `* severity is below ${report.min_severity}` : null,
    report?.min_confidence ? `* confidence is below ${report.min_confidence}` : null,
    report?.guidance?.trim() ? '* topic matches an exclusion in the user guidance' : null,
  ].filter((condition): condition is string => condition !== null);

  if (conditions.length === 0) return '';

  const header = ' - DROP any `### [TYPE]-VULN-[NUMBER]` finding whose:';
  return [header, ...conditions.map((condition) => ` ${condition}`)].join('\n');
}
interface PromptVariables {
webUrl: string;
repoPath: string;
@@ -180,6 +76,17 @@ async function buildLoginInstructions(
`generated TOTP code using secret "${authentication.credentials.totp_secret}"`,
);
}
if (authentication.credentials.email_login) {
const emailLogin = authentication.credentials.email_login;
userInstructions = userInstructions.replace(/\$email_address/g, emailLogin.address);
userInstructions = userInstructions.replace(/\$email_password/g, emailLogin.password);
if (emailLogin.totp_secret) {
userInstructions = userInstructions.replace(
/\$email_totp/g,
`generated TOTP code using secret "${emailLogin.totp_secret}"`,
);
}
}
}
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
@@ -279,63 +186,36 @@ async function interpolateVariables(
.replace(/{{AUTH_CONTEXT}}/g, buildAuthContext(config))
.replace(/{{DESCRIPTION}}/g, config?.description ? `Description: ${config.description}` : '');
const avoidUrlRules = config?.avoid?.filter((r) => r.type !== 'code_path') ?? [];
const focusUrlRules = config?.focus?.filter((r) => r.type !== 'code_path') ?? [];
if (avoidUrlRules.length === 0 && focusUrlRules.length === 0) {
result = result.replace(/<rules>[\s\S]*?<\/rules>\s*/g, '');
} else {
const avoidStr = avoidUrlRules.length > 0 ? avoidUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
const focusStr = focusUrlRules.length > 0 ? focusUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
result = result.replace(/{{RULES_AVOID}}/g, avoidStr).replace(/{{RULES_FOCUS}}/g, focusStr);
}
if (config) {
// Handle rules section - if both are empty, use cleaner messaging
const hasAvoidRules = config.avoid && config.avoid.length > 0;
const hasFocusRules = config.focus && config.focus.length > 0;
const avoidCodeRules = (config?.avoid ?? []).filter((r) => r.type === 'code_path');
const focusCodeRules = (config?.focus ?? []).filter((r) => r.type === 'code_path');
if (avoidCodeRules.length === 0 && focusCodeRules.length === 0) {
result = result.replace(/<code_path_rules>[\s\S]*?<\/code_path_rules>\s*/g, '');
} else {
result = result
.replace(/{{CODE_RULES_AVOID}}/g, renderCodePathRules(config?.avoid ?? []))
.replace(/{{CODE_RULES_FOCUS}}/g, renderCodePathRules(config?.focus ?? []));
}
if (!hasAvoidRules && !hasFocusRules) {
// Replace the entire rules section with a clean message
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
} else {
const avoidRules = hasAvoidRules ? config.avoid?.map((r) => `- ${r.description}`).join('\n') : 'None';
const focusRules = hasFocusRules ? config.focus?.map((r) => `- ${r.description}`).join('\n') : 'None';
const roe = config?.rules_of_engagement?.trim() ?? '';
if (roe) {
result = result.replace(/{{RULES_OF_ENGAGEMENT}}/g, roe);
} else {
result = result.replace(/<rules_of_engagement>[\s\S]*?<\/rules_of_engagement>\s*/g, '');
}
result = result.replace(/{{RULES_AVOID}}/g, avoidRules).replace(/{{RULES_FOCUS}}/g, focusRules);
}
if (config?.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
// Extract and inject login instructions from config
if (config.authentication?.login_flow) {
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
} else {
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
}
} else {
// Replace the entire rules section with a clean message when no config provided
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
}
const vulnClasses = config?.vuln_classes ?? [];
result = result.replace(
/{{VULN_CLASSES_TESTED}}/g,
vulnClasses.length > 0 ? vulnClasses.join(', ') : 'injection, xss, auth, authz, ssrf',
);
result = result.replace(/{{VULN_SUMMARY_SUBSECTIONS}}/g, renderVulnSummarySubsections(vulnClasses));
const exploitEnabled = config?.exploit ?? true;
result = result
.replace(/{{EXPLOITATION}}/g, exploitEnabled ? 'enabled' : 'disabled')
.replace(/{{REPORT_VULN_HEADING}}/g, exploitEnabled ? 'Exploitation Evidence' : 'Findings')
.replace(
/{{REPORT_VULN_SUBHEADING}}/g,
exploitEnabled ? 'Successfully Exploited Vulnerabilities' : 'Identified Vulnerabilities',
);
result = result
.replace(/{{REPORT_FILTERS_BLOCK}}/g, renderReportFiltersBlock(config?.report))
.replace(/{{REPORT_FILTER_RULES}}/g, renderReportFilterRules(config?.report));
// Collapse runs of 3+ newlines (left behind by tag-strip and empty-fragment substitutions).
result = result.replace(/\n{3,}/g, '\n\n');
// Validate that all placeholders have been replaced (excluding instructional text)
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
if (remainingPlaceholders) {
@@ -352,6 +232,16 @@ async function interpolateVariables(
}
}
/**
 * Turn an optional prompt-directory override into an absolute path.
 *
 * @param promptDir - Override path; may be absolute, relative, or absent.
 * @returns PROMPTS_DIR when no (or an empty) override is given; the override
 *   itself when it is already absolute; otherwise the override resolved
 *   against SHANNON_WORKER_ROOT, or the current working directory when that
 *   environment variable is not set.
 */
export function resolvePromptDir(promptDir: string | undefined): string {
  if (promptDir) {
    if (!path.isAbsolute(promptDir)) {
      // The base directory is only looked up for relative overrides.
      const baseDir = process.env.SHANNON_WORKER_ROOT ?? process.cwd();
      return path.resolve(baseDir, promptDir);
    }
    return promptDir;
  }
  return PROMPTS_DIR;
}
// Pure function: Load and interpolate prompt template
export async function loadPrompt(
promptName: string,
+29 -35
View File
@@ -12,66 +12,60 @@ import { PentestError } from './error-handling.js';
interface DeliverableFile {
name: string;
/** Candidate filenames in priority order. First one that exists wins. */
paths: readonly string[];
path: string;
required: boolean;
}
// Pure function: Assemble final report from specialist deliverables.
// Per class, prefer the exploit-agent's evidence file; fall back to renderer-produced findings.
// Both never coexist for a workspace because scope (exploit flag) is locked.
// Pure function: Assemble final report from specialist deliverables
export async function assembleFinalReport(
sourceDir: string,
deliverablesSubdir: string | undefined,
logger: ActivityLogger,
): Promise<string> {
const deliverableFiles: readonly DeliverableFile[] = [
{ name: 'Injection', paths: ['injection_exploitation_evidence.md', 'injection_findings.md'], required: false },
{ name: 'XSS', paths: ['xss_exploitation_evidence.md', 'xss_findings.md'], required: false },
{ name: 'Authentication', paths: ['auth_exploitation_evidence.md', 'auth_findings.md'], required: false },
{ name: 'SSRF', paths: ['ssrf_exploitation_evidence.md', 'ssrf_findings.md'], required: false },
{ name: 'Authorization', paths: ['authz_exploitation_evidence.md', 'authz_findings.md'], required: false },
const deliverableFiles: DeliverableFile[] = [
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false },
];
const dir = deliverablesDir(sourceDir, deliverablesSubdir);
const sections: string[] = [];
for (const file of deliverableFiles) {
let added = false;
for (const candidate of file.paths) {
const filePath = path.join(dir, candidate);
try {
if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8');
sections.push(content);
logger.info(`Added ${file.name} section from ${candidate}`);
added = true;
break;
}
} catch (error) {
const err = error as Error;
logger.warn(`Could not read ${candidate}: ${err.message}`);
}
}
if (!added) {
if (file.required) {
const filePath = path.join(deliverablesDir(sourceDir, deliverablesSubdir), file.path);
try {
if (await fs.pathExists(filePath)) {
const content = await fs.readFile(filePath, 'utf8');
sections.push(content);
logger.info(`Added ${file.name} findings`);
} else if (file.required) {
throw new PentestError(
`Required deliverable file not found: ${file.paths.join(' or ')}`,
`Required deliverable file not found: ${file.path}`,
'filesystem',
false,
{ deliverableFile: file.paths, sourceDir },
{ deliverableFile: file.path, sourceDir },
ErrorCode.DELIVERABLE_NOT_FOUND,
);
} else {
logger.info(`No ${file.name} deliverable found`);
}
logger.info(`No ${file.name} deliverable found`);
} catch (error) {
if (file.required) {
throw error;
}
const err = error as Error;
logger.warn(`Could not read ${file.path}: ${err.message}`);
}
}
const finalContent = sections.join('\n\n');
const finalReportPath = path.join(dir, 'comprehensive_security_assessment_report.md');
const outputDir = deliverablesDir(sourceDir, deliverablesSubdir);
const finalReportPath = path.join(outputDir, 'comprehensive_security_assessment_report.md');
try {
await fs.ensureDir(dir);
// Ensure deliverables directory exists
await fs.ensureDir(outputDir);
await fs.writeFile(finalReportPath, finalContent);
logger.info(`Final report assembled at ${finalReportPath}`);
} catch (error) {
@@ -0,0 +1,128 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Auth-validation preflight service.
*
* Drives a real browser login before the full pipeline runs,
* catching bad credentials early and saving API budget.
*/
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
import { z } from 'zod';
import { runClaudePrompt } from '../ai/claude-executor.js';
import type { AuditSession } from '../audit/index.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { DistributedConfig, ProviderConfig } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import type { Result } from '../types/result.js';
import { err, ok } from '../types/result.js';
import { PentestError } from './error-handling.js';
import { loadPrompt } from './prompt-manager.js';
// Label reported in the error when a login attempt fails (see classifyResult).
// Keep these literals in sync with the `failure_point` enum in AuthValidationSchema below.
type FailurePoint = 'username_or_password' | 'totp_secret' | 'out_of_band';
// Shape of the structured verdict expected back from the validation agent:
// - login_success: required boolean verdict
// - failure_point: optional; one of the FailurePoint literals
// - failure_detail: optional free text, capped at 250 characters
const AuthValidationSchema = z.object({
login_success: z.boolean(),
failure_point: z.enum(['username_or_password', 'totp_secret', 'out_of_band']).optional(),
failure_detail: z.string().max(250).optional(),
});
// JSON Schema (draft-07) rendering of AuthValidationSchema, handed to
// runClaudePrompt as the requested structured-output format.
const AUTH_VALIDATION_OUTPUT_FORMAT: JsonSchemaOutputFormat = {
type: 'json_schema',
schema: z.toJSONSchema(AuthValidationSchema, { target: 'draft-07' }) as Record<string, unknown>,
};
/** Everything validateAuthentication needs to render the prompt and run the agent. */
export interface AuthValidationInput {
// Passed to loadPrompt as a prompt variable.
webUrl: string;
// Passed to loadPrompt as a prompt variable and to runClaudePrompt as its second argument.
repoPath: string;
// Forwarded to loadPrompt.
config: DistributedConfig;
// Forwarded to loadPrompt.
pipelineTestingMode: boolean;
// Forwarded to runClaudePrompt.
auditSession: AuditSession;
// Forwarded to both loadPrompt and runClaudePrompt.
logger: ActivityLogger;
// Optional; forwarded to loadPrompt.
promptDir?: string;
// Optional; forwarded to runClaudePrompt.
apiKey?: string;
// Optional; forwarded to runClaudePrompt.
providerConfig?: ProviderConfig;
}
/**
 * Convert a schema-validated agent verdict into a Result.
 *
 * @param parsed - Verdict that already passed AuthValidationSchema.
 * @returns ok when `login_success` is true; otherwise an err carrying a
 *   PentestError with code AUTH_LOGIN_FAILED whose context holds the failure
 *   point and detail.
 */
function classifyResult(parsed: z.infer<typeof AuthValidationSchema>): Result<void, PentestError> {
  if (!parsed.login_success) {
    // Fall back to generic values when the agent omitted the optional fields.
    const failurePoint: FailurePoint = parsed.failure_point ?? 'username_or_password';
    const detail = parsed.failure_detail ?? 'Login failed';

    const loginFailure = new PentestError(
      `Authentication validation failed at "${failurePoint}": ${detail}`,
      'config',
      false,
      { failurePoint, failureDetail: detail },
      ErrorCode.AUTH_LOGIN_FAILED,
    );
    return err(loginFailure);
  }

  return ok(undefined);
}
/**
 * Run the auth-validation agent and turn its structured verdict into a Result.
 *
 * Steps: render the 'validate-authentication' prompt, run it through
 * runClaudePrompt with AUTH_VALIDATION_OUTPUT_FORMAT, check the returned
 * structured output against AuthValidationSchema, then classify the verdict.
 *
 * @param input - Target, config, and execution context for the agent run.
 * @returns ok on a successful login verdict; err with
 *   - AGENT_EXECUTION_FAILED when the agent run failed or produced no structured output,
 *   - OUTPUT_VALIDATION_FAILED when the output does not match the schema,
 *   - AUTH_LOGIN_FAILED (via classifyResult) when the agent reports a failed login.
 */
export async function validateAuthentication(input: AuthValidationInput): Promise<Result<void, PentestError>> {
  // Render the validation prompt.
  const renderedPrompt = await loadPrompt(
    'validate-authentication',
    { webUrl: input.webUrl, repoPath: input.repoPath },
    input.config,
    input.pipelineTestingMode,
    input.logger,
    input.promptDir,
  );

  // Execute the agent, requesting structured output. Argument order is positional.
  const agentRun = await runClaudePrompt(
    renderedPrompt,
    input.repoPath,
    '',
    'Auth validation',
    'validate-authentication',
    input.auditSession,
    input.logger,
    'medium',
    AUTH_VALIDATION_OUTPUT_FORMAT,
    input.apiKey,
    undefined,
    input.providerConfig,
  );

  // A failed run and a run without structured output are reported the same way.
  if (!(agentRun.success && agentRun.structuredOutput)) {
    const missingVerdict = new PentestError(
      `Auth validation agent did not return a structured verdict: ${agentRun.error ?? 'unknown error'}`,
      'validation',
      true,
      { agentError: agentRun.error },
      ErrorCode.AGENT_EXECUTION_FAILED,
    );
    return err(missingVerdict);
  }

  // Validate the shape of the verdict, then classify it.
  const verdict = AuthValidationSchema.safeParse(agentRun.structuredOutput);
  return verdict.success
    ? classifyResult(verdict.data)
    : err(
        new PentestError(
          `Auth validation output failed schema validation: ${verdict.error.message}`,
          'validation',
          true,
          { zodErrors: verdict.error.issues },
          ErrorCode.OUTPUT_VALIDATION_FAILED,
        ),
      );
}
+3
View File
@@ -151,6 +151,9 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
// Playwright session mapping - assigns each agent to a specific session for browser isolation
// Keys are promptTemplate values from AGENTS registry
export const PLAYWRIGHT_SESSION_MAPPING: Record<string, PlaywrightSession> = Object.freeze({
// Runs before any agent — non-concurrent, so agent1 is safe to share
'validate-authentication': 'agent1',
// Phase 1: Pre-reconnaissance
'pre-recon-code': 'agent1',
+136 -98
View File
@@ -18,28 +18,30 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
import { type StealthConfigWriteResult, writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js';
import { AuditSession } from '../audit/index.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
import type { SessionMetadata } from '../audit/utils.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import { distributeConfig, parseConfig, parseConfigYAML } from '../config-parser.js';
import type { CheckpointContext } from '../interfaces/checkpoint-provider.js';
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import { renderFindingsFromQueues } from '../services/findings-renderer.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import { runPreflightChecks } from '../services/preflight.js';
import { resolvePromptDir } from '../services/prompt-manager.js';
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { validateAuthentication } from '../services/validate-authentication.js';
import { AGENTS } from '../session-manager.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { ContainerConfig, ProviderConfig, VulnClass } from '../types/config.js';
import type { Config, ContainerConfig, ProviderConfig } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { isErr } from '../types/result.js';
import { atomicWrite, fileExists, readJson } from '../utils/file-io.js';
import { fileExists, readJson } from '../utils/file-io.js';
import { createActivityLogger } from './activity-logger.js';
import type { AgentMetrics, PipelineState, ResumeState } from './shared.js';
@@ -184,11 +186,7 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
attemptNumber,
...(input.apiKey !== undefined && { apiKey: input.apiKey }),
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
...(input.promptDir !== undefined && {
promptDir: path.isAbsolute(input.promptDir)
? input.promptDir
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
}),
...(input.promptDir !== undefined && { promptDir: resolvePromptDir(input.promptDir) }),
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
},
auditSession,
@@ -375,6 +373,131 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
}
}
/**
 * Activity wrapper around writePlaywrightStealthConfig.
 *
 * Calls the writer with `input.repoPath`, logs the value it returns, and
 * passes that value back to the caller unchanged.
 */
export async function syncPlaywrightStealthConfig(input: ActivityInput): Promise<StealthConfigWriteResult> {
  // The logger is created before the write, so a logger failure surfaces first.
  const activityLog = createActivityLogger();
  const { repoPath } = input;

  const writeOutcome = await writePlaywrightStealthConfig(repoPath);
  activityLog.info(`Playwright stealth config: ${writeOutcome}`);

  return writeOutcome;
}
/**
 * Auth-validation preflight activity.
 *
 * Loads the run config (inline YAML takes precedence over a config path) and
 * returns without doing anything when there is no config or no
 * `authentication` section. Otherwise it initializes an audit session and
 * calls validateAuthentication.
 *
 * Does not go through runAgentActivity: this flow has its own structured
 * output and retry handling.
 *
 * Every failure leaves this function as a Temporal ApplicationFailure, either
 * retryable or non-retryable according to classifyErrorForTemporal, with
 * phase/attempt/elapsed details attached. A heartbeat is sent on a fixed
 * interval for the whole duration and the timer is always cleared on exit.
 */
export async function runAuthenticationValidation(input: ActivityInput): Promise<void> {
  const startedAt = Date.now();
  const attemptNumber = Context.current().info.attempt;

  // Detail record attached to every failure; elapsed is measured when it is built.
  const baseDetail = (): Record<string, unknown> => ({
    phase: 'auth-validation',
    attemptNumber,
    elapsed: Date.now() - startedAt,
  });

  // Build the retryable or non-retryable failure and trim its stack trace.
  const buildFailure = (
    classified: ReturnType<typeof classifyErrorForTemporal>,
    message: string,
    details: Record<string, unknown>[],
  ): ApplicationFailure => {
    const failure = classified.retryable
      ? ApplicationFailure.create({ message, type: classified.type, details })
      : ApplicationFailure.nonRetryable(message, classified.type, details);
    truncateStackTrace(failure);
    return failure;
  };

  const heartbeatTimer = setInterval(() => {
    heartbeat({
      phase: 'auth-validation',
      elapsedSeconds: Math.floor((Date.now() - startedAt) / 1000),
      attempt: attemptNumber,
    });
  }, HEARTBEAT_INTERVAL_MS);

  try {
    const logger = createActivityLogger();
    logger.info('Running authentication validation...', { attempt: attemptNumber });

    // 1. Load config; inline YAML wins over a config file path.
    const loadConfig = async (): Promise<Config | undefined> => {
      if (input.configYAML) return parseConfigYAML(input.configYAML);
      if (input.configPath) return parseConfig(input.configPath);
      return undefined;
    };
    const config = await loadConfig();
    if (!config) {
      logger.info('No config provided, skipping auth validation');
      return;
    }

    const distributed = distributeConfig(config);
    if (!distributed.authentication) {
      logger.info('No authentication configured, skipping auth validation');
      return;
    }

    // 2. Audit session for this run.
    const auditSession = new AuditSession(buildSessionMetadata(input));
    await auditSession.initialize(input.workflowId);

    // 3. Drive the validation agent.
    const outcome = await validateAuthentication({
      webUrl: input.webUrl,
      repoPath: input.repoPath,
      config: distributed,
      pipelineTestingMode: input.pipelineTestingMode ?? false,
      auditSession,
      logger,
      promptDir: resolvePromptDir(input.promptDir),
      ...(input.apiKey !== undefined && { apiKey: input.apiKey }),
      ...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
    });

    if (isErr(outcome)) {
      const classified = classifyErrorForTemporal(outcome.error);
      const message = truncateErrorMessage(outcome.error.message);
      const details: Record<string, unknown>[] = [baseDetail()];

      // Expose the failure point and detail so consumers can act on them.
      if (outcome.error.context.failurePoint) {
        details.push({
          failurePoint: outcome.error.context.failurePoint,
          failureDetail: outcome.error.context.failureDetail,
        });
      }

      throw buildFailure(classified, message, details);
    }

    logger.info('Authentication validation passed');
  } catch (error) {
    // Failures built above are already in their final form.
    if (error instanceof ApplicationFailure) {
      throw error;
    }

    const classified = classifyErrorForTemporal(error);
    const message = truncateErrorMessage(error instanceof Error ? error.message : String(error));
    throw buildFailure(classified, message, [baseDetail()]);
  } finally {
    clearInterval(heartbeatTimer);
  }
}
/**
* Initialize a private git repository inside the workspace deliverables directory.
* Idempotent — skips if .git already exists (resume case).
@@ -401,50 +524,11 @@ export async function initDeliverableGit(input: ActivityInput): Promise<void> {
}
/**
* Sync code_path avoid rules into Claude's user-scope settings.json so the
* SDK enforces them at the tool layer for every agent in this run.
*
* Runs once per workflow before any agent fires. Config is fixed for the
* lifetime of the workflow, so writing once avoids the parallel-agent race
* on the global ~/.claude/settings.json file.
* Assemble the final report by concatenating exploitation evidence files.
*/
export async function syncCodePathDenyRules(input: ActivityInput): Promise<void> {
  const logger = createActivityLogger();
  const container = getOrCreateContainer(input.workflowId, buildSessionMetadata(input), buildContainerConfig(input));

  const loaded = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML);
  if (isErr(loaded)) {
    // Best-effort: a config that fails to load only skips the sync, it does not fail the activity.
    logger.warn(`syncCodePathDenyRules: skipping (config load failed: ${loaded.error.message})`);
    return;
  }

  const loadedConfig = loaded.value;

  // Count code_path avoid rules for the log line only; the writer receives the whole config.
  let codePathAvoidCount = 0;
  for (const rule of loadedConfig?.avoid ?? []) {
    if (rule.type === 'code_path') codePathAvoidCount += 1;
  }

  await writeUserSettingsForCodePathAvoids(loadedConfig);
  logger.info(`Synced code_path deny rules to user settings (${codePathAvoidCount} entries)`);
}
/**
* Assemble the final report by concatenating per-class deliverables.
*
* Under exploit=true, each exploit agent has produced `*_exploitation_evidence.md`
* directly. Under exploit=false, exploit agents didn't run; we deterministically
* render `*_findings.md` from each `*_exploitation_queue.json` first, then assemble.
*/
export async function assembleReportActivity(input: ActivityInput, exploit: boolean): Promise<void> {
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
const { repoPath, deliverablesSubdir } = input;
const logger = createActivityLogger();
if (!exploit) {
logger.info('Rendering per-class findings from analysis queues...');
try {
await renderFindingsFromQueues(repoPath, deliverablesSubdir, logger);
} catch (error) {
const err = error as Error;
logger.warn(`Error rendering findings from queues: ${err.message}`);
}
}
logger.info('Assembling deliverables from specialist agents...');
try {
await assembleFinalReport(repoPath, deliverablesSubdir, logger);
@@ -488,11 +572,6 @@ export async function checkExploitationQueue(input: ActivityInput, vulnType: Vul
return checker.checkQueue(vulnType, delivPath, logger);
}
/** Scope of a run as stored under `session.scope` in session.json. */
interface RunScope {
// Vulnerability classes selected for the run.
vulnClasses: VulnClass[];
// Exploit flag selected for the run.
exploit: boolean;
}
interface SessionJson {
session: {
id: string;
@@ -500,7 +579,6 @@ interface SessionJson {
repoPath?: string;
originalWorkflowId?: string;
resumeAttempts?: ResumeAttempt[];
scope?: RunScope;
};
metrics: {
agents: Record<
@@ -618,42 +696,6 @@ export async function loadResumeState(
};
}
/**
 * Record the run scope in session.json, or verify it against the recorded one.
 *
 * When session.json has no scope yet, the given scope is written atomically.
 * When a scope is already recorded, the given one must match it: same exploit
 * flag and the same vulnerability classes in any order.
 *
 * @throws ApplicationFailure (non-retryable, type 'ScopeMismatchError') when
 *   the provided scope differs from the recorded one.
 */
export async function persistOrValidateRunScope(
  input: ActivityInput,
  vulnClasses: VulnClass[],
  exploit: boolean,
): Promise<void> {
  const metadata = buildSessionMetadata(input);
  const auditSession = new AuditSession(metadata);
  await auditSession.initialize(input.workflowId);

  const sessionJsonPath = generateSessionJsonPath(metadata);
  const sessionJson = await readJson<SessionJson>(sessionJsonPath);
  const recorded = sessionJson.session.scope;

  // No recorded scope: store a copy of the provided one.
  if (!recorded) {
    sessionJson.session.scope = { vulnClasses: [...vulnClasses], exploit };
    await atomicWrite(sessionJsonPath, sessionJson);
    return;
  }

  // Order-insensitive comparison: equal length plus inclusion in both directions.
  const classesMatch =
    recorded.vulnClasses.length === vulnClasses.length &&
    recorded.vulnClasses.every((cls) => vulnClasses.includes(cls)) &&
    vulnClasses.every((cls) => recorded.vulnClasses.includes(cls));

  if (classesMatch && recorded.exploit === exploit) {
    return;
  }

  throw ApplicationFailure.nonRetryable(
    `Resume scope mismatch for workspace ${input.sessionId}.\n` +
      ` Original: vuln_classes=[${recorded.vulnClasses.join(', ')}], exploit=${recorded.exploit}\n` +
      ` Provided: vuln_classes=[${vulnClasses.join(', ')}], exploit=${exploit}\n` +
      `Resume requires the same scope as the original run. Start a new workspace if you want different scope.`,
    'ScopeMismatchError',
  );
}
async function findLatestCommit(gitDir: string, commitHashes: string[]): Promise<string> {
if (commitHashes.length === 1) {
const hash = commitHashes[0];
@@ -882,11 +924,7 @@ export async function generateReportOutputActivity(input: ActivityInput): Promis
// Resolve promptDir against the worker root so providers are cwd-independent.
const resolvedInput: ActivityInput = {
...input,
...(input.promptDir !== undefined && {
promptDir: path.isAbsolute(input.promptDir)
? input.promptDir
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
}),
...(input.promptDir !== undefined && { promptDir: resolvePromptDir(input.promptDir) }),
};
const result = await container.reportOutputProvider.generate(resolvedInput, logger);
+1 -3
View File
@@ -2,7 +2,7 @@ import { defineQuery } from '@temporalio/workflow';
export type { AgentMetrics } from '../types/metrics.js';
import type { DistributedConfig, PipelineConfig, ProviderConfig, VulnClass } from '../types/config.js';
import type { DistributedConfig, PipelineConfig, ProviderConfig } from '../types/config.js';
import type { ErrorCode } from '../types/errors.js';
import type { AgentMetrics } from '../types/metrics.js';
@@ -29,8 +29,6 @@ export interface PipelineInput {
checkpointsEnabled?: boolean; // Enable checkpoint activities (default: false)
skipGitCheck?: boolean; // Skip .git directory validation in preflight (e.g. when .git is removed after clone)
providerConfig?: ProviderConfig; // LLM provider configuration (Bedrock, Vertex, etc.)
vulnClasses?: VulnClass[]; // omitted = all five
exploit?: boolean; // false skips the exploitation phase
}
export interface ResumeState {
+16 -27
View File
@@ -36,7 +36,7 @@ import dotenv from 'dotenv';
import { sanitizeHostname } from '../audit/utils.js';
import { parseConfig } from '../config-parser.js';
import { deliverablesDir } from '../paths.js';
import type { PipelineConfig, VulnClass } from '../types/config.js';
import type { PipelineConfig } from '../types/config.js';
import { fileExists, readJson } from '../utils/file-io.js';
import * as activities from './activities.js';
import type { PipelineInput, PipelineProgress, PipelineState } from './shared.js';
@@ -275,39 +275,30 @@ async function resolveWorkspace(client: Client, args: CliArgs): Promise<Workspac
// === Pipeline Input Construction ===
interface OrchestrationConfig {
pipelineConfig: PipelineConfig;
vulnClasses?: VulnClass[];
exploit?: boolean;
}
async function loadOrchestrationConfig(configPath: string | undefined): Promise<OrchestrationConfig> {
if (!configPath) return { pipelineConfig: {} };
async function loadPipelineConfig(configPath: string | undefined): Promise<PipelineConfig> {
if (!configPath) return {};
try {
const config = await parseConfig(configPath);
const raw = config.pipeline;
if (!raw) return {};
const pipelineConfig: PipelineConfig = {};
if (config.pipeline?.retry_preset !== undefined) {
pipelineConfig.retry_preset = config.pipeline.retry_preset;
const result: PipelineConfig = {};
if (raw.retry_preset !== undefined) {
result.retry_preset = raw.retry_preset;
}
if (config.pipeline?.max_concurrent_pipelines !== undefined) {
pipelineConfig.max_concurrent_pipelines = Number(config.pipeline.max_concurrent_pipelines);
if (raw.max_concurrent_pipelines !== undefined) {
result.max_concurrent_pipelines = Number(raw.max_concurrent_pipelines);
}
return {
pipelineConfig,
...(config.vuln_classes && config.vuln_classes.length > 0 && { vulnClasses: [...config.vuln_classes] }),
...(config.exploit !== undefined && { exploit: config.exploit === 'true' }),
};
return result;
} catch {
return { pipelineConfig: {} };
return {};
}
}
function buildPipelineInput(
args: CliArgs,
workspace: WorkspaceResolution,
orchestration: OrchestrationConfig,
pipelineConfig: PipelineConfig,
): PipelineInput {
return {
webUrl: args.webUrl,
@@ -318,9 +309,7 @@ function buildPipelineInput(
...(args.pipelineTestingMode && { pipelineTestingMode: args.pipelineTestingMode }),
...(workspace.isResume && args.resumeFromWorkspace && { resumeFromWorkspace: args.resumeFromWorkspace }),
...(workspace.terminatedWorkflows.length > 0 && { terminatedWorkflows: workspace.terminatedWorkflows }),
...(Object.keys(orchestration.pipelineConfig).length > 0 && { pipelineConfig: orchestration.pipelineConfig }),
...(orchestration.vulnClasses && { vulnClasses: orchestration.vulnClasses }),
...(orchestration.exploit !== undefined && { exploit: orchestration.exploit }),
...(Object.keys(pipelineConfig).length > 0 && { pipelineConfig }),
};
}
@@ -428,8 +417,8 @@ async function run(): Promise<void> {
// 4. Resolve workspace and build pipeline input
const workspace = await resolveWorkspace(client, args);
const orchestration = await loadOrchestrationConfig(args.configPath);
const input = buildPipelineInput(args, workspace, orchestration);
const pipelineConfig = await loadPipelineConfig(args.configPath);
const input = buildPipelineInput(args, workspace, pipelineConfig);
// 5. Start worker polling in the background
const workerDone = worker.run();
+44 -49
View File
@@ -33,7 +33,6 @@ import {
} from '@temporalio/workflow';
import type { AgentName, VulnType } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import { ALL_VULN_CLASSES, type VulnClass } from '../types/config.js';
import type * as activities from './activities.js';
import type { ActivityInput } from './activities.js';
import {
@@ -49,19 +48,6 @@ import {
import { toWorkflowSummary } from './summary-mapper.js';
import { classifyErrorCode, formatWorkflowError } from './workflow-errors.js';
/**
 * List the agent names a run with the given scope is expected to complete.
 *
 * The list always starts with 'pre-recon' and 'recon' and ends with 'report'.
 * In between, each class contributes `<class>-vuln`, followed by
 * `<class>-exploit` when exploitation is enabled, in the order given.
 */
function computeExpectedAgents(vulnClasses: readonly VulnClass[], exploit: boolean): string[] {
  const perClassAgents: string[] = vulnClasses.flatMap((cls) =>
    exploit ? [`${cls}-vuln`, `${cls}-exploit`] : [`${cls}-vuln`],
  );
  return ['pre-recon', 'recon', ...perClassAgents, 'report'];
}
// Retry configuration for production (long intervals for billing recovery)
const PRODUCTION_RETRY = {
initialInterval: '5 minutes',
@@ -70,6 +56,7 @@ const PRODUCTION_RETRY = {
maximumAttempts: 50,
nonRetryableErrorTypes: [
'AuthenticationError',
'AuthLoginFailedError',
'PermissionError',
'InvalidRequestError',
'RequestTooLargeError',
@@ -134,6 +121,22 @@ const preflightActs = proxyActivities<typeof activities>({
retry: PREFLIGHT_RETRY,
});
// Retry policy for the auth-validation activity: at most 3 attempts, starting at
// a 10-second interval that doubles each retry and is capped at 1 minute.
// Shares the production list of non-retryable error types.
const AUTH_VALIDATION_RETRY = {
initialInterval: '10 seconds',
maximumInterval: '1 minute',
backoffCoefficient: 2,
maximumAttempts: 3,
nonRetryableErrorTypes: PRODUCTION_RETRY.nonRetryableErrorTypes,
};
// Activity proxy for auth validation: each attempt gets up to 10 minutes for the browser login.
// NOTE(review): heartbeatTimeout equals startToCloseTimeout, so a stalled worker would
// presumably not be detected any earlier than the overall timeout — confirm this is intended.
const authValidationActs = proxyActivities<typeof activities>({
startToCloseTimeout: '10 minutes',
heartbeatTimeout: '10 minutes',
retry: AUTH_VALIDATION_RETRY,
});
/**
* Compute aggregated metrics from the current pipeline state.
* Called on both success and failure to provide partial metrics.
@@ -229,14 +232,6 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
};
const selectedVulnClasses: readonly VulnClass[] =
input.vulnClasses && input.vulnClasses.length > 0 ? input.vulnClasses : ALL_VULN_CLASSES;
const selectedClassSet = new Set<VulnClass>(selectedVulnClasses);
const exploit: boolean = input.exploit ?? true;
const expectedAgents = computeExpectedAgents(selectedVulnClasses, exploit);
await a.persistOrValidateRunScope(activityInput, [...selectedVulnClasses], exploit);
let resumeState: ResumeState | null = null;
if (input.resumeFromWorkspace) {
@@ -260,11 +255,9 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
input.deliverablesSubdir,
);
// 3. Short-circuit when every agent expected by this run is done.
// Uses dynamic expectedAgents (not ALL_AGENTS) so a class-scoped run completes sooner.
const allExpectedDone = expectedAgents.every((a) => resumeState?.completedAgents.includes(a));
if (allExpectedDone) {
log.info(`All ${expectedAgents.length} expected agents already completed. Nothing to resume.`);
// 3. Short-circuit if all agents already completed
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
state.status = 'completed';
state.completedAgents = [...resumeState.completedAgents];
state.summary = computeSummary(state);
@@ -420,14 +413,19 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
await preflightActs.runPreflightValidation(activityInput);
log.info('Preflight validation passed');
// === Playwright Stealth Config ===
await a.syncPlaywrightStealthConfig(activityInput);
// === Auth Validation ===
// Browser-based credential check before committing to the full pipeline.
state.currentPhase = 'auth-validation';
state.currentAgent = 'validate-authentication';
await authValidationActs.runAuthenticationValidation(activityInput);
log.info('Auth validation passed');
// === Initialize Deliverables Git ===
await a.initDeliverableGit(activityInput);
// === Sync SDK deny rules ===
await a.syncCodePathDenyRules(activityInput);
log.info(`Run scope: vuln_classes=[${selectedVulnClasses.join(', ')}] exploit=${exploit}`);
// === Phase 1: Pre-Reconnaissance ===
await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
@@ -471,17 +469,19 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
// 2. Check exploitation queue for actionable findings
const decision = await a.checkExploitationQueue(activityInput, vulnType);
// 3. Previously-completed exploits are preserved regardless of mode; new exploits gated by mode.
// 3. Conditionally run exploitation agent
let exploitMetrics: AgentMetrics | null = null;
if (shouldSkip(exploitAgentName)) {
log.info(`Skipping ${exploitAgentName} (already complete)`);
state.completedAgents.push(exploitAgentName);
} else if (decision.shouldExploit && exploit) {
exploitMetrics = await runExploitAgent();
state.agentMetrics[exploitAgentName] = exploitMetrics;
state.completedAgents.push(exploitAgentName);
if (input.checkpointsEnabled) {
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
if (decision.shouldExploit) {
if (!shouldSkip(exploitAgentName)) {
exploitMetrics = await runExploitAgent();
state.agentMetrics[exploitAgentName] = exploitMetrics;
state.completedAgents.push(exploitAgentName);
if (input.checkpointsEnabled) {
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
}
} else {
log.info(`Skipping ${exploitAgentName} (already complete)`);
state.completedAgents.push(exploitAgentName);
}
}
@@ -503,11 +503,6 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
const pipelineThunks: Array<() => Promise<VulnExploitPipelineResult>> = [];
for (const config of pipelineConfigs) {
// Excluded classes drop entirely; any prior deliverables stay on disk but don't count this run.
if (!selectedClassSet.has(config.vulnType)) {
log.info(`Skipping ${config.vulnType} pipeline (class not selected this run)`);
continue;
}
if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
pipelineThunks.push(() => runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit));
} else {
@@ -529,8 +524,8 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
state.currentAgent = 'report';
await a.logPhaseTransition(activityInput, 'reporting', 'start');
// First, assemble the concatenated report from per-class deliverables
await a.assembleReportActivity(activityInput, exploit);
// First, assemble the concatenated report from exploitation evidence files
await a.assembleReportActivity(activityInput);
// Then run the report agent to add executive summary and clean up
state.agentMetrics.report = await a.runReportAgent(activityInput);
+11 -25
View File
@@ -8,12 +8,12 @@
* Configuration type definitions
*/
export type RuleType = 'url_path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter' | 'code_path';
export type RuleType = 'path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter';
export interface Rule {
description: string;
type: RuleType;
value: string;
url_path: string;
}
export interface Rules {
@@ -21,19 +21,6 @@ export interface Rules {
focus?: Rule[];
}
// Single source of truth for the vulnerability class names. Declared
// `as const` so the VulnClass union below is derived from this list and the
// type and the runtime array cannot drift apart when a class is added or removed.
const VULN_CLASS_NAMES = ['injection', 'xss', 'auth', 'authz', 'ssrf'] as const;

/** Name of one vulnerability class. */
export type VulnClass = (typeof VULN_CLASS_NAMES)[number];

/** Every vulnerability class, in declaration order. */
export const ALL_VULN_CLASSES: readonly VulnClass[] = VULN_CLASS_NAMES;
// Severity labels, from 'low' through 'critical'.
export type Severity = 'low' | 'medium' | 'high' | 'critical';
// Confidence labels, from 'low' through 'high'.
export type Confidence = 'low' | 'medium' | 'high';
// Optional report settings; every field may be omitted.
// NOTE(review): min_severity / min_confidence presumably act as lower bounds
// for what the report includes, and guidance as free-form text for the report
// step — confirm against the code that consumes this config.
export interface ReportConfig {
min_severity?: Severity;
min_confidence?: Confidence;
guidance?: string;
}
// Allowed values for Authentication.login_type.
export type LoginType = 'form' | 'sso' | 'api' | 'basic';
export interface SuccessCondition {
@@ -41,12 +28,19 @@ export interface SuccessCondition {
value: string;
}
export interface Credentials {
username: string;
// Settings for the `email_login` credentials type, used for magic-link and
// email-OTP login flows.
// NOTE(review): address/password presumably identify the mailbox that
// receives the link or code, and totp_secret an optional second factor for
// that mailbox — confirm against the auth-validation activity.
export interface EmailLogin {
address: string;
password: string;
totp_secret?: string;
}
// Login credentials for the target. Only `username` is required.
export interface Credentials {
username: string;
// Optional so that passwordless flows can omit it.
password?: string;
totp_secret?: string;
// Present for magic-link / email-OTP flows; see EmailLogin for its fields.
email_login?: EmailLogin;
}
export interface Authentication {
login_type: LoginType;
login_url: string;
@@ -60,10 +54,6 @@ export interface Config {
authentication?: Authentication;
pipeline?: PipelineConfig;
description?: string;
vuln_classes?: VulnClass[];
exploit?: 'true' | 'false';
report?: ReportConfig;
rules_of_engagement?: string;
}
export type RetryPreset = 'default' | 'subscription';
@@ -78,10 +68,6 @@ export interface DistributedConfig {
focus: Rule[];
authentication: Authentication | null;
description: string;
vuln_classes: VulnClass[];
exploit: boolean;
report: ReportConfig;
rules_of_engagement: string;
}
/**
+3
View File
@@ -45,6 +45,9 @@ export enum ErrorCode {
TARGET_UNREACHABLE = 'TARGET_UNREACHABLE',
AUTH_FAILED = 'AUTH_FAILED',
BILLING_ERROR = 'BILLING_ERROR',
// Auth validation errors
AUTH_LOGIN_FAILED = 'AUTH_LOGIN_FAILED',
}
// Category labels for pentest errors; 'unknown' is available when no other label fits.
export type PentestErrorType = 'config' | 'network' | 'prompt' | 'filesystem' | 'validation' | 'billing' | 'unknown';
-11
View File
@@ -1,11 +0,0 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import { glob } from 'zx';
/**
 * Report whether a string is a glob pattern.
 *
 * Thin wrapper that delegates entirely to `glob.isDynamicPattern` from `zx`
 * and returns its result unchanged.
 *
 * @param value - The string to inspect.
 * @returns The boolean returned by `glob.isDynamicPattern(value)`.
 */
export function isGlobPattern(value: string): boolean {
return glob.isDynamicPattern(value);
}