Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 03e2bc1e11 |
@@ -135,6 +135,7 @@ shannon <URL> <REPO> --pipeline-testing
|
||||
|-------------------|---------|------------|
|
||||
| `config` | Configuration file issues | No |
|
||||
| `network` | Connection/timeout issues | Yes |
|
||||
| `tool` | External tool (nmap, etc.) failed | Yes |
|
||||
| `prompt` | Claude SDK/API issues | Sometimes |
|
||||
| `filesystem` | File read/write errors | Sometimes |
|
||||
| `validation` | Deliverable validation failed | Yes (via retry) |
|
||||
|
||||
+3
-6
@@ -4,9 +4,6 @@
|
||||
# Recommended output token configuration for larger tool outputs
|
||||
CLAUDE_CODE_MAX_OUTPUT_TOKENS=64000
|
||||
|
||||
# Adaptive thinking is enabled automatically on Opus 4.6/4.7. Set to false to disable.
|
||||
# CLAUDE_ADAPTIVE_THINKING=false
|
||||
|
||||
# =============================================================================
|
||||
# OPTION 1: Direct Anthropic
|
||||
# =============================================================================
|
||||
@@ -29,7 +26,7 @@ ANTHROPIC_API_KEY=your-api-key-here
|
||||
# Optional for direct Anthropic and custom base URL modes. Required for Bedrock/Vertex.
|
||||
# ANTHROPIC_SMALL_MODEL=... # Small tier (default: claude-haiku-4-5-20251001)
|
||||
# ANTHROPIC_MEDIUM_MODEL=... # Medium tier (default: claude-sonnet-4-6)
|
||||
# ANTHROPIC_LARGE_MODEL=... # Large tier (default: claude-opus-4-7)
|
||||
# ANTHROPIC_LARGE_MODEL=... # Large tier (default: claude-opus-4-6)
|
||||
|
||||
# =============================================================================
|
||||
# OPTION 3: AWS Bedrock
|
||||
@@ -39,7 +36,7 @@ ANTHROPIC_API_KEY=your-api-key-here
|
||||
# Example Bedrock model IDs for us-east-1:
|
||||
# ANTHROPIC_SMALL_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
|
||||
# ANTHROPIC_MEDIUM_MODEL=us.anthropic.claude-sonnet-4-6
|
||||
# ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-7
|
||||
# ANTHROPIC_LARGE_MODEL=us.anthropic.claude-opus-4-6
|
||||
|
||||
# CLAUDE_CODE_USE_BEDROCK=1
|
||||
# AWS_REGION=us-east-1
|
||||
@@ -55,7 +52,7 @@ ANTHROPIC_API_KEY=your-api-key-here
|
||||
# Example Vertex AI model IDs:
|
||||
# ANTHROPIC_SMALL_MODEL=claude-haiku-4-5@20251001
|
||||
# ANTHROPIC_MEDIUM_MODEL=claude-sonnet-4-6
|
||||
# ANTHROPIC_LARGE_MODEL=claude-opus-4-7
|
||||
# ANTHROPIC_LARGE_MODEL=claude-opus-4-6
|
||||
|
||||
# CLAUDE_CODE_USE_VERTEX=1
|
||||
# CLOUD_ML_REGION=us-east5
|
||||
|
||||
@@ -57,7 +57,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
|
||||
|
||||
### Five-Phase Pipeline
|
||||
|
||||
1. **Pre-Recon** (`pre-recon`) — Source code analysis to build the architectural baseline
|
||||
1. **Pre-Recon** (`pre-recon`) — External scans (nmap, subfinder, whatweb) + source code analysis
|
||||
2. **Recon** (`recon`) — Attack surface mapping from initial findings
|
||||
3. **Vulnerability Analysis** (5 parallel agents) — injection, xss, auth, authz, ssrf
|
||||
4. **Exploitation** (5 parallel agents, conditional) — Exploits confirmed vulnerabilities
|
||||
|
||||
+49
-2
@@ -13,12 +13,44 @@ RUN apk update && apk add --no-cache \
|
||||
curl \
|
||||
wget \
|
||||
ca-certificates \
|
||||
# Network libraries for Go tools
|
||||
libpcap-dev \
|
||||
linux-headers \
|
||||
# Language runtimes
|
||||
go \
|
||||
nodejs-22 \
|
||||
npm \
|
||||
python3 \
|
||||
py3-pip \
|
||||
ruby \
|
||||
ruby-dev \
|
||||
# Security tools available in Wolfi
|
||||
nmap \
|
||||
# Additional utilities
|
||||
bash
|
||||
|
||||
# Set environment variables for Go
|
||||
ENV GOPATH=/go
|
||||
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
ENV CGO_ENABLED=1
|
||||
|
||||
# Create directories
|
||||
RUN mkdir -p $GOPATH/bin
|
||||
|
||||
# Install Go-based security tools
|
||||
RUN go install -v github.com/projectdiscovery/subfinder/v2/cmd/subfinder@v2.13.0
|
||||
# Install WhatWeb from release tarball (Ruby-based tool)
|
||||
RUN curl -sL https://github.com/urbanadventurer/WhatWeb/archive/refs/tags/v0.6.3.tar.gz | tar xz -C /opt && \
|
||||
mv /opt/WhatWeb-0.6.3 /opt/whatweb && \
|
||||
chmod +x /opt/whatweb/whatweb && \
|
||||
gem install addressable -v 2.8.9 && \
|
||||
echo '#!/bin/bash' > /usr/local/bin/whatweb && \
|
||||
echo 'cd /opt/whatweb && exec ./whatweb "$@"' >> /usr/local/bin/whatweb && \
|
||||
chmod +x /usr/local/bin/whatweb
|
||||
|
||||
# Install Python-based tools
|
||||
RUN pip3 install --no-cache-dir schemathesis==4.13.0
|
||||
|
||||
# Install pnpm
|
||||
RUN npm install -g pnpm@10.33.0
|
||||
|
||||
@@ -52,11 +84,12 @@ RUN apk add --no-cache \
|
||||
curl \
|
||||
ca-certificates \
|
||||
shadow \
|
||||
# Language runtimes (minimal)
|
||||
libpcap \
|
||||
nmap \
|
||||
nodejs-22 \
|
||||
npm \
|
||||
python3 \
|
||||
# Chromium browser and dependencies for Playwright
|
||||
ruby \
|
||||
chromium \
|
||||
nss \
|
||||
freetype \
|
||||
@@ -71,6 +104,20 @@ RUN apk add --no-cache \
|
||||
fontconfig \
|
||||
|| true
|
||||
|
||||
# Copy Go binaries from builder
|
||||
COPY --from=builder /go/bin/subfinder /usr/local/bin/
|
||||
|
||||
# Copy WhatWeb from builder
|
||||
COPY --from=builder /opt/whatweb /opt/whatweb
|
||||
COPY --from=builder /usr/local/bin/whatweb /usr/local/bin/whatweb
|
||||
|
||||
# Install WhatWeb Ruby dependencies in runtime stage
|
||||
RUN gem install addressable -v 2.8.9
|
||||
|
||||
# Copy Python packages from builder
|
||||
COPY --from=builder /usr/lib/python3.*/site-packages /usr/lib/python3.12/site-packages
|
||||
COPY --from=builder /usr/bin/schemathesis /usr/bin/
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup -g 1001 pentest && \
|
||||
adduser -u 1001 -G pentest -s /bin/bash -D pentest
|
||||
|
||||
+2
-1
@@ -147,7 +147,7 @@ This phase informs everything downstream. If the codebase uses an ORM with param
|
||||
|
||||
## Phase 2: Reconnaissance
|
||||
|
||||
Bridges static and dynamic analysis using browser automation. The recon agent correlates code findings with the live application, validating that endpoints actually exist, mapping authentication flows, inventorying input vectors (URL parameters, POST fields, headers, cookies), and documenting the real authorization architecture.
|
||||
Bridges static and dynamic analysis using browser automation. The recon agent correlates code findings with the live application, validating that endpoints actually exist, mapping authentication flows, inventorying input vectors (URL parameters, POST fields, headers, cookies), and documenting the real authorization architecture. This phase may also integrate with infrastructure discovery tools including Nmap, Subfinder, and WhatWeb for network perimeter mapping.
|
||||
|
||||
## Phase 3: Vulnerability Analysis
|
||||
|
||||
@@ -194,6 +194,7 @@ This correlation means that a data flow vulnerability identified in static analy
|
||||
- **Fully Autonomous Operation:** Shannon Pro handles complex workflows including 2FA/TOTP logins and SSO (e.g., Sign in with Google) without human intervention. TOTP is handled via a dedicated MCP server tool.
|
||||
- **White-Box Awareness:** Unlike black-box scanners, Shannon Pro reads the source code to intelligently guide its attack strategy, combining code-level insight with runtime validation.
|
||||
- **Parallel Processing:** Vulnerability analysis and exploitation phases run concurrently across attack domains, with pipelined parallelism minimizing total execution time.
|
||||
- **Tool Orchestration:** Shannon Pro orchestrates existing security tools (e.g., Schemathesis for API testing, Nmap for network discovery) while adding LLM reasoning to interpret results.
|
||||
- **Configurable Login Flows:** Authentication configuration specifies login procedures and credentials, which are interpolated into agent prompts for authenticated testing.
|
||||
|
||||
---
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
"dependencies": {
|
||||
"@hono/node-server": "^1.14.0",
|
||||
"@kubernetes/client-node": "^1.4.0",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
"@trebuchet/worker": "workspace:*",
|
||||
"@temporalio/client": "^1.11.0",
|
||||
"hono": "^4.7.0",
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
export interface Config {
|
||||
readonly port: number;
|
||||
readonly mcpPort: number;
|
||||
readonly temporalAddress: string;
|
||||
readonly apiKey: string;
|
||||
readonly k8sNamespace: string;
|
||||
@@ -29,7 +28,6 @@ export function loadConfig(): Config {
|
||||
|
||||
return {
|
||||
port: Number(process.env.PORT) || 3000,
|
||||
mcpPort: Number(process.env.MCP_PORT) || 3100,
|
||||
temporalAddress: process.env.TEMPORAL_ADDRESS || 'hightower-temporal:7233',
|
||||
apiKey,
|
||||
k8sNamespace: process.env.K8S_NAMESPACE || 'hightower',
|
||||
|
||||
@@ -1,204 +0,0 @@
|
||||
/**
|
||||
* MCP server for Hightower scan management.
|
||||
* Exposes scan-manager tools via the Model Context Protocol over HTTP.
|
||||
*/
|
||||
|
||||
import http from 'node:http';
|
||||
import type * as k8s from '@kubernetes/client-node';
|
||||
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
||||
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
||||
import type { Client } from '@temporalio/client';
|
||||
import { z } from 'zod';
|
||||
import type { Config } from '../config.js';
|
||||
import { cancelScan, getReport, getScan, listScans, startScan } from '../services/scan-manager.js';
|
||||
import type { CreateScanInput } from '../types/api.js';
|
||||
|
||||
/**
 * Dependencies handed to the MCP server so its tool handlers can reach the
 * scan-manager service functions.
 */
export interface McpServerDeps {
  /** Application configuration; forwarded as the first argument to every scan-manager call. */
  readonly config: Config;
  /** Temporal client; forwarded to `getScan`, `listScans` and `cancelScan`. */
  readonly temporalClient: Client;
  /** Kubernetes Batch API client; forwarded to `startScan`, `listScans` and `cancelScan`. */
  readonly batchApi: k8s.BatchV1Api;
  /** Kubernetes Core API client. NOTE(review): not referenced by any tool handler visible in this file. */
  readonly coreApi: k8s.CoreV1Api;
}
|
||||
|
||||
/**
 * Builds the `hightower` MCP server and registers the five scan-management
 * tools (`start_scan`, `get_scan`, `list_scans`, `cancel_scan`, `get_report`).
 *
 * Each tool is a thin adapter: it forwards its validated arguments to the
 * matching scan-manager function and wraps the outcome in a single text
 * content item.
 *
 * @param deps - Config and clients forwarded to the scan-manager functions.
 * @returns The configured server; the caller still has to connect a transport.
 */
function createMcpServer(deps: McpServerDeps): McpServer {
  // Small builders for the tool result envelopes used by every handler below.
  const textItem = (text: string) => ({ type: 'text' as const, text });
  const jsonReply = (payload: unknown) => ({
    content: [textItem(JSON.stringify(payload, null, 2))],
  });
  const errorReply = (message: string) => ({
    content: [textItem(message)],
    isError: true,
  });

  const server = new McpServer(
    { name: 'hightower', version: '1.0.0' },
    { capabilities: { tools: {} } },
  );

  // === Tool: start_scan ===
  const startScanInput = z.object({
    targetUrl: z.string().describe('Target URL to scan (e.g., https://example.com)'),
    gitUrl: z.string().describe('Git URL of the repository to analyze (e.g., https://github.com/user/repo)'),
    workspace: z
      .string()
      .optional()
      .describe(
        'Optional workspace name. Must match /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,127}$/. Defaults to auto-generated from target URL.',
      ),
    gitRef: z.string().optional().describe('Optional Git branch/tag/commit to checkout before scanning.'),
    pipelineTesting: z
      .boolean()
      .optional()
      .describe('If true, runs in minimal testing mode with fast retries (10s). Use for development.'),
  });

  server.registerTool(
    'start_scan',
    {
      description: 'Start a new penetration test scan. Returns the scan ID and initial status.',
      inputSchema: startScanInput,
    },
    async (args) => {
      const { targetUrl, gitUrl, workspace, gitRef, pipelineTesting } = args;

      // gitRef / pipelineTesting are only added as keys when the caller supplied them.
      const input: CreateScanInput = {
        targetUrl,
        gitUrl,
        workspace,
        ...(gitRef === undefined ? {} : { gitRef }),
        ...(pipelineTesting === undefined ? {} : { pipelineTesting }),
      };

      const started = await startScan(deps.config, deps.batchApi, input);
      return jsonReply(started);
    },
  );

  // === Tool: get_scan ===
  server.registerTool(
    'get_scan',
    {
      description: 'Get the status, progress, and results of a running or completed scan.',
      inputSchema: z.object({
        scanId: z.string().describe('The scan ID returned from start_scan (e.g., hightower-worker-abc123)'),
      }),
    },
    async (args) => {
      const { scanId } = args;
      const scan = await getScan(deps.config, deps.temporalClient, scanId);
      return scan ? jsonReply(scan) : errorReply(`Scan '${scanId}' not found.`);
    },
  );

  // === Tool: list_scans ===
  server.registerTool(
    'list_scans',
    {
      description: 'List all running and historical scans.',
      inputSchema: z.object({}),
    },
    async () => {
      const scans = await listScans(deps.config, deps.temporalClient, deps.batchApi);
      return jsonReply(scans);
    },
  );

  // === Tool: cancel_scan ===
  server.registerTool(
    'cancel_scan',
    {
      description: 'Cancel a running scan by terminating its Kubernetes Job and Temporal workflow.',
      inputSchema: z.object({
        scanId: z.string().describe('The scan ID to cancel.'),
      }),
    },
    async (args) => {
      const { scanId } = args;
      await cancelScan(deps.config, deps.temporalClient, deps.batchApi, scanId);
      return { content: [textItem(`Scan '${scanId}' cancellation requested.`)] };
    },
  );

  // === Tool: get_report ===
  server.registerTool(
    'get_report',
    {
      description: 'Get the final security report for a completed scan.',
      inputSchema: z.object({
        scanId: z.string().describe('The scan ID to get the report for.'),
      }),
    },
    async (args) => {
      const { scanId } = args;
      const report = await getReport(deps.config, scanId);
      if (report) {
        // The report is returned verbatim rather than JSON-encoded.
        return { content: [textItem(report)] };
      }
      return errorReply(`Report for scan '${scanId}' not found.`);
    },
  );

  return server;
}
|
||||
|
||||
/**
 * Creates the MCP server, connects it to a streamable-HTTP transport and
 * starts a Node HTTP server that forwards every request to that transport.
 *
 * @param deps - Dependencies passed through to the tool handlers.
 * @param port - TCP port to listen on.
 * @returns A promise that resolves with the HTTP server once it is listening,
 *   and rejects if the server emits `error` before it starts listening
 *   (for example when the port is already in use).
 */
export async function startMcpServer(deps: McpServerDeps, port: number): Promise<http.Server> {
  const mcpServer = createMcpServer(deps);
  const transport = new StreamableHTTPServerTransport({
    sessionIdGenerator: () => crypto.randomUUID(),
  });

  // Cast to Transport — the SDK's Transport interface requires onclose: () => void
  // but StreamableHTTPServerTransport allows undefined (handled internally).
  await mcpServer.connect(transport as never);

  const server = http.createServer((req, res) => {
    // handleRequest is async; without a catch a rejection would surface as an
    // unhandled promise rejection and leave the client without a response.
    transport.handleRequest(req, res, undefined).catch((error: unknown) => {
      console.error('MCP request handling failed:', error);

      if (res.headersSent) {
        // A response (possibly a stream) is already in flight; just terminate it.
        res.end();
        return;
      }

      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(
        JSON.stringify({
          jsonrpc: '2.0',
          error: { code: -32603, message: 'Internal server error' },
          id: null,
        }),
      );
    });
  });

  return new Promise<http.Server>((resolve, reject) => {
    // Startup failures reject the returned promise.
    server.once('error', reject);

    server.listen(port, () => {
      // After a successful listen the promise is settled, so `reject` would
      // silently swallow later errors; log them instead.
      server.off('error', reject);
      server.on('error', (error: Error) => {
        console.error('MCP server error:', error);
      });

      console.log(`MCP server listening on port ${port}`);
      resolve(server);
    });
  });
}
|
||||
@@ -26,25 +26,6 @@ import { displaySplash } from './splash.js';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/**
 * Refuses to continue when the CLI was launched through sudo or as root.
 *
 * Does nothing when `SUDO_USER` is unset/empty and the effective UID is not 0.
 * Otherwise it prints an explanation to stderr (plus a Docker post-install
 * hint on Linux) and exits the process with status 1.
 */
function blockSudo(): void {
  const viaSudo = Boolean(process.env.SUDO_USER);
  // geteuid is not available on every platform, hence the optional call.
  const asRoot = process.geteuid?.() === 0;

  if (viaSudo || asRoot) {
    // The sudo wording takes precedence when both conditions hold.
    const messages: string[] = viaSudo
      ? ['ERROR: Shannon must not be run with sudo.', 'Re-run this command as your normal user.']
      : [
          'ERROR: Shannon must not be run as the root user.',
          'Switch to a regular user account and re-run this command.',
        ];

    if (process.platform === 'linux') {
      messages.push(
        'Configure Docker to run without sudo first:',
        'https://docs.docker.com/engine/install/linux-postinstall',
      );
    }

    for (const message of messages) {
      console.error(message);
    }
    process.exit(1);
  }
}
|
||||
|
||||
function getVersion(): string {
|
||||
try {
|
||||
const pkgPath = path.join(__dirname, '..', 'package.json');
|
||||
@@ -198,8 +179,6 @@ function parseStartArgs(argv: string[]): ParsedStartArgs {
|
||||
|
||||
// === Main Dispatch ===
|
||||
|
||||
blockSudo();
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
// Parse --backend flag before command dispatch
|
||||
|
||||
@@ -118,51 +118,6 @@
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"vuln_classes": {
|
||||
"type": "array",
|
||||
"description": "Vulnerability classes to test. When omitted, all five classes run. When set, only listed classes run; their vuln+exploit agents and report sections are included.",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["injection", "xss", "auth", "authz", "ssrf"]
|
||||
},
|
||||
"minItems": 1,
|
||||
"maxItems": 5,
|
||||
"uniqueItems": true
|
||||
},
|
||||
"exploit": {
|
||||
"type": "string",
|
||||
"enum": ["true", "false"],
|
||||
"description": "Whether to run the exploitation phase (default true). Set false to run only analysis."
|
||||
},
|
||||
"report": {
|
||||
"type": "object",
|
||||
"description": "Report filtering and guidance applied by the report agent.",
|
||||
"properties": {
|
||||
"min_severity": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high", "critical"],
|
||||
"description": "Minimum severity threshold; findings below are dropped by the report agent."
|
||||
},
|
||||
"min_confidence": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"],
|
||||
"description": "Minimum confidence threshold; findings below are dropped by the report agent."
|
||||
},
|
||||
"guidance": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 500,
|
||||
"description": "Free-text guidance to the report agent (e.g., 'Drop findings about missing security headers')."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
},
|
||||
"rules_of_engagement": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 1000,
|
||||
"description": "Free-text instructions to the agent that render into every prompt."
|
||||
},
|
||||
"login": {
|
||||
"type": "object",
|
||||
"description": "Deprecated: Use 'authentication' section instead",
|
||||
@@ -180,11 +135,7 @@
|
||||
{ "required": ["authentication"] },
|
||||
{ "required": ["rules"] },
|
||||
{ "required": ["authentication", "rules"] },
|
||||
{ "required": ["description"] },
|
||||
{ "required": ["vuln_classes"] },
|
||||
{ "required": ["exploit"] },
|
||||
{ "required": ["report"] },
|
||||
{ "required": ["rules_of_engagement"] }
|
||||
{ "required": ["description"] }
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
@@ -200,17 +151,17 @@
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": ["url_path", "subdomain", "domain", "method", "header", "parameter", "code_path"],
|
||||
"description": "Type of rule (what aspect of requests or source code to match against)"
|
||||
"enum": ["path", "subdomain", "domain", "method", "header", "parameter"],
|
||||
"description": "Type of rule (what aspect of requests to match against)"
|
||||
},
|
||||
"value": {
|
||||
"url_path": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 1000,
|
||||
"description": "Value to match"
|
||||
"description": "URL path pattern or value to match"
|
||||
}
|
||||
},
|
||||
"required": ["description", "type", "value"],
|
||||
"required": ["description", "type", "url_path"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,27 +4,6 @@
|
||||
# Description of the target environment (optional, max 500 chars)
|
||||
description: "Next.js e-commerce app on PostgreSQL. Local dev environment — .env files contain local-only credentials, not deployed to production."
|
||||
|
||||
# Limit which vulnerability classes run end-to-end (optional, default: all five)
|
||||
# vuln_classes: [injection, xss, auth, authz, ssrf]
|
||||
|
||||
# Skip the exploitation phase (optional, default: "true")
|
||||
# exploit: "false"
|
||||
|
||||
# Free-form engagement rules applied to analysis and exploitation agents (optional).
|
||||
# Example below is illustrative; edit, remove, or add sections as needed.
|
||||
# rules_of_engagement: |
|
||||
# Forbidden techniques:
|
||||
# - No password brute-force or credential stuffing. Cap login attempts at 5 per account.
|
||||
# - ...
|
||||
#
|
||||
# Operational:
|
||||
# - Throttle to under 5 requests per second per endpoint. Back off 60 seconds on any 429 response.
|
||||
# - ...
|
||||
#
|
||||
# Data handling:
|
||||
# - Do not include actual values in deliverables — use placeholders like [order_id] or [user_email].
|
||||
# - ...
|
||||
|
||||
authentication:
|
||||
login_type: form # Options: 'form' or 'sso'
|
||||
login_url: "https://example.com/login"
|
||||
@@ -46,55 +25,27 @@ authentication:
|
||||
value: "/dashboard"
|
||||
|
||||
rules:
|
||||
# Supported types: url_path, subdomain, domain, method, header, parameter, code_path
|
||||
avoid:
|
||||
- description: "Do not test the marketing site subdomain"
|
||||
type: subdomain
|
||||
value: "www"
|
||||
url_path: "www"
|
||||
|
||||
- description: "Skip logout functionality"
|
||||
type: url_path
|
||||
value: "/logout"
|
||||
type: path
|
||||
url_path: "/logout"
|
||||
|
||||
- description: "No DELETE operations on user API"
|
||||
type: url_path
|
||||
value: "/api/v1/users/*"
|
||||
type: path
|
||||
url_path: "/api/v1/users/*"
|
||||
|
||||
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "test/**").
|
||||
# - description: "Test fixtures and specs (not production code)"
|
||||
# type: code_path
|
||||
# value: "test/**"
|
||||
#
|
||||
# - description: "Generated migrations"
|
||||
# type: code_path
|
||||
# value: "db/migrations/**"
|
||||
|
||||
focus:
|
||||
- description: "Prioritize beta admin panel subdomain"
|
||||
type: subdomain
|
||||
value: "beta-admin"
|
||||
url_path: "beta-admin"
|
||||
|
||||
- description: "Focus on user profile updates"
|
||||
type: url_path
|
||||
value: "/api/v2/user-profile"
|
||||
|
||||
# code_path values are repo-relative file paths or globs (e.g. "src/auth.ts", "routes/*.ts").
|
||||
# - description: "Express route handlers"
|
||||
# type: code_path
|
||||
# value: "routes/*.ts"
|
||||
#
|
||||
# - description: "Sequelize ORM model definitions"
|
||||
# type: code_path
|
||||
# value: "models/*.ts"
|
||||
|
||||
# Report filters applied by the report agent when assembling the final report (optional).
|
||||
# Example below is illustrative; edit, remove, or add sections as needed.
|
||||
# report:
|
||||
# min_severity: low
|
||||
# min_confidence: low
|
||||
# guidance: |
|
||||
# Drop findings about missing security headers and rate-limit gaps.
|
||||
# ...
|
||||
type: path
|
||||
url_path: "/api/v2/user-profile"
|
||||
|
||||
# Pipeline execution settings (optional)
|
||||
# pipeline:
|
||||
|
||||
@@ -105,8 +105,6 @@ If you only confirmed a vulnerability without attempting to weaponize it, you ha
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/auth_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
|
||||
@@ -92,8 +92,6 @@ After exhaustive bypass attempts, determine:
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/authz_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
|
||||
@@ -69,8 +69,6 @@ Remember: An unproven vulnerability is worse than no finding at all - it wastes
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/injection_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
@@ -182,16 +180,17 @@ For each root vulnerability in your plan, you will follow this systematic, four-
|
||||
## **Strategic Tool Usage**
|
||||
Use the right tool for the job to ensure thoroughness.
|
||||
- **Use `curl` (Manual Probing) for:** Initial confirmation, simple UNION/Error-based injections, and crafting specific WAF bypasses.
|
||||
- **Use `sqlmap` (Automation) for:** Time-consuming blind injections, automating enumeration **after** manual confirmation, and as a final step to try a wide range of payloads when manual techniques are failing.
|
||||
|
||||
## **Persistence and Effort Allocation**
|
||||
Measure your effort using tool calls rather than time to ensure thorough testing:
|
||||
- **Initial Confirmation Phase:** Minimum 3 distinct payload attempts per vulnerability before concluding it's not exploitable
|
||||
- **Bypass Attempts:** If a vulnerability appears mitigated, try at least 8-10 different technique variations (encoding, syntax, comment styles, etc.) before concluding it's properly defended
|
||||
- **Escalation Trigger:** If manual testing exceeds 10-12 tool calls without progress on a single vulnerability, escalate to Task Agent scripting
|
||||
- **Escalation Trigger:** If manual testing exceeds 10-12 tool calls without progress on a single vulnerability, escalate to automated tools (`sqlmap`) or Task Agent scripting
|
||||
- **Termination Criteria:** After systematic attempts with multiple different techniques → classify as appropriate level
|
||||
|
||||
## **Using the Task Agent for Custom Scripting**
|
||||
You must delegate every injection automation task to the Task Agent. Use manual `curl` runs for spot checks, then escalate to scripted payload loops handled by the Task Agent.
|
||||
You must delegate every injection automation task to the Task Agent. Use manual `curl` or `sqlmap` runs for spot checks, then escalate to scripted payload loops handled by the Task Agent.
|
||||
|
||||
**TEMPLATE FOR SCRIPTING TASKS (REQUIRED):**
|
||||
"
|
||||
|
||||
@@ -92,8 +92,6 @@ After exhaustive bypass attempts, determine:
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/ssrf_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
|
||||
@@ -90,8 +90,6 @@ After exhaustive bypass attempts, determine:
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<starting_context>
|
||||
- Your **actionable exploitation queue** is located at `.shannon/deliverables/xss_exploitation_queue.json`. This is your to-do list, containing the precise targets identified by the analysis phase. You MUST process every vulnerability in this queue.
|
||||
|
||||
|
||||
@@ -31,20 +31,12 @@ Read `.gitignore` and run `git ls-files --others --ignored --exclude-standard --
|
||||
|
||||
{{DESCRIPTION}}
|
||||
|
||||
<scope>
|
||||
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}.
|
||||
</scope>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<system_architecture>
|
||||
**PENTESTING WORKFLOW - YOUR POSITION:**
|
||||
|
||||
**Phase Sequence:** **PRE-RECON (You)** → RECON → VULN ANALYSIS (5 agents) → EXPLOITATION (5 agents) → REPORTING
|
||||
|
||||
**Your Input:** Target application source code in the working directory
|
||||
**Your Input:** External scan results from pre-recon tools (nmap, subfinder, whatweb)
|
||||
**Your Output:** `.shannon/deliverables/pre_recon_deliverable.md` (feeds all subsequent analysis phases)
|
||||
**Shared Intelligence:** You create the foundational intelligence baseline that all other agents depend on
|
||||
|
||||
@@ -79,6 +71,7 @@ You are the **Code Intelligence Gatherer** and **Architectural Foundation Builde
|
||||
|
||||
<starting_context>
|
||||
- You are the **ENTRY POINT** of the comprehensive security assessment - no prior deliverables exist to read
|
||||
- External reconnaissance tools have completed and their results are available in the working environment
|
||||
- The target application source code has been cloned and is ready for analysis in the current directory
|
||||
- You must create the **foundational intelligence baseline** that all subsequent agents depend on
|
||||
- **CRITICAL:** This is the ONLY agent with full source code access - your completeness determines whether vulnerabilities are found
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<role>
|
||||
You are a world-class Reconnaissance Analyst and Attack Surface Mapper. Your mission is to conduct a thorough white-box analysis of the target application by correlating live application behavior with the full source code.
|
||||
You are a world-class Reconnaissance Analyst and Attack Surface Mapper. Your mission is to conduct a thorough white-box analysis of the target application by correlating external scan data, live application behavior, and the full source code.
|
||||
</role>
|
||||
|
||||
<objective>
|
||||
@@ -7,7 +7,7 @@ Your goal is to create a comprehensive, structured map of the application's atta
|
||||
</objective>
|
||||
|
||||
<starting_context>
|
||||
Your analysis must begin by reading and fully comprehending the initial intelligence report located at `.shannon/deliverables/pre_recon_deliverable.md`. This file contains the initial source code analysis. This is your only starting information.
|
||||
Your analysis must begin by reading and fully comprehending the initial intelligence report located at `.shannon/deliverables/pre_recon_deliverable.md`. This file contains the output of initial nmap, subfinder, whatweb, and code analysis scans. This is your only starting information.
|
||||
</starting_context>
|
||||
|
||||
<target>
|
||||
@@ -20,10 +20,6 @@ Filesystem:
|
||||
- {{REPO_PATH}}/.shannon/scratchpad/ (read-write) - screenshots, scripts, scratch work, etc.
|
||||
</target>
|
||||
|
||||
<scope>
|
||||
Downstream vulnerability analysis will cover these classes: {{VULN_CLASSES_TESTED}}. Map only what supports these classes.
|
||||
</scope>
|
||||
|
||||
<rules>
|
||||
Rules to Avoid:
|
||||
{{RULES_AVOID}}
|
||||
@@ -32,14 +28,10 @@ Areas to Focus On:
|
||||
{{RULES_FOCUS}}
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<scope_boundaries>
|
||||
# Penetration Test Scope & Boundaries
|
||||
|
||||
@@ -88,13 +80,13 @@ Please use these tools for the following use cases:
|
||||
|
||||
**Phase Sequence:** PRE-RECON (Complete) → **RECONNAISSANCE (You)** → VULN ANALYSIS (5 agents) → EXPLOITATION (5 agents) → FINAL REPORT (next phase)
|
||||
|
||||
**Your Input:** `.shannon/deliverables/pre_recon_deliverable.md` (initial code analysis)
|
||||
**Your Input:** `.shannon/deliverables/pre_recon_deliverable.md` (external scan data, initial code analysis)
|
||||
**Your Output:** `.shannon/deliverables/recon_deliverable.md` (comprehensive attack surface map)
|
||||
**Shared Intelligence:** None (you are the first analysis specialist)
|
||||
|
||||
**WHAT HAPPENED BEFORE YOU:**
|
||||
- Pre-reconnaissance agent performed initial source code analysis
|
||||
- Attack surfaces, technologies, and entry points were catalogued from the codebase
|
||||
- Pre-reconnaissance agent performed external scans (nmap, subfinder, whatweb) and initial code analysis
|
||||
- All attack surfaces, technologies, and entry points were catalogued from an external perspective
|
||||
|
||||
**WHAT HAPPENS AFTER YOU:**
|
||||
- Injection Analysis specialist will analyze SQL injection and command injection vulnerabilities using your attack surface map
|
||||
@@ -120,7 +112,7 @@ You must follow this methodical four-step process:
|
||||
|
||||
1. **Synthesize Initial Data:**
|
||||
- Read the entire `.shannon/deliverables/pre_recon_deliverable.md`.
|
||||
- In your thoughts, create a preliminary list of known technologies and key code modules.
|
||||
- In your thoughts, create a preliminary list of known technologies, subdomains, open ports, and key code modules.
|
||||
|
||||
2. **Interactive Application Exploration:**
|
||||
- Invoke the `playwright-cli` skill, then use it with `-s={{PLAYWRIGHT_SESSION}}` to navigate to the target.
|
||||
@@ -174,6 +166,8 @@ A brief overview of the application's purpose, core technology stack (e.g., Next
|
||||
- **Frontend:** [Framework, key libraries, authentication libraries]
|
||||
- **Backend:** [Language, framework, key dependencies]
|
||||
- **Infrastructure:** [Hosting provider, CDN, database type]
|
||||
- **Identified Subdomains:** [List from subfinder and any others discovered]
|
||||
- **Open Ports & Services:** [List from nmap and their purpose]
|
||||
|
||||
## 3. Authentication & Session Management Flow
|
||||
- **Entry Points:** [e.g., /login, /register, /auth/sso]
|
||||
|
||||
@@ -9,11 +9,11 @@ Technical leadership (CTOs, CISOs, Engineering VPs) who need both technical accu
|
||||
</audience>
|
||||
|
||||
<objective>
|
||||
The orchestrator has already concatenated all per-class deliverables into `comprehensive_security_assessment_report.md`. Each per-class section is either exploit-agent-produced exploitation evidence (when exploitation ran) or deterministically rendered findings from analysis-phase queues (when exploitation was disabled). The cleanup rules below apply uniformly to either source.
|
||||
The orchestrator has already concatenated all specialist reports into `comprehensive_security_assessment_report.md`.
|
||||
Your task is to:
|
||||
1. Read this existing concatenated report
|
||||
2. Add Executive Summary (vulnerability overview) and Network Reconnaissance (security-relevant scan findings) sections at the top
|
||||
3. Clean up ALL per-class report sections by removing extraneous content
|
||||
3. Clean up ALL exploitation evidence sections by removing hallucinated content
|
||||
4. Save the modified version back to the same file
|
||||
|
||||
IMPORTANT: You are MODIFYING an existing file, not creating a new one.
|
||||
@@ -32,13 +32,8 @@ Filesystem:
|
||||
<context>
|
||||
Authentication Context:
|
||||
{{AUTH_CONTEXT}}
|
||||
|
||||
Vulnerability classes tested in this run: {{VULN_CLASSES_TESTED}}
|
||||
Exploitation: {{EXPLOITATION}}
|
||||
</context>
|
||||
|
||||
{{REPORT_FILTERS_BLOCK}}
|
||||
|
||||
<input_files>
|
||||
You will analyze the following deliverable files:
|
||||
- `.shannon/deliverables/pre_recon_deliverable.md` - Initial reconnaissance and technology stack
|
||||
@@ -56,14 +51,26 @@ MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessmen
|
||||
## Executive Summary
|
||||
- Target: {{WEB_URL}}
|
||||
- Assessment Date: {current date}
|
||||
- Scope: {{VULN_CLASSES_TESTED}}
|
||||
- Exploitation: {{EXPLOITATION}}
|
||||
- Scope: Authentication, XSS, SQL and Command Injection, SSRF, Authorization testing
|
||||
|
||||
## Summary by Vulnerability Type
|
||||
|
||||
{For each vulnerability type below, examine the comprehensive_security_assessment_report.md sections and provide a summary. If no section exists for that type or no vulnerabilities are listed, explicitly state that none were found.}
|
||||
|
||||
{{VULN_SUMMARY_SUBSECTIONS}}
|
||||
**Authentication Vulnerabilities:**
|
||||
{Check for "Authentication Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authentication vulnerabilities were found."}
|
||||
|
||||
**Authorization Vulnerabilities:**
|
||||
{Check for "Authorization Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No authorization vulnerabilities were found."}
|
||||
|
||||
**Cross-Site Scripting (XSS) Vulnerabilities:**
|
||||
{Check for "XSS Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No XSS vulnerabilities were found."}
|
||||
|
||||
**SQL/Command Injection Vulnerabilities:**
|
||||
{Check for "Injection Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SQL or command injection vulnerabilities were found."}
|
||||
|
||||
**Server-Side Request Forgery (SSRF) Vulnerabilities:**
|
||||
{Check for "SSRF Exploitation Evidence" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No SSRF vulnerabilities were found."}
|
||||
|
||||
## Network Reconnaissance
|
||||
{Extract security-relevant findings from automated scanning tools:
|
||||
@@ -73,32 +80,34 @@ MODIFY the existing file `.shannon/deliverables/comprehensive_security_assessmen
|
||||
- Any other security-relevant findings from the automated tools
|
||||
SKIP stack details - technical leaders know their infrastructure}
|
||||
|
||||
2. KEEPING the existing per-class report sections but CLEANING them according to the rules below
|
||||
2. KEEPING the existing exploitation evidence sections but CLEANING them according to the rules below
|
||||
|
||||
3. The final structure should be:
|
||||
- Executive Summary (new)
|
||||
- Network Reconnaissance (new)
|
||||
- All existing per-class report sections (cleaned)
|
||||
- All existing exploitation evidence sections (cleaned)
|
||||
|
||||
IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
|
||||
IMPORTANT: Do NOT reorder the existing exploitation evidence sections. Maintain the exact order they appear in the concatenated report. Only remove sections that do not match the defined criteria above.
|
||||
|
||||
</deliverable_instructions>
|
||||
|
||||
<instructions>
|
||||
1. Read the pre_recon and recon deliverable files to gather security-relevant information:
|
||||
- Focus on findings from automated tools (nmap, subfinder, whatweb) that indicate security risks
|
||||
- Note exposed services, open ports, subdomains, security misconfigurations
|
||||
- Skip basic details such as the technology stack (the team knows their own stack)
|
||||
- Use technical leadership tone - precise but concise
|
||||
- Use the current date for the assessment date
|
||||
|
||||
2. Create the Executive Summary content:
|
||||
2. Create the Executive Summary and Network Reconnaissance content:
|
||||
- Executive Summary: Technical overview with actionable findings for engineering leaders
|
||||
- Network Reconnaissance: Focus on security-relevant discoveries from automated scans
|
||||
|
||||
3. Clean the per-class report sections in `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
|
||||
3. Clean the exploitation evidence sections from `.shannon/deliverables/comprehensive_security_assessment_report.md` by applying these rules:
|
||||
- KEEP these specific section headings:
|
||||
NOTE: these sections will contain vulnerability lists with IDs matching pattern `### [TYPE]-VULN-[NUMBER]`
|
||||
* `# [Type] {{REPORT_VULN_HEADING}}`
|
||||
* `## {{REPORT_VULN_SUBHEADING}}`
|
||||
{{REPORT_FILTER_RULES}}
|
||||
* `# [Type] Exploitation Evidence`
|
||||
* `## Successfully Exploited Vulnerabilities`
|
||||
- REMOVE ANY OTHER SECTIONS (even if they contain vulnerability IDs), such as:
|
||||
* `## Potential Vulnerabilities (Validation Blocked)` (All agents)
|
||||
* Standalone "Recommendations" sections
|
||||
@@ -110,11 +119,11 @@ IMPORTANT: Do NOT reorder the existing per-class report sections. Maintain the e
|
||||
* False positives sections
|
||||
* any intros in the sections
|
||||
* any counts in the sections
|
||||
- Preserve exact vulnerability IDs (`### [TYPE]-VULN-NN:`); if the title after the colon is only a short category label rather than a descriptive phrase, rewrite it to a concise human-readable descriptor derived from the finding's Vulnerable location and Overview.
|
||||
- Preserve exact vulnerability IDs and formatting
|
||||
|
||||
4. Combine the content:
|
||||
- Place the Executive Summary and Network Reconnaissance sections at the top
|
||||
- Follow with the cleaned per-class report sections
|
||||
- Follow with the cleaned exploitation evidence sections
|
||||
- Save as the modified `.shannon/deliverables/comprehensive_security_assessment_report.md`
|
||||
|
||||
CRITICAL: You are modifying the existing concatenated report at `.shannon/deliverables/comprehensive_security_assessment_report.md` IN-PLACE, not creating a separate file.
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
<code_path_rules>
|
||||
Source-code routing. Each rule is tagged `[FILE]` (literal path) or `[GLOB]` (pattern). All paths are repository-relative.
|
||||
|
||||
How to apply (focus rules):
|
||||
- For `[FILE]` entries — delegate analysis to the Task tool.
|
||||
- For `[GLOB]` entries — invoke the Glob tool to enumerate matches, then delegate analysis of every match to the Task tool.
|
||||
|
||||
Avoid — out of scope. Skip entirely; the tool layer will block any access attempts.
|
||||
{{CODE_RULES_AVOID}}
|
||||
|
||||
Focus — priority work assignments. Analyze every entry.
|
||||
{{CODE_RULES_FOCUS}}
|
||||
</code_path_rules>
|
||||
@@ -1,3 +0,0 @@
|
||||
<rules_of_engagement>
|
||||
{{RULES_OF_ENGAGEMENT}}
|
||||
</rules_of_engagement>
|
||||
@@ -20,14 +20,10 @@ Success criterion: A complete, code-backed analysis of every potential authentic
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** A flaw in authentication is a catastrophic failure. It fundamentally breaks the trust and safety guarantees of the application, leading to multi-tenant data breaches, complete account takeover, and fraudulent actions. These are the most critical vulnerabilities you can find.
|
||||
|
||||
@@ -19,14 +19,10 @@ Success criterion: A complete, code-backed analysis of every potential authoriza
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** A flaw in authorization is a critical security failure. It allows privilege escalation, horizontal and vertical access control bypass, and unauthorized data access. These vulnerabilities enable attackers to access resources they shouldn't, escalate privileges, and compromise multi-tenant data isolation.
|
||||
|
||||
@@ -20,14 +20,10 @@ Success criterion: Complete source-to-sink traces detailing path, sanitizers, si
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** A structural flaw in a backend command, whether an SQL query or a shell command, is one of the most severe classes of vulnerability in a SaaS application. It undermines the foundational trust of the system by creating the *potential* for data exposure (SQLi) or direct server compromise (Command Injection).
|
||||
|
||||
@@ -19,14 +19,10 @@ Success criterion: A complete source-to-sink trace for every identified SSRF vul
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** An SSRF flaw is a breach of network segmentation and trust boundaries. It allows attackers to leverage the application server as a proxy to access internal services, cloud metadata endpoints, or perform reconnaissance of internal networks. These vulnerabilities can lead to data exposure, privilege escalation, and complete network compromise.
|
||||
|
||||
@@ -19,14 +19,10 @@ Success criterion: Live confirmation of XSS execution for every vulnerability th
|
||||
@include(shared/_rules.txt)
|
||||
</rules>
|
||||
|
||||
@include(shared/_code-path-rules.txt)
|
||||
|
||||
<login_instructions>
|
||||
{{LOGIN_INSTRUCTIONS}}
|
||||
</login_instructions>
|
||||
|
||||
@include(shared/_rules-of-engagement.txt)
|
||||
|
||||
<critical>
|
||||
**Your Professional Standard**
|
||||
- **Severity Context:** An XSS flaw is a breach of trust between the user and the application. It grants an attacker the ability to execute arbitrary code within a victim's browser, leading to session hijacking, credential theft, and complete compromise of the user's account and data within the application.
|
||||
|
||||
@@ -18,7 +18,7 @@ import { formatTimestamp } from '../utils/formatting.js';
|
||||
import { Timer } from '../utils/metrics.js';
|
||||
import { createAuditLogger } from './audit-logger.js';
|
||||
import { dispatchMessage } from './message-handlers.js';
|
||||
import { type ModelTier, resolveModel, supportsAdaptiveThinking } from './models.js';
|
||||
import { type ModelTier, resolveModel } from './models.js';
|
||||
import { detectExecutionContext, formatCompletionMessage, formatErrorOutput } from './output-formatters.js';
|
||||
import { createProgressManager } from './progress-manager.js';
|
||||
|
||||
@@ -218,7 +218,6 @@ export async function runClaudePrompt(
|
||||
// 4. Configure SDK options
|
||||
// Model override from providerConfig takes precedence over env-based resolveModel
|
||||
const model = providerConfig?.modelOverrides?.[modelTier] ?? resolveModel(modelTier);
|
||||
const adaptiveThinking = supportsAdaptiveThinking(model) && process.env.CLAUDE_ADAPTIVE_THINKING !== 'false';
|
||||
const options = {
|
||||
model,
|
||||
maxTurns: 10_000,
|
||||
@@ -227,7 +226,6 @@ export async function runClaudePrompt(
|
||||
allowDangerouslySkipPermissions: true,
|
||||
settingSources: ['user'] as ('user' | 'project' | 'local')[],
|
||||
env: sdkEnv,
|
||||
...(adaptiveThinking && { thinking: { type: 'adaptive' as const } }),
|
||||
...(outputFormat && { outputFormat }),
|
||||
};
|
||||
|
||||
|
||||
@@ -39,10 +39,7 @@ function extractMessageContent(message: AssistantMessage): string {
|
||||
const messageContent = message.message;
|
||||
|
||||
if (Array.isArray(messageContent.content)) {
|
||||
return messageContent.content
|
||||
.filter((c: ContentBlock) => c.type !== 'thinking' && c.type !== 'redacted_thinking')
|
||||
.map((c: ContentBlock) => c.text || JSON.stringify(c))
|
||||
.join('\n');
|
||||
return messageContent.content.map((c: ContentBlock) => c.text || JSON.stringify(c)).join('\n');
|
||||
}
|
||||
|
||||
return String(messageContent.content);
|
||||
|
||||
@@ -21,7 +21,7 @@ export type ModelTier = 'small' | 'medium' | 'large';
|
||||
const DEFAULT_MODELS: Readonly<Record<ModelTier, string>> = {
|
||||
small: 'claude-haiku-4-5-20251001',
|
||||
medium: 'claude-sonnet-4-6',
|
||||
large: 'claude-opus-4-7',
|
||||
large: 'claude-opus-4-6',
|
||||
};
|
||||
|
||||
/** Resolve a model tier to a concrete model ID. */
|
||||
@@ -35,8 +35,3 @@ export function resolveModel(tier: ModelTier = 'medium'): string {
|
||||
return process.env.ANTHROPIC_MEDIUM_MODEL || DEFAULT_MODELS.medium;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reports whether a model ID names a model that supports adaptive thinking.
 *
 * The check is an unanchored match for `opus-4-6` or `opus-4-7`, so IDs that
 * carry a prefix or suffix around that fragment also match.
 *
 * @param model - Concrete model ID to inspect.
 * @returns `true` when the ID contains `opus-4-6` or `opus-4-7`.
 */
export function supportsAdaptiveThinking(model: string): boolean {
  const adaptiveCapable = /opus-4-[67]/;
  return model.search(adaptiveCapable) !== -1;
}
|
||||
|
||||
@@ -17,26 +17,15 @@ import type { AgentName } from '../types/agents.js';
|
||||
|
||||
// === Common Fields ===
|
||||
|
||||
const ANALYSIS_NOTES_DESCRIPTION = 'Plain context for defenders (caveats, scope, what is at risk). Not attack steps.';
|
||||
const baseVulnerability = z.object({
|
||||
ID: z.string(),
|
||||
vulnerability_type: z.string(),
|
||||
externally_exploitable: z.boolean(),
|
||||
confidence: z.string(),
|
||||
notes: z.string().optional(),
|
||||
});
|
||||
|
||||
/**
 * Builds the schema for the optional `notes` field of a vulnerability entry.
 *
 * @param exploit - When `true`, the field is a bare optional string. When
 *   `false`, the same field additionally carries `ANALYSIS_NOTES_DESCRIPTION`
 *   as its schema description.
 * @returns An optional string schema, described only in the non-exploit case.
 */
function notesField(exploit: boolean) {
  const plainNotes = z.string().optional();
  if (exploit) {
    return plainNotes;
  }
  return plainNotes.describe(ANALYSIS_NOTES_DESCRIPTION);
}
|
||||
|
||||
/**
 * Builds the object schema holding the fields common to every vulnerability
 * entry: `ID`, `vulnerability_type`, `externally_exploitable`, `confidence`
 * and the optional `notes`.
 *
 * @param exploit - Forwarded to `notesField` to select the `notes` variant.
 * @returns The base object schema that per-class schemas extend.
 */
function makeBase(exploit: boolean) {
  const commonShape = {
    ID: z.string(),
    vulnerability_type: z.string(),
    externally_exploitable: z.boolean(),
    confidence: z.string(),
    notes: notesField(exploit),
  };
  return z.object(commonShape);
}
|
||||
|
||||
// === Per-Vuln-Type Schemas (used for type inference; notes description is mode-agnostic for types) ===
|
||||
|
||||
const baseVulnerability = makeBase(true);
|
||||
// === Per-Vuln-Type Schemas ===
|
||||
|
||||
const InjectionVulnerability = baseVulnerability.extend({
|
||||
source: z.string().optional(),
|
||||
@@ -90,13 +79,13 @@ const AuthzVulnerability = baseVulnerability.extend({
|
||||
minimal_witness: z.string().optional(),
|
||||
});
|
||||
|
||||
// === Inferred Entry Types (consumed by renderer) ===
|
||||
// === Queue Wrapper Schemas ===
|
||||
|
||||
export type InjectionFinding = z.infer<typeof InjectionVulnerability>;
|
||||
export type XssFinding = z.infer<typeof XssVulnerability>;
|
||||
export type AuthFinding = z.infer<typeof AuthVulnerability>;
|
||||
export type SsrfFinding = z.infer<typeof SsrfVulnerability>;
|
||||
export type AuthzFinding = z.infer<typeof AuthzVulnerability>;
|
||||
const InjectionQueueSchema = z.object({ vulnerabilities: z.array(InjectionVulnerability) });
|
||||
const XssQueueSchema = z.object({ vulnerabilities: z.array(XssVulnerability) });
|
||||
const AuthQueueSchema = z.object({ vulnerabilities: z.array(AuthVulnerability) });
|
||||
const SsrfQueueSchema = z.object({ vulnerabilities: z.array(SsrfVulnerability) });
|
||||
const AuthzQueueSchema = z.object({ vulnerabilities: z.array(AuthzVulnerability) });
|
||||
|
||||
// === Convert to JSON Schema for SDK ===
|
||||
|
||||
@@ -106,95 +95,15 @@ function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat {
|
||||
return { type: 'json_schema', schema: z.toJSONSchema(zodSchema, { target: 'draft-07' }) as Record<string, unknown> };
|
||||
}
|
||||
|
||||
// === Per-Mode Output Format Builders ===
|
||||
// Two maps cached at module load; the only per-mode difference is the
|
||||
// description on the `notes` field, which steers the LLM's writing.
|
||||
// === Lookup Maps ===
|
||||
|
||||
/**
 * Builds the structured output format for each vulnerability-analysis agent.
 *
 * Each agent's format is a JSON schema for `{ vulnerabilities: [...] }`, where
 * every array item is the base vulnerability schema from `makeBase(exploit)`
 * extended with that class's own optional string fields.
 *
 * @param exploit - Forwarded to `makeBase`; selects the variant of the base schema.
 * @returns A map from agent name to output format for the five `*-vuln` agents.
 */
function buildOutputFormats(exploit: boolean): Partial<Record<AgentName, JsonSchemaOutputFormat>> {
  const base = makeBase(exploit);

  // Every class-specific field is an optional string; build a fresh schema per field.
  const text = () => z.string().optional();

  // Wrap one entry schema into the `{ vulnerabilities: [...] }` queue and convert it.
  const asQueue = (entry: z.ZodType): JsonSchemaOutputFormat =>
    toOutputFormat(z.object({ vulnerabilities: z.array(entry) }));

  const injectionEntry = base.extend({
    source: text(),
    combined_sources: text(),
    path: text(),
    sink_call: text(),
    slot_type: text(),
    sanitization_observed: text(),
    concat_occurrences: text(),
    verdict: text(),
    mismatch_reason: text(),
    witness_payload: text(),
  });

  const xssEntry = base.extend({
    source: text(),
    source_detail: text(),
    path: text(),
    sink_function: text(),
    render_context: text(),
    encoding_observed: text(),
    verdict: text(),
    mismatch_reason: text(),
    witness_payload: text(),
  });

  const authEntry = base.extend({
    source_endpoint: text(),
    vulnerable_code_location: text(),
    missing_defense: text(),
    exploitation_hypothesis: text(),
    suggested_exploit_technique: text(),
  });

  const ssrfEntry = base.extend({
    source_endpoint: text(),
    vulnerable_parameter: text(),
    vulnerable_code_location: text(),
    missing_defense: text(),
    exploitation_hypothesis: text(),
    suggested_exploit_technique: text(),
  });

  const authzEntry = base.extend({
    endpoint: text(),
    vulnerable_code_location: text(),
    role_context: text(),
    guard_evidence: text(),
    side_effect: text(),
    reason: text(),
    minimal_witness: text(),
  });

  return {
    'injection-vuln': asQueue(injectionEntry),
    'xss-vuln': asQueue(xssEntry),
    'auth-vuln': asQueue(authEntry),
    'ssrf-vuln': asQueue(ssrfEntry),
    'authz-vuln': asQueue(authzEntry),
  };
}
|
||||
|
||||
const OUTPUT_FORMATS_EXPLOIT = buildOutputFormats(true);
|
||||
const OUTPUT_FORMATS_ANALYSIS = buildOutputFormats(false);
|
||||
const VULN_AGENT_OUTPUT_FORMAT: Partial<Record<AgentName, JsonSchemaOutputFormat>> = {
|
||||
'injection-vuln': toOutputFormat(InjectionQueueSchema),
|
||||
'xss-vuln': toOutputFormat(XssQueueSchema),
|
||||
'auth-vuln': toOutputFormat(AuthQueueSchema),
|
||||
'ssrf-vuln': toOutputFormat(SsrfQueueSchema),
|
||||
'authz-vuln': toOutputFormat(AuthzQueueSchema),
|
||||
};
|
||||
|
||||
const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
|
||||
'injection-vuln': 'injection_exploitation_queue.json',
|
||||
@@ -205,8 +114,8 @@ const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
|
||||
};
|
||||
|
||||
/** Returns the structured output format for a vuln agent, or undefined for non-vuln agents. */
|
||||
export function getOutputFormat(agentName: AgentName, exploit = true): JsonSchemaOutputFormat | undefined {
|
||||
return (exploit ? OUTPUT_FORMATS_EXPLOIT : OUTPUT_FORMATS_ANALYSIS)[agentName];
|
||||
export function getOutputFormat(agentName: AgentName): JsonSchemaOutputFormat | undefined {
|
||||
return VULN_AGENT_OUTPUT_FORMAT[agentName];
|
||||
}
|
||||
|
||||
/** Returns the queue filename for a vuln agent, or undefined for non-vuln agents. */
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Writes ~/.claude/settings.json with permissions.deny rules derived from
|
||||
* `code_path` avoid patterns. The SDK reads this via `settingSources: ['user']`;
|
||||
* deny rules fire even in `bypassPermissions` mode.
|
||||
*/
|
||||
|
||||
import os from 'node:os';
|
||||
import { fs, path } from 'zx';
|
||||
import type { DistributedConfig } from '../types/config.js';
|
||||
|
||||
/** Tool names for which each avoided path pattern gets a deny entry. */
const FILE_TOOLS = ['Read', 'Edit'] as const;

/**
 * Expands one avoid pattern into deny entries of the form `Tool(./pattern)`,
 * one per tool in `FILE_TOOLS`.
 *
 * Leading `./`, `../` and `/` prefixes are removed so the result is always
 * anchored with a single `./`. Only whole prefix segments are stripped: a
 * leading dot that is part of a name (`.env`, `.github/...`) is preserved, so
 * patterns for dotfiles and dot-directories keep pointing at the right path.
 *
 * @param pattern - Repository-relative file path or glob.
 * @returns One deny entry per file tool, in `FILE_TOOLS` order.
 */
function denyEntriesFor(pattern: string): string[] {
  // Match whole "./", "../" or "/" segments only; a bare character class would
  // also eat the dot of ".env" and silently deny the wrong path.
  const relative = pattern.replace(/^(?:\.{0,2}\/)+/, '');
  return FILE_TOOLS.map((tool) => `${tool}(./${relative})`);
}
|
||||
|
||||
/**
 * Synchronizes `~/.claude/settings.json` with the `code_path` avoid rules of a config.
 *
 * When the config has no `code_path` avoid rules (or is `null`), the settings
 * file is removed. Otherwise the file is written with a single
 * `permissions.deny` list holding the entries produced by `denyEntriesFor`
 * for each avoided pattern; any previous file content is replaced.
 *
 * @param config - Distributed config whose `avoid` rules are inspected, or `null`.
 */
export async function writeUserSettingsForCodePathAvoids(config: DistributedConfig | null): Promise<void> {
  const codePathAvoids: string[] = [];
  for (const rule of config?.avoid ?? []) {
    if (rule.type === 'code_path') {
      codePathAvoids.push(rule.value);
    }
  }

  const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');

  // Nothing to deny: drop the settings file instead of writing an empty rule set.
  if (codePathAvoids.length === 0) {
    await fs.remove(settingsPath);
    return;
  }

  const deny = codePathAvoids.flatMap((pattern) => denyEntriesFor(pattern));

  await fs.ensureDir(path.dirname(settingsPath));
  await fs.writeJson(settingsPath, { permissions: { deny } }, { spaces: 2 });
}
|
||||
@@ -52,8 +52,6 @@ export interface ToolResultData {
|
||||
export interface ContentBlock {
|
||||
type?: string;
|
||||
text?: string;
|
||||
thinking?: string;
|
||||
data?: string;
|
||||
}
|
||||
|
||||
export interface AssistantMessage {
|
||||
|
||||
@@ -10,13 +10,7 @@ import type { FormatsPlugin } from 'ajv-formats';
|
||||
import yaml from 'js-yaml';
|
||||
import { fs } from 'zx';
|
||||
import { PentestError } from './services/error-handling.js';
|
||||
import {
|
||||
ALL_VULN_CLASSES,
|
||||
type Authentication,
|
||||
type Config,
|
||||
type DistributedConfig,
|
||||
type Rule,
|
||||
} from './types/config.js';
|
||||
import type { Authentication, Config, DistributedConfig, Rule } from './types/config.js';
|
||||
import { ErrorCode } from './types/errors.js';
|
||||
|
||||
// Handle ESM/CJS interop for ajv-formats using require
|
||||
@@ -312,39 +306,6 @@ export const parseConfigYAML = (yamlContent: string): Config => {
|
||||
return config as Config;
|
||||
};
|
||||
|
||||
/**
 * Rejects configs that still use renamed rule fields.
 *
 * Scans `rules.avoid` and then `rules.focus` (when they are arrays) and
 * collects one message for every rule whose `type` is `'path'`, and one for
 * every rule that has a `url_path` key but no `value` key. Non-object entries
 * are ignored.
 *
 * @param config - Parsed configuration; inspected as an untyped record.
 * @throws PentestError with code `CONFIG_VALIDATION_FAILED` listing every
 *   collected message, when at least one deprecated field is found.
 */
function checkDeprecatedFields(config: Config): void {
  const problems: string[] = [];
  const ruleSets = (config as Record<string, unknown>).rules as { avoid?: unknown; focus?: unknown } | undefined;

  for (const section of ['avoid', 'focus'] as const) {
    const entries = ruleSets?.[section];
    if (!Array.isArray(entries)) continue;

    for (let position = 0; position < entries.length; position++) {
      const entry: unknown = entries[position];
      if (entry === null || typeof entry !== 'object') continue;

      const fields = entry as Record<string, unknown>;
      if (fields.type === 'path') {
        problems.push(`rules.${section}[${position}].type: 'path' has been renamed to 'url_path'.`);
      }
      if ('url_path' in fields && !('value' in fields)) {
        problems.push(`rules.${section}[${position}]: the rule field 'url_path' has been renamed to 'value'.`);
      }
    }
  }

  if (problems.length === 0) return;

  throw new PentestError(
    `Configuration uses deprecated fields. Please update:\n - ${problems.join('\n - ')}`,
    'config',
    false,
    { deprecatedFields: problems },
    ErrorCode.CONFIG_VALIDATION_FAILED,
  );
}
|
||||
|
||||
const validateConfig = (config: Config): void => {
|
||||
if (!config || typeof config !== 'object') {
|
||||
throw new PentestError(
|
||||
@@ -366,8 +327,6 @@ const validateConfig = (config: Config): void => {
|
||||
);
|
||||
}
|
||||
|
||||
checkDeprecatedFields(config);
|
||||
|
||||
const isValid = validateSchema(config);
|
||||
if (!isValid) {
|
||||
const errors = validateSchema.errors || [];
|
||||
@@ -383,16 +342,10 @@ const validateConfig = (config: Config): void => {
|
||||
|
||||
performSecurityValidation(config);
|
||||
|
||||
const hasAnySteering =
|
||||
!!config.rules ||
|
||||
!!config.authentication ||
|
||||
!!config.description ||
|
||||
!!config.vuln_classes ||
|
||||
config.exploit !== undefined ||
|
||||
!!config.report ||
|
||||
!!config.rules_of_engagement;
|
||||
if (!hasAnySteering) {
|
||||
console.warn('⚠️ Configuration file contains no steering fields. The pentest will run with all defaults.');
|
||||
if (!config.rules && !config.authentication && !config.description) {
|
||||
console.warn(
|
||||
'⚠️ Configuration file contains no rules, authentication, or description. The pentest will run without any scoping restrictions or login capabilities.',
|
||||
);
|
||||
} else if (config.rules && !config.rules.avoid && !config.rules.focus) {
|
||||
console.warn('⚠️ Configuration file contains no rules. The pentest will run without any scoping restrictions.');
|
||||
}
|
||||
@@ -479,34 +432,6 @@ const performSecurityValidation = (config: Config): void => {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (config.rules_of_engagement) {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(config.rules_of_engagement)) {
|
||||
throw new PentestError(
|
||||
`rules_of_engagement contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'rules_of_engagement', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (config.report?.guidance) {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(config.report.guidance)) {
|
||||
throw new PentestError(
|
||||
`report.guidance contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: 'report.guidance', pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): void => {
|
||||
@@ -514,12 +439,12 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
|
||||
|
||||
rules.forEach((rule, index) => {
|
||||
for (const pattern of DANGEROUS_PATTERNS) {
|
||||
if (pattern.test(rule.value)) {
|
||||
if (pattern.test(rule.url_path)) {
|
||||
throw new PentestError(
|
||||
`rules.${ruleType}[${index}].value contains potentially dangerous pattern: ${pattern.source}`,
|
||||
`rules.${ruleType}[${index}].url_path contains potentially dangerous pattern: ${pattern.source}`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}].value`, pattern: pattern.source },
|
||||
{ field: `rules.${ruleType}[${index}].url_path`, pattern: pattern.source },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
@@ -539,25 +464,13 @@ const validateRulesSecurity = (rules: Rule[] | undefined, ruleType: string): voi
|
||||
};
|
||||
|
||||
const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number): void => {
|
||||
const field = `rules.${ruleType}[${index}].value`;
|
||||
const field = `rules.${ruleType}[${index}].url_path`;
|
||||
|
||||
switch (rule.type) {
|
||||
case 'url_path':
|
||||
if (!rule.value.startsWith('/')) {
|
||||
case 'path':
|
||||
if (!rule.url_path.startsWith('/')) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'url_path' must start with '/'`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'code_path':
|
||||
if (rule.value.includes('://')) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'code_path' must not contain a URL protocol (got '${rule.value}')`,
|
||||
`${field} for type 'path' must start with '/'`,
|
||||
'config',
|
||||
false,
|
||||
{ field, ruleType: rule.type },
|
||||
@@ -569,7 +482,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
case 'subdomain':
|
||||
case 'domain':
|
||||
// Basic domain validation - no slashes allowed
|
||||
if (rule.value.includes('/')) {
|
||||
if (rule.url_path.includes('/')) {
|
||||
throw new PentestError(
|
||||
`${field} for type '${rule.type}' cannot contain '/' characters`,
|
||||
'config',
|
||||
@@ -579,7 +492,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
);
|
||||
}
|
||||
// Must contain at least one dot for domains
|
||||
if (rule.type === 'domain' && !rule.value.includes('.')) {
|
||||
if (rule.type === 'domain' && !rule.url_path.includes('.')) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'domain' must be a valid domain name`,
|
||||
'config',
|
||||
@@ -592,7 +505,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
|
||||
case 'method': {
|
||||
const allowedMethods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'];
|
||||
if (!allowedMethods.includes(rule.value.toUpperCase())) {
|
||||
if (!allowedMethods.includes(rule.url_path.toUpperCase())) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'method' must be one of: ${allowedMethods.join(', ')}`,
|
||||
'config',
|
||||
@@ -605,7 +518,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
}
|
||||
|
||||
case 'header':
|
||||
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'header' must be a valid header name (alphanumeric, hyphens, underscores only)`,
|
||||
'config',
|
||||
@@ -617,7 +530,7 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
break;
|
||||
|
||||
case 'parameter':
|
||||
if (!rule.value.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
if (!rule.url_path.match(/^[a-zA-Z0-9\-_]+$/)) {
|
||||
throw new PentestError(
|
||||
`${field} for type 'parameter' must be a valid parameter name (alphanumeric, hyphens, underscores only)`,
|
||||
'config',
|
||||
@@ -633,13 +546,13 @@ const validateRuleTypeSpecific = (rule: Rule, ruleType: string, index: number):
|
||||
const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
|
||||
const seen = new Set<string>();
|
||||
rules.forEach((rule, index) => {
|
||||
const key = `${rule.type}:${rule.value}`;
|
||||
const key = `${rule.type}:${rule.url_path}`;
|
||||
if (seen.has(key)) {
|
||||
throw new PentestError(
|
||||
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.value}'`,
|
||||
`Duplicate rule found in rules.${ruleType}[${index}]: ${rule.type} '${rule.url_path}'`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, value: rule.value },
|
||||
{ field: `rules.${ruleType}[${index}]`, ruleType: rule.type, urlPath: rule.url_path },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
@@ -648,16 +561,16 @@ const checkForDuplicates = (rules: Rule[], ruleType: string): void => {
|
||||
};
|
||||
|
||||
const checkForConflicts = (avoidRules: Rule[] = [], focusRules: Rule[] = []): void => {
|
||||
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.value}`));
|
||||
const avoidSet = new Set(avoidRules.map((rule) => `${rule.type}:${rule.url_path}`));
|
||||
|
||||
focusRules.forEach((rule, index) => {
|
||||
const key = `${rule.type}:${rule.value}`;
|
||||
const key = `${rule.type}:${rule.url_path}`;
|
||||
if (avoidSet.has(key)) {
|
||||
throw new PentestError(
|
||||
`Conflicting rule found: rules.focus[${index}] '${rule.value}' also exists in rules.avoid`,
|
||||
`Conflicting rule found: rules.focus[${index}] '${rule.url_path}' also exists in rules.avoid`,
|
||||
'config',
|
||||
false,
|
||||
{ field: `rules.focus[${index}]`, value: rule.value },
|
||||
{ field: `rules.focus[${index}]`, urlPath: rule.url_path },
|
||||
ErrorCode.CONFIG_VALIDATION_FAILED,
|
||||
);
|
||||
}
|
||||
@@ -668,7 +581,7 @@ const sanitizeRule = (rule: Rule): Rule => {
|
||||
return {
|
||||
description: rule.description.trim(),
|
||||
type: rule.type.toLowerCase().trim() as Rule['type'],
|
||||
value: rule.value.trim(),
|
||||
url_path: rule.url_path.trim(),
|
||||
};
|
||||
};
|
||||
|
||||
@@ -678,28 +591,11 @@ export const distributeConfig = (config: Config | null): DistributedConfig => {
|
||||
const authentication = config?.authentication || null;
|
||||
const description = config?.description?.trim() || '';
|
||||
|
||||
const vuln_classes =
|
||||
config?.vuln_classes && config.vuln_classes.length > 0 ? [...config.vuln_classes] : [...ALL_VULN_CLASSES];
|
||||
|
||||
const exploit = config?.exploit !== undefined ? config.exploit === 'true' : true;
|
||||
|
||||
const report = {
|
||||
...(config?.report?.min_severity && { min_severity: config.report.min_severity }),
|
||||
...(config?.report?.min_confidence && { min_confidence: config.report.min_confidence }),
|
||||
...(config?.report?.guidance && { guidance: config.report.guidance.trim() }),
|
||||
};
|
||||
|
||||
const rules_of_engagement = config?.rules_of_engagement?.trim() ?? '';
|
||||
|
||||
return {
|
||||
avoid: avoid.map(sanitizeRule),
|
||||
focus: focus.map(sanitizeRule),
|
||||
authentication: authentication ? sanitizeAuthentication(authentication) : null,
|
||||
description,
|
||||
vuln_classes,
|
||||
exploit,
|
||||
report,
|
||||
rules_of_engagement,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -82,26 +82,6 @@ function generateTOTP(secret: string, timeStep: number = 30, digits: number = 6)
|
||||
return generateHOTP(secret, counter, digits);
|
||||
}
|
||||
|
||||
// === Help ===
|
||||
|
||||
/**
 * Print the generate-totp usage text (usage, options, output format) to stdout
 * via a single `console.log` call. Returns nothing and does not exit the process.
 */
function printHelp(): void {
|
||||
console.log(
|
||||
`generate-totp - emit a current 6-digit TOTP code for a base32-encoded secret.
|
||||
|
||||
Usage:
|
||||
generate-totp --secret <BASE32>
|
||||
generate-totp --help
|
||||
|
||||
Options:
|
||||
--secret Base32-encoded TOTP shared secret (characters A-Z, 2-7).
|
||||
-h, --help Show this help and exit.
|
||||
|
||||
Output:
|
||||
JSON to stdout. On success: {"status":"success","totpCode":"123456","expiresIn":<sec>}.
|
||||
On error: {"status":"error","message":"...","retryable":false} (exit 1).`,
|
||||
);
|
||||
}
|
||||
|
||||
// === Argument Parsing ===
|
||||
|
||||
function parseSecret(argv: string[]): string {
|
||||
@@ -117,11 +97,6 @@ function parseSecret(argv: string[]): string {
|
||||
// === Main ===
|
||||
|
||||
function main(): void {
|
||||
if (process.argv.includes('--help') || process.argv.includes('-h')) {
|
||||
printHelp();
|
||||
return;
|
||||
}
|
||||
|
||||
const secret = parseSecret(process.argv);
|
||||
|
||||
if (!secret) {
|
||||
|
||||
@@ -19,31 +19,6 @@ import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join, resolve } from 'node:path';
|
||||
import { DELIVERABLE_FILENAMES, type DeliverableType } from '../types/deliverables.js';
|
||||
|
||||
// === Help ===
|
||||
|
||||
/**
 * Print the save-deliverable usage text (usage, options, output format) to stdout
 * via a single `console.log` call. Returns nothing and does not exit the process.
 */
function printHelp(): void {
|
||||
// Comma-separated list of every key in DELIVERABLE_FILENAMES, shown under --type.
const types = Object.keys(DELIVERABLE_FILENAMES).join(', ');
|
||||
console.log(
|
||||
`save-deliverable - save a Shannon pentest deliverable under its canonical filename.
|
||||
|
||||
Usage:
|
||||
save-deliverable --type <TYPE> --file-path <path>
|
||||
save-deliverable --type <TYPE> --content '<text>'
|
||||
save-deliverable --help
|
||||
|
||||
Options:
|
||||
--type Deliverable type (required). One of:
|
||||
${types}
|
||||
--file-path Path of a file whose contents to save (preferred for large content).
|
||||
--content Inline content string to save.
|
||||
-h, --help Show this help and exit.
|
||||
|
||||
Output:
|
||||
JSON to stdout. On success: {"status":"success","filepath":"..."}.
|
||||
On error: {"status":"error","message":"...","retryable":true|false} (exit 1).`,
|
||||
);
|
||||
}
|
||||
|
||||
// === Argument Parsing ===
|
||||
|
||||
interface ParsedArgs {
|
||||
@@ -94,11 +69,6 @@ function saveDeliverableFile(targetDir: string, filename: string, content: strin
|
||||
// === Main ===
|
||||
|
||||
function main(): void {
|
||||
if (process.argv.includes('--help') || process.argv.includes('-h')) {
|
||||
printHelp();
|
||||
return;
|
||||
}
|
||||
|
||||
const args = parseArgs(process.argv);
|
||||
|
||||
// 1. Validate --type
|
||||
|
||||
@@ -161,7 +161,7 @@ export class AgentExecutionService {
|
||||
await auditSession.startAgent(agentName, prompt, attemptNumber);
|
||||
|
||||
// 5. Execute agent
|
||||
const outputFormat = getOutputFormat(agentName, distributedConfig?.exploit ?? true);
|
||||
const outputFormat = getOutputFormat(agentName);
|
||||
const result: ClaudePromptResult = await runClaudePrompt(
|
||||
prompt,
|
||||
repoPath,
|
||||
|
||||
@@ -1,239 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
/**
|
||||
* Deterministic queue-JSON to findings-MD renderer.
|
||||
*
|
||||
* Used when exploit=false: the exploit agents didn't run, so there is no
|
||||
* `*_exploitation_evidence.md` to concatenate into the report. This module
|
||||
* reads each `*_exploitation_queue.json` (already SDK-validated against the
|
||||
* schemas in ../ai/queue-schemas.ts) and writes a `*_findings.md` per class
|
||||
* in the canonical body shape that report-executive.txt's cleanup expects.
|
||||
*
|
||||
* No LLM in the loop — every field maps directly from a JSON key.
|
||||
*/
|
||||
|
||||
import { fs, path } from 'zx';
|
||||
import type { AuthFinding, AuthzFinding, InjectionFinding, SsrfFinding, XssFinding } from '../ai/queue-schemas.js';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { VulnClass } from '../types/config.js';
|
||||
|
||||
/** Three-line markdown blockquote inserted near the top of every rendered findings file. */
const DISCLAIMER =
  '> Exploitation phase was not run for this assessment. Each entry documents a\n' +
  '> vulnerability identified through static analysis; live exploitation steps and\n' +
  '> proof of impact are not included.';

/** Rendering settings for one vulnerability class; `T` is the queue entry shape it renders. */
type ClassConfig<T> = Readonly<{
  /** Class name used in the `# <heading> Findings` title and in log lines. */
  heading: string;
  /** Wording used in the "No <label> vulnerabilities were identified." sentence. */
  noneFoundLabel: string;
  /** File name of the input queue JSON. */
  queueFile: string;
  /** File name of the markdown file to write. */
  findingsFile: string;
  /** Turns one queue entry into its markdown section. */
  renderEntry: (entry: T) => string;
}>;

/** Shape read from a queue JSON file: an optional list of entries. */
type QueueDocument<T> = {
  vulnerabilities?: T[];
};
|
||||
|
||||
// === Common Render Helpers ===
|
||||
|
||||
/**
 * Format one bullet of a finding's summary list.
 *
 * @param label - Text shown in bold before the colon.
 * @param value - Value to show. Strings are trimmed before rendering, so the
 *   emitted bullet never carries the padding that the blank check already ignores.
 * @returns `- **label:** value`, or `null` when the value is `undefined`,
 *   `null`, or a string that is empty after trimming.
 */
function summaryRow(label: string, value: string | undefined | null | boolean): string | null {
  if (value === undefined || value === null) return null;
  if (typeof value !== 'string') return `- **${label}:** ${value}`;

  const text = value.trim();
  return text === '' ? null : `- **${label}:** ${text}`;
}
|
||||
|
||||
/**
 * Combine an endpoint and a code location into one display string.
 *
 * @param endpoint - Endpoint text, if any.
 * @param codeLocation - Source location text, if any.
 * @returns `endpoint (codeLocation)` when both are non-empty, otherwise
 *   whichever one is non-empty, otherwise `''`.
 */
function formatLocation(endpoint: string | undefined, codeLocation: string | undefined): string {
  if (endpoint && codeLocation) return `${endpoint} (${codeLocation})`;
  // `||` rather than `??`: an empty-string endpoint must not mask a usable code
  // location, matching the truthiness test used on the line above.
  return endpoint || codeLocation || '';
}
|
||||
|
||||
/**
 * Assemble the markdown section for a single finding.
 *
 * Layout: a `### id: title` heading, a blank line, a `**Summary:**` label, the
 * non-null summary rows, a blank line, and a `**Notes:**` line when `notes`
 * has non-whitespace content. Trailing whitespace is stripped from the result.
 *
 * @param id - Finding identifier placed in the heading.
 * @param title - Finding title placed in the heading.
 * @param summaryRows - Pre-formatted bullets; `null` items are dropped.
 * @param notes - Optional free text; trimmed before it is emitted.
 */
function buildEntry(
  id: string,
  title: string,
  summaryRows: ReadonlyArray<string | null>,
  notes: string | undefined,
): string {
  const bullets = summaryRows.filter((row): row is string => row !== null);
  const trimmedNotes = notes ? notes.trim() : '';

  const parts = [`### ${id}: ${title}`, '', '**Summary:**', ...bullets, ''];
  if (trimmedNotes !== '') {
    parts.push(`**Notes:** ${trimmedNotes}`);
  }
  return parts.join('\n').trimEnd();
}
|
||||
|
||||
// === Per-Class Renderers ===
|
||||
|
||||
/**
 * Render one authentication queue entry as a markdown finding section.
 * Location comes from the source endpoint and code location, overview from the
 * missing defense, and impact from the exploitation hypothesis.
 */
function renderAuthEntry(e: AuthFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
|
||||
|
||||
/**
 * Render one SSRF queue entry as a markdown finding section.
 * Location comes from the source endpoint and code location, overview from the
 * missing defense, and impact from the exploitation hypothesis.
 */
function renderSsrfEntry(e: SsrfFinding): string {
  const location = formatLocation(e.source_endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.missing_defense),
    summaryRow('Impact', e.exploitation_hypothesis),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
|
||||
|
||||
/**
 * Render one authorization queue entry as a markdown finding section.
 * Location comes from the endpoint and code location, overview from the guard
 * evidence, and impact from the side effect.
 */
function renderAuthzEntry(e: AuthzFinding): string {
  const location = formatLocation(e.endpoint, e.vulnerable_code_location);
  const rows = [
    summaryRow('Vulnerable location', location),
    summaryRow('Overview', e.guard_evidence),
    summaryRow('Impact', e.side_effect),
  ];
  return buildEntry(e.ID, e.vulnerability_type, rows, e.notes);
}
|
||||
|
||||
/**
 * Render one injection queue entry as a markdown finding section.
 *
 * The location is `sink_call (path: path)` when both fields are non-empty,
 * otherwise whichever one is non-empty. The overview is the mismatch reason.
 */
function renderInjectionEntry(e: InjectionFinding): string {
  // `||` rather than `??`: an empty-string sink_call must still fall back to the
  // path, matching the truthiness test used to decide on the combined form.
  const location = e.path && e.sink_call ? `${e.sink_call} (path: ${e.path})` : e.sink_call || e.path;
  return buildEntry(
    e.ID,
    e.vulnerability_type,
    [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)],
    e.notes,
  );
}
|
||||
|
||||
/**
 * Render one XSS queue entry as a markdown finding section.
 *
 * The location is `sink_function (path: path)` when both fields are non-empty,
 * otherwise whichever one is non-empty. The overview is the mismatch reason.
 */
function renderXssEntry(e: XssFinding): string {
  // `||` rather than `??`: an empty-string sink_function must still fall back to
  // the path, matching the truthiness test used to decide on the combined form.
  const location = e.path && e.sink_function ? `${e.sink_function} (path: ${e.path})` : e.sink_function || e.path;
  return buildEntry(
    e.ID,
    e.vulnerability_type,
    [summaryRow('Vulnerable location', location), summaryRow('Overview', e.mismatch_reason)],
    e.notes,
  );
}
|
||||
|
||||
// === Class Registry ===
|
||||
|
||||
/**
 * Build a registry entry, wrapping a typed renderer so it accepts the untyped
 * entries read from a queue file.
 */
function classEntry<T>(
  heading: string,
  noneFoundLabel: string,
  queueFile: string,
  findingsFile: string,
  render: (finding: T) => string,
): ClassConfig<unknown> {
  return {
    heading,
    noneFoundLabel,
    queueFile,
    findingsFile,
    renderEntry: (raw) => render(raw as T),
  };
}

/** Registry of per-class rendering settings, keyed by vulnerability class. */
const CLASSES: Record<VulnClass, ClassConfig<unknown>> = {
  auth: classEntry<AuthFinding>(
    'Authentication',
    'authentication',
    'auth_exploitation_queue.json',
    'auth_findings.md',
    renderAuthEntry,
  ),
  authz: classEntry<AuthzFinding>(
    'Authorization',
    'authorization',
    'authz_exploitation_queue.json',
    'authz_findings.md',
    renderAuthzEntry,
  ),
  injection: classEntry<InjectionFinding>(
    'Injection',
    'injection',
    'injection_exploitation_queue.json',
    'injection_findings.md',
    renderInjectionEntry,
  ),
  xss: classEntry<XssFinding>('XSS', 'XSS', 'xss_exploitation_queue.json', 'xss_findings.md', renderXssEntry),
  ssrf: classEntry<SsrfFinding>('SSRF', 'SSRF', 'ssrf_exploitation_queue.json', 'ssrf_findings.md', renderSsrfEntry),
};
|
||||
|
||||
// === Class File Assembly ===
|
||||
|
||||
/**
 * Produce the full markdown text of one class's findings file.
 *
 * The file has a `# <heading> Findings` title, the disclaimer, an
 * `## Identified Vulnerabilities` heading, then either one rendered section per
 * entry or a single "none identified" sentence. The result always ends with
 * exactly one newline.
 */
function renderClassFile(config: ClassConfig<unknown>, entries: readonly unknown[]): string {
  const body =
    entries.length === 0
      ? [`No ${config.noneFoundLabel} vulnerabilities were identified.`]
      : entries.map((entry) => config.renderEntry(entry));

  const lines = [
    `# ${config.heading} Findings`,
    '',
    DISCLAIMER,
    '',
    '## Identified Vulnerabilities',
    '',
    // Each body chunk is followed by a blank separator line.
    ...body.flatMap((chunk) => [chunk, '']),
  ];

  return `${lines.join('\n').trimEnd()}\n`;
}
|
||||
|
||||
// === Public Entry Point ===
|
||||
|
||||
/**
 * Render `*_findings.md` per class from each `*_exploitation_queue.json`.
 *
 * Idempotent: skips classes whose findings file already exists, or whose queue
 * is missing (class out of scope this run). Per-class failures — unreadable or
 * malformed queue JSON, a render error, a failed write — are logged as warnings
 * and the remaining classes still proceed.
 *
 * @param sourceDir - Passed to `deliverablesDir` to locate the deliverables directory.
 * @param deliverablesSubdir - Optional subdirectory, also passed to `deliverablesDir`.
 * @param logger - Receives one info or warn line per class.
 */
export async function renderFindingsFromQueues(
  sourceDir: string,
  deliverablesSubdir: string | undefined,
  logger: ActivityLogger,
): Promise<void> {
  const dir = deliverablesDir(sourceDir, deliverablesSubdir);

  for (const config of Object.values(CLASSES)) {
    const queuePath = path.join(dir, config.queueFile);
    const findingsPath = path.join(dir, config.findingsFile);

    if (await fs.pathExists(findingsPath)) {
      logger.info(`${config.heading}: ${config.findingsFile} already exists, skipping`);
      continue;
    }
    if (!(await fs.pathExists(queuePath))) {
      logger.info(`${config.heading}: no queue file (class out of scope), skipping`);
      continue;
    }

    try {
      const doc: unknown = await fs.readJson(queuePath);
      if (typeof doc !== 'object' || doc === null) {
        throw new Error('queue file does not contain a JSON object');
      }

      // A missing or null list means "no findings". Any other non-array value is
      // rejected here, so it is never iterated and rendered as bogus entries.
      const raw: unknown = (doc as QueueDocument<unknown>).vulnerabilities;
      let entries: readonly unknown[] = [];
      if (raw !== undefined && raw !== null) {
        if (!Array.isArray(raw)) {
          throw new Error('"vulnerabilities" is not an array');
        }
        entries = raw;
      }

      const markdown = renderClassFile(config, entries);
      await fs.writeFile(findingsPath, markdown);
      logger.info(`${config.heading}: rendered ${entries.length} finding(s) to ${config.findingsFile}`);
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const message = error instanceof Error ? error.message : String(error);
      logger.warn(`${config.heading}: failed to render findings from ${config.queueFile}: ${message}`);
    }
  }
}
|
||||
@@ -14,9 +14,8 @@
|
||||
* Checks run sequentially, cheapest first:
|
||||
* 1. Repository path exists and contains .git
|
||||
* 2. Config file parses and validates (if provided)
|
||||
* 3. code_path rules match real entries in the repo (filesystem only)
|
||||
* 4. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 5. Target URL is reachable from the container (DNS + HTTP)
|
||||
* 3. Credentials validate via Claude Agent SDK query (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 4. Target URL is reachable from the container (DNS + HTTP)
|
||||
*/
|
||||
|
||||
import { lookup } from 'node:dns/promises';
|
||||
@@ -25,11 +24,9 @@ import http from 'node:http';
|
||||
import https from 'node:https';
|
||||
import type { SDKAssistantMessageError } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { query } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { glob } from 'zx';
|
||||
import { resolveModel } from '../ai/models.js';
|
||||
import { parseConfig } from '../config-parser.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { Config, Rule } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { err, ok, type Result } from '../types/result.js';
|
||||
import { isRetryableError, PentestError } from './error-handling.js';
|
||||
@@ -111,13 +108,13 @@ async function validateRepo(
|
||||
|
||||
// === Config Validation ===
|
||||
|
||||
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<Config, PentestError>> {
|
||||
async function validateConfig(configPath: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
|
||||
logger.info('Validating configuration file...', { configPath });
|
||||
|
||||
try {
|
||||
const config = await parseConfig(configPath);
|
||||
await parseConfig(configPath);
|
||||
logger.info('Configuration file OK');
|
||||
return ok(config);
|
||||
return ok(undefined);
|
||||
} catch (error) {
|
||||
if (error instanceof PentestError) {
|
||||
return err(error);
|
||||
@@ -135,73 +132,6 @@ async function validateConfig(configPath: string, logger: ActivityLogger): Promi
|
||||
}
|
||||
}
|
||||
|
||||
// === code_path Existence Validation ===
|
||||
|
||||
/** Glob patterns excluded when matching code_path rules against the repo. */
const CODE_PATH_IGNORE = ['.git/**', '.shannon/**'];

/**
 * Report whether `pattern` matches at least one entry under `repoPath`.
 *
 * Matching includes dotfiles and directories, does not follow symbolic links,
 * and ignores the CODE_PATH_IGNORE patterns. Iteration stops at the first match.
 */
async function patternMatchesAny(repoPath: string, pattern: string): Promise<boolean> {
  const matches = glob.globbyStream(pattern, {
    cwd: repoPath,
    dot: true,
    onlyFiles: false,
    followSymbolicLinks: false,
    ignore: CODE_PATH_IGNORE,
  });

  let found = false;
  for await (const _match of matches) {
    found = true;
    break;
  }
  return found;
}
|
||||
|
||||
/** Which rule list a code_path rule came from. */
type RuleKind = 'avoid' | 'focus';

/** A code_path rule whose pattern matched nothing in the repo. */
interface MissingCodePath {
  kind: RuleKind;
  value: string;
  description: string;
}

/**
 * Check that every `code_path` rule in the config matches at least one entry
 * in the repo.
 *
 * Avoid rules are checked before focus rules, one pattern at a time. Returns
 * `ok` when there are no code_path rules or all of them match; otherwise
 * returns a non-retryable `config` PentestError listing each unmatched rule.
 */
async function validateCodePathsExist(
  config: Config,
  repoPath: string,
  logger: ActivityLogger,
): Promise<Result<void, PentestError>> {
  const tagCodePaths = (kind: RuleKind, rules: readonly Rule[] | null | undefined) =>
    (rules ?? []).filter((rule) => rule.type === 'code_path').map((rule) => ({ kind, rule }));

  const candidates: Array<{ kind: RuleKind; rule: Rule }> = [
    ...tagCodePaths('avoid', config.rules?.avoid),
    ...tagCodePaths('focus', config.rules?.focus),
  ];

  if (candidates.length === 0) {
    return ok(undefined);
  }

  logger.info(`Validating ${candidates.length} code_path rule(s) against repo...`);

  // The only property enforced is "at least one match"; a malformed glob simply matches nothing.
  const missing: MissingCodePath[] = [];
  for (const { kind, rule } of candidates) {
    const matched = await patternMatchesAny(repoPath, rule.value);
    if (!matched) {
      missing.push({ kind, value: rule.value, description: rule.description });
    }
  }

  if (missing.length === 0) {
    logger.info('All code_path rules matched');
    return ok(undefined);
  }

  const lines = missing.map((m) => `[${m.kind}] '${m.value}' — ${m.description}`);
  return err(
    new PentestError(
      `code_path rules don't match any file or directory in the repo:\n - ${lines.join('\n - ')}\n` +
        `Fix the patterns or remove the rules.`,
      'config',
      false,
      { missing },
      ErrorCode.CONFIG_VALIDATION_FAILED,
    ),
  );
}
|
||||
|
||||
// === Credential Validation ===
|
||||
|
||||
/** Map SDK error type to a human-readable preflight PentestError. */
|
||||
@@ -280,7 +210,7 @@ async function validateCredentials(
|
||||
// 1. Custom base URL — validate endpoint is reachable via SDK query
|
||||
if (process.env.ANTHROPIC_BASE_URL && process.env.ANTHROPIC_AUTH_TOKEN) {
|
||||
const baseUrl = process.env.ANTHROPIC_BASE_URL;
|
||||
logger.info('Validating custom base URL');
|
||||
logger.info(`Validating custom base URL: ${baseUrl}`);
|
||||
|
||||
try {
|
||||
for await (const message of query({ prompt: 'hi', options: { model: resolveModel('small'), maxTurns: 1 } })) {
|
||||
@@ -464,7 +394,7 @@ function httpHead(url: string, timeoutMs: number): Promise<number> {
|
||||
|
||||
/** Check that the target URL is reachable from inside the container. */
|
||||
async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Promise<Result<void, PentestError>> {
|
||||
logger.info('Checking target URL reachability...');
|
||||
logger.info('Checking target URL reachability...', { targetUrl });
|
||||
|
||||
// 1. Parse URL
|
||||
let parsed: URL;
|
||||
@@ -543,9 +473,8 @@ async function validateTargetUrl(targetUrl: string, logger: ActivityLogger): Pro
|
||||
*
|
||||
* 1. Repository path exists and contains .git
|
||||
* 2. Config file parses and validates (if configPath provided)
|
||||
* 3. code_path rules match at least one entry in the repo (skipped without config)
|
||||
* 4. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 5. Target URL is reachable from the container
|
||||
* 3. Credentials validate (API key, OAuth, Bedrock, or Vertex AI)
|
||||
* 4. Target URL is reachable from the container
|
||||
*
|
||||
* Returns on first failure.
|
||||
*/
|
||||
@@ -565,31 +494,20 @@ export async function runPreflightChecks(
|
||||
}
|
||||
|
||||
// 2. Config check (free — filesystem + CPU)
|
||||
let parsedConfig: Config | null = null;
|
||||
if (configPath) {
|
||||
const configResult = await validateConfig(configPath, logger);
|
||||
if (!configResult.ok) {
|
||||
return configResult;
|
||||
}
|
||||
parsedConfig = configResult.value;
|
||||
}
|
||||
|
||||
// 3. code_path rules must match real entries in the repo (filesystem only).
|
||||
// Runs after both repo and config are valid, before any network round-trip.
|
||||
if (parsedConfig) {
|
||||
const codePathResult = await validateCodePathsExist(parsedConfig, repoPath, logger);
|
||||
if (!codePathResult.ok) {
|
||||
return codePathResult;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
|
||||
// 3. Credential check (cheap — 1 SDK round-trip, skipped when providerConfig present)
|
||||
const credResult = await validateCredentials(logger, apiKey, providerConfig);
|
||||
if (!credResult.ok) {
|
||||
return credResult;
|
||||
}
|
||||
|
||||
// 5. Target URL reachability check (cheap — 1 HTTP round-trip)
|
||||
// 4. Target URL reachability check (cheap — 1 HTTP round-trip)
|
||||
const urlResult = await validateTargetUrl(targetUrl, logger);
|
||||
if (!urlResult.ok) {
|
||||
return urlResult;
|
||||
|
||||
@@ -8,113 +8,9 @@ import { fs, path } from 'zx';
|
||||
import { PROMPTS_DIR } from '../paths.js';
|
||||
import { PLAYWRIGHT_SESSION_MAPPING } from '../session-manager.js';
|
||||
import type { ActivityLogger } from '../types/activity-logger.js';
|
||||
import type { Authentication, DistributedConfig, ReportConfig, Rule, VulnClass } from '../types/config.js';
|
||||
import { isGlobPattern } from '../utils/glob.js';
|
||||
import type { Authentication, DistributedConfig } from '../types/config.js';
|
||||
import { handlePromptError, PentestError } from './error-handling.js';
|
||||
|
||||
/**
 * Render the `code_path` rules of a list as markdown bullets.
 *
 * Each bullet shows the rule value, a `[GLOB]` or `[FILE]` tag chosen by
 * `isGlobPattern`, and the rule description. Returns `'None'` when the list
 * has no code_path rules.
 */
function renderCodePathRules(rules: Rule[]): string {
  const bullets: string[] = [];
  for (const rule of rules) {
    if (rule.type !== 'code_path') continue;
    const tag = isGlobPattern(rule.value) ? '[GLOB]' : '[FILE]';
    bullets.push(`- ${rule.value} ${tag} — ${rule.description}`);
  }
  return bullets.length === 0 ? 'None' : bullets.join('\n');
}
|
||||
|
||||
/** Wording used for one vulnerability class in the report summary template. */
interface VulnSummarySpec {
  /** Bold subsection heading. */
  readonly heading: string;
  /** Name of the evidence section the instruction text refers to. */
  readonly evidenceSection: string;
  /** Wording used in the "No <label> vulnerabilities were found." sentence. */
  readonly noneFoundLabel: string;
}

/** Summary wording for every vulnerability class. */
const VULN_SUMMARY_SPECS: Record<VulnClass, VulnSummarySpec> = {
  auth: {
    heading: 'Authentication Vulnerabilities',
    evidenceSection: 'Authentication Exploitation Evidence',
    noneFoundLabel: 'authentication',
  },
  authz: {
    heading: 'Authorization Vulnerabilities',
    evidenceSection: 'Authorization Exploitation Evidence',
    noneFoundLabel: 'authorization',
  },
  xss: {
    heading: 'Cross-Site Scripting (XSS) Vulnerabilities',
    evidenceSection: 'XSS Exploitation Evidence',
    noneFoundLabel: 'XSS',
  },
  injection: {
    heading: 'SQL/Command Injection Vulnerabilities',
    evidenceSection: 'Injection Exploitation Evidence',
    noneFoundLabel: 'SQL or command injection',
  },
  ssrf: {
    heading: 'Server-Side Request Forgery (SSRF) Vulnerabilities',
    evidenceSection: 'SSRF Exploitation Evidence',
    noneFoundLabel: 'SSRF',
  },
};

/**
 * Build the summary subsections of the report template, one per selected class,
 * separated by blank lines. An empty selection means "all classes", in the key
 * order of VULN_SUMMARY_SPECS.
 */
function renderVulnSummarySubsections(selected: readonly VulnClass[]): string {
  const classes: readonly VulnClass[] =
    selected.length === 0 ? (Object.keys(VULN_SUMMARY_SPECS) as VulnClass[]) : selected;

  const blocks: string[] = [];
  for (const cls of classes) {
    const { heading, evidenceSection, noneFoundLabel } = VULN_SUMMARY_SPECS[cls];
    blocks.push(
      `**${heading}:**\n{Check for "${evidenceSection}" section. Include actually exploited vulnerabilities and those blocked by security controls. Exclude theoretical vulnerabilities requiring internal network access. If vulnerabilities exist, summarize their impact and severity. If section is missing or empty, state: "No ${noneFoundLabel} vulnerabilities were found."}`,
    );
  }
  return blocks.join('\n\n');
}
|
||||
|
||||
/**
 * Build the top-level <report_filters> block of the prompt.
 *
 * Returns an empty string when no report config is given or when none of
 * severity, confidence, or (trimmed) guidance is set. Otherwise each configured
 * filter contributes its own line, so unset filters never appear in the output.
 */
function renderReportFiltersBlock(report: ReportConfig | undefined): string {
  if (!report) return '';

  const { min_severity: severity, min_confidence: confidence } = report;
  const guidance = report.guidance?.trim();
  if (!(severity || confidence || guidance)) return '';

  return [
    '<report_filters>',
    'The filters below are user-supplied and binding for this assessment. Honor each strictly when assembling the final report.',
    '',
    ...(severity
      ? [
          `- Minimum severity: ${severity} — keep only findings rated this severity or higher (scale: low < medium < high < critical).`,
        ]
      : []),
    ...(confidence
      ? [
          `- Minimum confidence: ${confidence} — keep only findings rated this confidence or higher (scale: low < medium < high).`,
        ]
      : []),
    ...(guidance
      ? ['', 'User guidance — apply throughout the report as binding directives for finding selection:', guidance]
      : []),
    '</report_filters>',
  ].join('\n');
}
|
||||
|
||||
/**
 * Build the per-finding DROP rule list for the cleanup step of the prompt.
 *
 * Severity and confidence appear as concrete thresholds. Guidance is only
 * referenced, not quoted, so its text is stated once, in <report_filters>.
 * Returns an empty string when no filter is configured.
 */
function renderReportFilterRules(report: ReportConfig | undefined): string {
  const conditions = [
    report?.min_severity ? `* severity is below ${report.min_severity}` : null,
    report?.min_confidence ? `* confidence is below ${report.min_confidence}` : null,
    report?.guidance?.trim() ? '* topic matches an exclusion in the user guidance' : null,
  ].filter((condition): condition is string => condition !== null);

  if (conditions.length === 0) return '';

  const header = ' - DROP any `### [TYPE]-VULN-[NUMBER]` finding whose:';
  return [header, ...conditions.map((condition) => ` ${condition}`)].join('\n');
}
|
||||
|
||||
interface PromptVariables {
|
||||
webUrl: string;
|
||||
repoPath: string;
|
||||
@@ -279,63 +175,36 @@ async function interpolateVariables(
|
||||
.replace(/{{AUTH_CONTEXT}}/g, buildAuthContext(config))
|
||||
.replace(/{{DESCRIPTION}}/g, config?.description ? `Description: ${config.description}` : '');
|
||||
|
||||
const avoidUrlRules = config?.avoid?.filter((r) => r.type !== 'code_path') ?? [];
|
||||
const focusUrlRules = config?.focus?.filter((r) => r.type !== 'code_path') ?? [];
|
||||
if (avoidUrlRules.length === 0 && focusUrlRules.length === 0) {
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>\s*/g, '');
|
||||
} else {
|
||||
const avoidStr = avoidUrlRules.length > 0 ? avoidUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
const focusStr = focusUrlRules.length > 0 ? focusUrlRules.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
result = result.replace(/{{RULES_AVOID}}/g, avoidStr).replace(/{{RULES_FOCUS}}/g, focusStr);
|
||||
}
|
||||
if (config) {
|
||||
// Handle rules section - if both are empty, use cleaner messaging
|
||||
const hasAvoidRules = config.avoid && config.avoid.length > 0;
|
||||
const hasFocusRules = config.focus && config.focus.length > 0;
|
||||
|
||||
const avoidCodeRules = (config?.avoid ?? []).filter((r) => r.type === 'code_path');
|
||||
const focusCodeRules = (config?.focus ?? []).filter((r) => r.type === 'code_path');
|
||||
if (avoidCodeRules.length === 0 && focusCodeRules.length === 0) {
|
||||
result = result.replace(/<code_path_rules>[\s\S]*?<\/code_path_rules>\s*/g, '');
|
||||
} else {
|
||||
result = result
|
||||
.replace(/{{CODE_RULES_AVOID}}/g, renderCodePathRules(config?.avoid ?? []))
|
||||
.replace(/{{CODE_RULES_FOCUS}}/g, renderCodePathRules(config?.focus ?? []));
|
||||
}
|
||||
if (!hasAvoidRules && !hasFocusRules) {
|
||||
// Replace the entire rules section with a clean message
|
||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||
} else {
|
||||
const avoidRules = hasAvoidRules ? config.avoid?.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
const focusRules = hasFocusRules ? config.focus?.map((r) => `- ${r.description}`).join('\n') : 'None';
|
||||
|
||||
const roe = config?.rules_of_engagement?.trim() ?? '';
|
||||
if (roe) {
|
||||
result = result.replace(/{{RULES_OF_ENGAGEMENT}}/g, roe);
|
||||
} else {
|
||||
result = result.replace(/<rules_of_engagement>[\s\S]*?<\/rules_of_engagement>\s*/g, '');
|
||||
}
|
||||
result = result.replace(/{{RULES_AVOID}}/g, avoidRules).replace(/{{RULES_FOCUS}}/g, focusRules);
|
||||
}
|
||||
|
||||
if (config?.authentication?.login_flow) {
|
||||
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
|
||||
// Extract and inject login instructions from config
|
||||
if (config.authentication?.login_flow) {
|
||||
const loginInstructions = await buildLoginInstructions(config.authentication, logger, promptsBaseDir);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, loginInstructions);
|
||||
} else {
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
|
||||
}
|
||||
} else {
|
||||
// Replace the entire rules section with a clean message when no config provided
|
||||
const cleanRulesSection = '<rules>\nNo specific rules or focus areas provided for this test.\n</rules>';
|
||||
result = result.replace(/<rules>[\s\S]*?<\/rules>/g, cleanRulesSection);
|
||||
result = result.replace(/{{LOGIN_INSTRUCTIONS}}/g, '');
|
||||
}
|
||||
|
||||
const vulnClasses = config?.vuln_classes ?? [];
|
||||
result = result.replace(
|
||||
/{{VULN_CLASSES_TESTED}}/g,
|
||||
vulnClasses.length > 0 ? vulnClasses.join(', ') : 'injection, xss, auth, authz, ssrf',
|
||||
);
|
||||
result = result.replace(/{{VULN_SUMMARY_SUBSECTIONS}}/g, renderVulnSummarySubsections(vulnClasses));
|
||||
|
||||
const exploitEnabled = config?.exploit ?? true;
|
||||
result = result
|
||||
.replace(/{{EXPLOITATION}}/g, exploitEnabled ? 'enabled' : 'disabled')
|
||||
.replace(/{{REPORT_VULN_HEADING}}/g, exploitEnabled ? 'Exploitation Evidence' : 'Findings')
|
||||
.replace(
|
||||
/{{REPORT_VULN_SUBHEADING}}/g,
|
||||
exploitEnabled ? 'Successfully Exploited Vulnerabilities' : 'Identified Vulnerabilities',
|
||||
);
|
||||
|
||||
result = result
|
||||
.replace(/{{REPORT_FILTERS_BLOCK}}/g, renderReportFiltersBlock(config?.report))
|
||||
.replace(/{{REPORT_FILTER_RULES}}/g, renderReportFilterRules(config?.report));
|
||||
|
||||
// Collapse runs of 3+ newlines (left behind by tag-strip and empty-fragment substitutions).
|
||||
result = result.replace(/\n{3,}/g, '\n\n');
|
||||
|
||||
// Validate that all placeholders have been replaced (excluding instructional text)
|
||||
const remainingPlaceholders = result.match(/\{\{[^}]+\}\}/g);
|
||||
if (remainingPlaceholders) {
|
||||
|
||||
@@ -12,66 +12,60 @@ import { PentestError } from './error-handling.js';
|
||||
|
||||
interface DeliverableFile {
|
||||
name: string;
|
||||
/** Candidate filenames in priority order. First one that exists wins. */
|
||||
paths: readonly string[];
|
||||
path: string;
|
||||
required: boolean;
|
||||
}
|
||||
|
||||
// Pure function: Assemble final report from specialist deliverables.
|
||||
// Per class, prefer the exploit-agent's evidence file; fall back to renderer-produced findings.
|
||||
// Both never coexist for a workspace because scope (exploit flag) is locked.
|
||||
// Pure function: Assemble final report from specialist deliverables
|
||||
export async function assembleFinalReport(
|
||||
sourceDir: string,
|
||||
deliverablesSubdir: string | undefined,
|
||||
logger: ActivityLogger,
|
||||
): Promise<string> {
|
||||
const deliverableFiles: readonly DeliverableFile[] = [
|
||||
{ name: 'Injection', paths: ['injection_exploitation_evidence.md', 'injection_findings.md'], required: false },
|
||||
{ name: 'XSS', paths: ['xss_exploitation_evidence.md', 'xss_findings.md'], required: false },
|
||||
{ name: 'Authentication', paths: ['auth_exploitation_evidence.md', 'auth_findings.md'], required: false },
|
||||
{ name: 'SSRF', paths: ['ssrf_exploitation_evidence.md', 'ssrf_findings.md'], required: false },
|
||||
{ name: 'Authorization', paths: ['authz_exploitation_evidence.md', 'authz_findings.md'], required: false },
|
||||
const deliverableFiles: DeliverableFile[] = [
|
||||
{ name: 'Injection', path: 'injection_exploitation_evidence.md', required: false },
|
||||
{ name: 'XSS', path: 'xss_exploitation_evidence.md', required: false },
|
||||
{ name: 'Authentication', path: 'auth_exploitation_evidence.md', required: false },
|
||||
{ name: 'SSRF', path: 'ssrf_exploitation_evidence.md', required: false },
|
||||
{ name: 'Authorization', path: 'authz_exploitation_evidence.md', required: false },
|
||||
];
|
||||
|
||||
const dir = deliverablesDir(sourceDir, deliverablesSubdir);
|
||||
const sections: string[] = [];
|
||||
|
||||
for (const file of deliverableFiles) {
|
||||
let added = false;
|
||||
for (const candidate of file.paths) {
|
||||
const filePath = path.join(dir, candidate);
|
||||
try {
|
||||
if (await fs.pathExists(filePath)) {
|
||||
const content = await fs.readFile(filePath, 'utf8');
|
||||
sections.push(content);
|
||||
logger.info(`Added ${file.name} section from ${candidate}`);
|
||||
added = true;
|
||||
break;
|
||||
}
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
logger.warn(`Could not read ${candidate}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
if (!added) {
|
||||
if (file.required) {
|
||||
const filePath = path.join(deliverablesDir(sourceDir, deliverablesSubdir), file.path);
|
||||
try {
|
||||
if (await fs.pathExists(filePath)) {
|
||||
const content = await fs.readFile(filePath, 'utf8');
|
||||
sections.push(content);
|
||||
logger.info(`Added ${file.name} findings`);
|
||||
} else if (file.required) {
|
||||
throw new PentestError(
|
||||
`Required deliverable file not found: ${file.paths.join(' or ')}`,
|
||||
`Required deliverable file not found: ${file.path}`,
|
||||
'filesystem',
|
||||
false,
|
||||
{ deliverableFile: file.paths, sourceDir },
|
||||
{ deliverableFile: file.path, sourceDir },
|
||||
ErrorCode.DELIVERABLE_NOT_FOUND,
|
||||
);
|
||||
} else {
|
||||
logger.info(`No ${file.name} deliverable found`);
|
||||
}
|
||||
logger.info(`No ${file.name} deliverable found`);
|
||||
} catch (error) {
|
||||
if (file.required) {
|
||||
throw error;
|
||||
}
|
||||
const err = error as Error;
|
||||
logger.warn(`Could not read ${file.path}: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const finalContent = sections.join('\n\n');
|
||||
const finalReportPath = path.join(dir, 'comprehensive_security_assessment_report.md');
|
||||
const outputDir = deliverablesDir(sourceDir, deliverablesSubdir);
|
||||
const finalReportPath = path.join(outputDir, 'comprehensive_security_assessment_report.md');
|
||||
|
||||
try {
|
||||
await fs.ensureDir(dir);
|
||||
// Ensure deliverables directory exists
|
||||
await fs.ensureDir(outputDir);
|
||||
await fs.writeFile(finalReportPath, finalContent);
|
||||
logger.info(`Final report assembled at ${finalReportPath}`);
|
||||
} catch (error) {
|
||||
|
||||
@@ -18,17 +18,15 @@
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
|
||||
import { writeUserSettingsForCodePathAvoids } from '../ai/settings-writer.js';
|
||||
import { AuditSession } from '../audit/index.js';
|
||||
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
|
||||
import { generateSessionJsonPath, type SessionMetadata } from '../audit/utils.js';
|
||||
import type { SessionMetadata } from '../audit/utils.js';
|
||||
import type { WorkflowSummary } from '../audit/workflow-logger.js';
|
||||
import type { CheckpointContext } from '../interfaces/checkpoint-provider.js';
|
||||
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
|
||||
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
|
||||
import { classifyErrorForTemporal, PentestError } from '../services/error-handling.js';
|
||||
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
|
||||
import { renderFindingsFromQueues } from '../services/findings-renderer.js';
|
||||
import { executeGitCommandWithRetry } from '../services/git-manager.js';
|
||||
import { runPreflightChecks } from '../services/preflight.js';
|
||||
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
|
||||
@@ -36,10 +34,10 @@ import { assembleFinalReport, injectModelIntoReport } from '../services/reportin
|
||||
import { AGENTS } from '../session-manager.js';
|
||||
import type { AgentName } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import type { ContainerConfig, ProviderConfig, VulnClass } from '../types/config.js';
|
||||
import type { ContainerConfig, ProviderConfig } from '../types/config.js';
|
||||
import { ErrorCode } from '../types/errors.js';
|
||||
import { isErr } from '../types/result.js';
|
||||
import { atomicWrite, fileExists, readJson } from '../utils/file-io.js';
|
||||
import { fileExists, readJson } from '../utils/file-io.js';
|
||||
import { createActivityLogger } from './activity-logger.js';
|
||||
import type { AgentMetrics, PipelineState, ResumeState } from './shared.js';
|
||||
|
||||
@@ -401,50 +399,11 @@ export async function initDeliverableGit(input: ActivityInput): Promise<void> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sync code_path avoid rules into Claude's user-scope settings.json so the
|
||||
* SDK enforces them at the tool layer for every agent in this run.
|
||||
*
|
||||
* Runs once per workflow before any agent fires. Config is fixed for the
|
||||
* lifetime of the workflow, so writing once avoids the parallel-agent race
|
||||
* on the global ~/.claude/settings.json file.
|
||||
* Assemble the final report by concatenating exploitation evidence files.
|
||||
*/
|
||||
export async function syncCodePathDenyRules(input: ActivityInput): Promise<void> {
  const logger = createActivityLogger();
  const container = getOrCreateContainer(input.workflowId, buildSessionMetadata(input), buildContainerConfig(input));

  // Best-effort: a config that fails to load is logged and skipped instead of failing the activity.
  const configResult = await container.configLoader.loadOptional(input.configPath, undefined, input.configYAML);
  if (isErr(configResult)) {
    logger.warn(`syncCodePathDenyRules: skipping (config load failed: ${configResult.error.message})`);
    return;
  }

  const config = configResult.value;
  // The count is only used for the log line below; the writer receives the whole config.
  const denyCount = (config?.avoid ?? []).filter((r) => r.type === 'code_path').length;
  await writeUserSettingsForCodePathAvoids(config);
  logger.info(`Synced code_path deny rules to user settings (${denyCount} entries)`);
}
|
||||
|
||||
/**
|
||||
* Assemble the final report by concatenating per-class deliverables.
|
||||
*
|
||||
* Under exploit=true, each exploit agent has produced `*_exploitation_evidence.md`
|
||||
* directly. Under exploit=false, exploit agents didn't run; we deterministically
|
||||
* render `*_findings.md` from each `*_exploitation_queue.json` first, then assemble.
|
||||
*/
|
||||
export async function assembleReportActivity(input: ActivityInput, exploit: boolean): Promise<void> {
|
||||
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
|
||||
const { repoPath, deliverablesSubdir } = input;
|
||||
const logger = createActivityLogger();
|
||||
|
||||
if (!exploit) {
|
||||
logger.info('Rendering per-class findings from analysis queues...');
|
||||
try {
|
||||
await renderFindingsFromQueues(repoPath, deliverablesSubdir, logger);
|
||||
} catch (error) {
|
||||
const err = error as Error;
|
||||
logger.warn(`Error rendering findings from queues: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info('Assembling deliverables from specialist agents...');
|
||||
try {
|
||||
await assembleFinalReport(repoPath, deliverablesSubdir, logger);
|
||||
@@ -488,11 +447,6 @@ export async function checkExploitationQueue(input: ActivityInput, vulnType: Vul
|
||||
return checker.checkQueue(vulnType, delivPath, logger);
|
||||
}
|
||||
|
||||
/** Scope of a run as stored under `session.scope` in the session JSON. */
interface RunScope {
  /** Vulnerability classes selected for the run. */
  vulnClasses: VulnClass[];
  /** Exploit flag the run was started with. */
  exploit: boolean;
}
|
||||
|
||||
interface SessionJson {
|
||||
session: {
|
||||
id: string;
|
||||
@@ -500,7 +454,6 @@ interface SessionJson {
|
||||
repoPath?: string;
|
||||
originalWorkflowId?: string;
|
||||
resumeAttempts?: ResumeAttempt[];
|
||||
scope?: RunScope;
|
||||
};
|
||||
metrics: {
|
||||
agents: Record<
|
||||
@@ -618,42 +571,6 @@ export async function loadResumeState(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * First run records scope into session.json; resume runs throw if it differs.
 *
 * @param input - Activity input; supplies the workflow id and the session metadata.
 * @param vulnClasses - Vulnerability classes requested for this run.
 * @param exploit - Exploit flag requested for this run.
 * @throws ApplicationFailure (non-retryable, type `ScopeMismatchError`) when a scope is
 *   already recorded and its classes or exploit flag differ from the ones provided.
 */
export async function persistOrValidateRunScope(
  input: ActivityInput,
  vulnClasses: VulnClass[],
  exploit: boolean,
): Promise<void> {
  const sessionMetadata = buildSessionMetadata(input);
  const auditSession = new AuditSession(sessionMetadata);
  await auditSession.initialize(input.workflowId);

  const sessionPath = generateSessionJsonPath(sessionMetadata);
  const session = await readJson<SessionJson>(sessionPath);

  const recorded = session.session.scope;
  if (recorded) {
    // Order-insensitive comparison: same length and mutual membership.
    const sameClasses =
      recorded.vulnClasses.length === vulnClasses.length &&
      recorded.vulnClasses.every((c) => vulnClasses.includes(c)) &&
      vulnClasses.every((c) => recorded.vulnClasses.includes(c));

    if (!sameClasses || recorded.exploit !== exploit) {
      throw ApplicationFailure.nonRetryable(
        `Resume scope mismatch for workspace ${input.sessionId}.\n` +
          ` Original: vuln_classes=[${recorded.vulnClasses.join(', ')}], exploit=${recorded.exploit}\n` +
          ` Provided: vuln_classes=[${vulnClasses.join(', ')}], exploit=${exploit}\n` +
          `Resume requires the same scope as the original run. Start a new workspace if you want different scope.`,
        'ScopeMismatchError',
      );
    }
    // Scope matches what was recorded; nothing to write.
    return;
  }

  // No scope recorded yet: store a copy so later mutation of the caller's array cannot leak in.
  session.session.scope = { vulnClasses: [...vulnClasses], exploit };
  await atomicWrite(sessionPath, session);
}
|
||||
|
||||
async function findLatestCommit(gitDir: string, commitHashes: string[]): Promise<string> {
|
||||
if (commitHashes.length === 1) {
|
||||
const hash = commitHashes[0];
|
||||
|
||||
@@ -2,7 +2,7 @@ import { defineQuery } from '@temporalio/workflow';
|
||||
|
||||
export type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
import type { DistributedConfig, PipelineConfig, ProviderConfig, VulnClass } from '../types/config.js';
|
||||
import type { DistributedConfig, PipelineConfig, ProviderConfig } from '../types/config.js';
|
||||
import type { ErrorCode } from '../types/errors.js';
|
||||
import type { AgentMetrics } from '../types/metrics.js';
|
||||
|
||||
@@ -29,8 +29,6 @@ export interface PipelineInput {
|
||||
checkpointsEnabled?: boolean; // Enable checkpoint activities (default: false)
|
||||
skipGitCheck?: boolean; // Skip .git directory validation in preflight (e.g. when .git is removed after clone)
|
||||
providerConfig?: ProviderConfig; // LLM provider configuration (Bedrock, Vertex, etc.)
|
||||
vulnClasses?: VulnClass[]; // omitted = all five
|
||||
exploit?: boolean; // false skips the exploitation phase
|
||||
}
|
||||
|
||||
export interface ResumeState {
|
||||
|
||||
@@ -36,7 +36,7 @@ import dotenv from 'dotenv';
|
||||
import { sanitizeHostname } from '../audit/utils.js';
|
||||
import { parseConfig } from '../config-parser.js';
|
||||
import { deliverablesDir } from '../paths.js';
|
||||
import type { PipelineConfig, VulnClass } from '../types/config.js';
|
||||
import type { PipelineConfig } from '../types/config.js';
|
||||
import { fileExists, readJson } from '../utils/file-io.js';
|
||||
import * as activities from './activities.js';
|
||||
import type { PipelineInput, PipelineProgress, PipelineState } from './shared.js';
|
||||
@@ -275,39 +275,30 @@ async function resolveWorkspace(client: Client, args: CliArgs): Promise<Workspac
|
||||
|
||||
// === Pipeline Input Construction ===
|
||||
|
||||
/** Settings read from the config file that shape the pipeline input. */
interface OrchestrationConfig {
  /** Pipeline tuning values (e.g. retry preset, max concurrent pipelines). */
  pipelineConfig: PipelineConfig;
  /** Present only when the config lists at least one vulnerability class. */
  vulnClasses?: VulnClass[];
  /** Present only when the config sets `exploit`; true when that value is the string 'true'. */
  exploit?: boolean;
}
|
||||
|
||||
async function loadOrchestrationConfig(configPath: string | undefined): Promise<OrchestrationConfig> {
|
||||
if (!configPath) return { pipelineConfig: {} };
|
||||
async function loadPipelineConfig(configPath: string | undefined): Promise<PipelineConfig> {
|
||||
if (!configPath) return {};
|
||||
try {
|
||||
const config = await parseConfig(configPath);
|
||||
const raw = config.pipeline;
|
||||
if (!raw) return {};
|
||||
|
||||
const pipelineConfig: PipelineConfig = {};
|
||||
if (config.pipeline?.retry_preset !== undefined) {
|
||||
pipelineConfig.retry_preset = config.pipeline.retry_preset;
|
||||
const result: PipelineConfig = {};
|
||||
if (raw.retry_preset !== undefined) {
|
||||
result.retry_preset = raw.retry_preset;
|
||||
}
|
||||
if (config.pipeline?.max_concurrent_pipelines !== undefined) {
|
||||
pipelineConfig.max_concurrent_pipelines = Number(config.pipeline.max_concurrent_pipelines);
|
||||
if (raw.max_concurrent_pipelines !== undefined) {
|
||||
result.max_concurrent_pipelines = Number(raw.max_concurrent_pipelines);
|
||||
}
|
||||
|
||||
return {
|
||||
pipelineConfig,
|
||||
...(config.vuln_classes && config.vuln_classes.length > 0 && { vulnClasses: [...config.vuln_classes] }),
|
||||
...(config.exploit !== undefined && { exploit: config.exploit === 'true' }),
|
||||
};
|
||||
return result;
|
||||
} catch {
|
||||
return { pipelineConfig: {} };
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
function buildPipelineInput(
|
||||
args: CliArgs,
|
||||
workspace: WorkspaceResolution,
|
||||
orchestration: OrchestrationConfig,
|
||||
pipelineConfig: PipelineConfig,
|
||||
): PipelineInput {
|
||||
return {
|
||||
webUrl: args.webUrl,
|
||||
@@ -318,9 +309,7 @@ function buildPipelineInput(
|
||||
...(args.pipelineTestingMode && { pipelineTestingMode: args.pipelineTestingMode }),
|
||||
...(workspace.isResume && args.resumeFromWorkspace && { resumeFromWorkspace: args.resumeFromWorkspace }),
|
||||
...(workspace.terminatedWorkflows.length > 0 && { terminatedWorkflows: workspace.terminatedWorkflows }),
|
||||
...(Object.keys(orchestration.pipelineConfig).length > 0 && { pipelineConfig: orchestration.pipelineConfig }),
|
||||
...(orchestration.vulnClasses && { vulnClasses: orchestration.vulnClasses }),
|
||||
...(orchestration.exploit !== undefined && { exploit: orchestration.exploit }),
|
||||
...(Object.keys(pipelineConfig).length > 0 && { pipelineConfig }),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -428,8 +417,8 @@ async function run(): Promise<void> {
|
||||
|
||||
// 4. Resolve workspace and build pipeline input
|
||||
const workspace = await resolveWorkspace(client, args);
|
||||
const orchestration = await loadOrchestrationConfig(args.configPath);
|
||||
const input = buildPipelineInput(args, workspace, orchestration);
|
||||
const pipelineConfig = await loadPipelineConfig(args.configPath);
|
||||
const input = buildPipelineInput(args, workspace, pipelineConfig);
|
||||
|
||||
// 5. Start worker polling in the background
|
||||
const workerDone = worker.run();
|
||||
|
||||
@@ -33,7 +33,6 @@ import {
|
||||
} from '@temporalio/workflow';
|
||||
import type { AgentName, VulnType } from '../types/agents.js';
|
||||
import { ALL_AGENTS } from '../types/agents.js';
|
||||
import { ALL_VULN_CLASSES, type VulnClass } from '../types/config.js';
|
||||
import type * as activities from './activities.js';
|
||||
import type { ActivityInput } from './activities.js';
|
||||
import {
|
||||
@@ -49,19 +48,6 @@ import {
|
||||
import { toWorkflowSummary } from './summary-mapper.js';
|
||||
import { classifyErrorCode, formatWorkflowError } from './workflow-errors.js';
|
||||
|
||||
/**
 * Agents this run is expected to produce — drives the resume short-circuit.
 *
 * @param vulnClasses - Vulnerability classes selected for the run.
 * @param exploit - When true, each class also contributes its `-exploit` agent.
 * @returns `pre-recon`, `recon`, then per class `<class>-vuln` (followed by
 *   `<class>-exploit` when `exploit` is true) in input order, then `report`.
 */
function computeExpectedAgents(vulnClasses: readonly VulnClass[], exploit: boolean): string[] {
  const perClass = vulnClasses.flatMap((cls) =>
    exploit ? [`${cls}-vuln`, `${cls}-exploit`] : [`${cls}-vuln`],
  );
  return ['pre-recon', 'recon', ...perClass, 'report'];
}
|
||||
|
||||
// Retry configuration for production (long intervals for billing recovery)
|
||||
const PRODUCTION_RETRY = {
|
||||
initialInterval: '5 minutes',
|
||||
@@ -229,14 +215,6 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
|
||||
};
|
||||
|
||||
const selectedVulnClasses: readonly VulnClass[] =
|
||||
input.vulnClasses && input.vulnClasses.length > 0 ? input.vulnClasses : ALL_VULN_CLASSES;
|
||||
const selectedClassSet = new Set<VulnClass>(selectedVulnClasses);
|
||||
const exploit: boolean = input.exploit ?? true;
|
||||
const expectedAgents = computeExpectedAgents(selectedVulnClasses, exploit);
|
||||
|
||||
await a.persistOrValidateRunScope(activityInput, [...selectedVulnClasses], exploit);
|
||||
|
||||
let resumeState: ResumeState | null = null;
|
||||
|
||||
if (input.resumeFromWorkspace) {
|
||||
@@ -260,11 +238,9 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
input.deliverablesSubdir,
|
||||
);
|
||||
|
||||
// 3. Short-circuit when every agent expected by this run is done.
|
||||
// Uses dynamic expectedAgents (not ALL_AGENTS) so a class-scoped run completes sooner.
|
||||
const allExpectedDone = expectedAgents.every((a) => resumeState?.completedAgents.includes(a));
|
||||
if (allExpectedDone) {
|
||||
log.info(`All ${expectedAgents.length} expected agents already completed. Nothing to resume.`);
|
||||
// 3. Short-circuit if all agents already completed
|
||||
if (resumeState.completedAgents.length === ALL_AGENTS.length) {
|
||||
log.info(`All ${ALL_AGENTS.length} agents already completed. Nothing to resume.`);
|
||||
state.status = 'completed';
|
||||
state.completedAgents = [...resumeState.completedAgents];
|
||||
state.summary = computeSummary(state);
|
||||
@@ -423,11 +399,6 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
// === Initialize Deliverables Git ===
|
||||
await a.initDeliverableGit(activityInput);
|
||||
|
||||
// === Sync SDK deny rules ===
|
||||
await a.syncCodePathDenyRules(activityInput);
|
||||
|
||||
log.info(`Run scope: vuln_classes=[${selectedVulnClasses.join(', ')}] exploit=${exploit}`);
|
||||
|
||||
// === Phase 1: Pre-Reconnaissance ===
|
||||
await runSequentialPhase('pre-recon', 'pre-recon', a.runPreReconAgent);
|
||||
|
||||
@@ -471,17 +442,19 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
// 2. Check exploitation queue for actionable findings
|
||||
const decision = await a.checkExploitationQueue(activityInput, vulnType);
|
||||
|
||||
// 3. Previously-completed exploits are preserved regardless of mode; new exploits gated by mode.
|
||||
// 3. Conditionally run exploitation agent
|
||||
let exploitMetrics: AgentMetrics | null = null;
|
||||
if (shouldSkip(exploitAgentName)) {
|
||||
log.info(`Skipping ${exploitAgentName} (already complete)`);
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
} else if (decision.shouldExploit && exploit) {
|
||||
exploitMetrics = await runExploitAgent();
|
||||
state.agentMetrics[exploitAgentName] = exploitMetrics;
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
|
||||
if (decision.shouldExploit) {
|
||||
if (!shouldSkip(exploitAgentName)) {
|
||||
exploitMetrics = await runExploitAgent();
|
||||
state.agentMetrics[exploitAgentName] = exploitMetrics;
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
if (input.checkpointsEnabled) {
|
||||
await a.saveCheckpoint(activityInput, exploitAgentName, 'exploitation', state);
|
||||
}
|
||||
} else {
|
||||
log.info(`Skipping ${exploitAgentName} (already complete)`);
|
||||
state.completedAgents.push(exploitAgentName);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -503,11 +476,6 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
const pipelineThunks: Array<() => Promise<VulnExploitPipelineResult>> = [];
|
||||
|
||||
for (const config of pipelineConfigs) {
|
||||
// Excluded classes drop entirely; any prior deliverables stay on disk but don't count this run.
|
||||
if (!selectedClassSet.has(config.vulnType)) {
|
||||
log.info(`Skipping ${config.vulnType} pipeline (class not selected this run)`);
|
||||
continue;
|
||||
}
|
||||
if (!shouldSkip(config.vulnAgent) || !shouldSkip(config.exploitAgent)) {
|
||||
pipelineThunks.push(() => runVulnExploitPipeline(config.vulnType, config.runVuln, config.runExploit));
|
||||
} else {
|
||||
@@ -529,8 +497,8 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
|
||||
state.currentAgent = 'report';
|
||||
await a.logPhaseTransition(activityInput, 'reporting', 'start');
|
||||
|
||||
// First, assemble the concatenated report from per-class deliverables
|
||||
await a.assembleReportActivity(activityInput, exploit);
|
||||
// First, assemble the concatenated report from exploitation evidence files
|
||||
await a.assembleReportActivity(activityInput);
|
||||
|
||||
// Then run the report agent to add executive summary and clean up
|
||||
state.agentMetrics.report = await a.runReportAgent(activityInput);
|
||||
|
||||
@@ -8,12 +8,12 @@
|
||||
* Configuration type definitions
|
||||
*/
|
||||
|
||||
export type RuleType = 'url_path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter' | 'code_path';
|
||||
export type RuleType = 'path' | 'subdomain' | 'domain' | 'method' | 'header' | 'parameter';
|
||||
|
||||
export interface Rule {
|
||||
description: string;
|
||||
type: RuleType;
|
||||
value: string;
|
||||
url_path: string;
|
||||
}
|
||||
|
||||
export interface Rules {
|
||||
@@ -21,19 +21,6 @@ export interface Rules {
|
||||
focus?: Rule[];
|
||||
}
|
||||
|
||||
/** Vulnerability classes that can be selected for a run. */
export type VulnClass = 'injection' | 'xss' | 'auth' | 'authz' | 'ssrf';

/** Every vulnerability class, in a fixed order. */
export const ALL_VULN_CLASSES: readonly VulnClass[] = ['injection', 'xss', 'auth', 'authz', 'ssrf'];
|
||||
|
||||
/** Severity levels accepted by the report filter. */
export type Severity = 'low' | 'medium' | 'high' | 'critical';

/** Confidence levels accepted by the report filter. */
export type Confidence = 'low' | 'medium' | 'high';

/** Optional report filters; every field may be omitted. */
export interface ReportConfig {
  /** Severity threshold; the report prompt drops findings below it. */
  min_severity?: Severity;
  /** Confidence threshold; the report prompt drops findings below it. */
  min_confidence?: Confidence;
  /** Free-text user guidance; presumably lists topics to exclude — confirm against the prompt template. */
  guidance?: string;
}
|
||||
|
||||
export type LoginType = 'form' | 'sso' | 'api' | 'basic';
|
||||
|
||||
export interface SuccessCondition {
|
||||
@@ -60,10 +47,6 @@ export interface Config {
|
||||
authentication?: Authentication;
|
||||
pipeline?: PipelineConfig;
|
||||
description?: string;
|
||||
vuln_classes?: VulnClass[];
|
||||
exploit?: 'true' | 'false';
|
||||
report?: ReportConfig;
|
||||
rules_of_engagement?: string;
|
||||
}
|
||||
|
||||
export type RetryPreset = 'default' | 'subscription';
|
||||
@@ -78,10 +61,6 @@ export interface DistributedConfig {
|
||||
focus: Rule[];
|
||||
authentication: Authentication | null;
|
||||
description: string;
|
||||
vuln_classes: VulnClass[];
|
||||
exploit: boolean;
|
||||
report: ReportConfig;
|
||||
rules_of_engagement: string;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
/**
|
||||
* Specific error codes for reliable classification.
|
||||
*
|
||||
* ErrorCode provides precision within the coarse 7-category PentestErrorType.
|
||||
* ErrorCode provides precision within the coarse 8-category PentestErrorType.
|
||||
* Used by classifyErrorForTemporal for code-based classification (preferred)
|
||||
* with string matching as fallback for external errors.
|
||||
*/
|
||||
@@ -47,7 +47,15 @@ export enum ErrorCode {
|
||||
BILLING_ERROR = 'BILLING_ERROR',
|
||||
}
|
||||
|
||||
export type PentestErrorType = 'config' | 'network' | 'prompt' | 'filesystem' | 'validation' | 'billing' | 'unknown';
|
||||
/**
 * Coarse error categories.
 *
 * - `config`: configuration file issues
 * - `network`: connection/timeout issues
 * - `tool`: an external tool (nmap, etc.) failed
 * - `prompt`: Claude SDK/API issues
 * - `filesystem`: file read/write errors
 * - `validation`: deliverable validation failed
 */
export type PentestErrorType =
  | 'config'
  | 'network'
  | 'tool'
  | 'prompt'
  | 'filesystem'
  | 'validation'
  | 'billing'
  | 'unknown';
|
||||
|
||||
export interface PentestErrorContext {
|
||||
[key: string]: unknown;
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
// Copyright (C) 2025 Keygraph, Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License version 3
|
||||
// as published by the Free Software Foundation.
|
||||
|
||||
import { glob } from 'zx';
|
||||
|
||||
/**
 * Reports whether `value` is a dynamic (glob) pattern.
 *
 * Delegates to `glob.isDynamicPattern` from `zx`.
 */
export function isGlobPattern(value: string): boolean {
  return glob.isDynamicPattern(value);
}
|
||||
Generated
+176
-688
File diff suppressed because it is too large
Load Diff
+1
-1
@@ -2,4 +2,4 @@ packages:
|
||||
- "apps/*"
|
||||
|
||||
catalog:
|
||||
"@anthropic-ai/claude-agent-sdk": ^0.2.114
|
||||
"@anthropic-ai/claude-agent-sdk": ^0.2.38
|
||||
|
||||
Reference in New Issue
Block a user