feat: backport auth-validation preflight + email_login credentials #6

Open
Hugh Commit wants to merge 1 commits from far-138/backport-phase-3 into main
16 changed files with 489 additions and 26 deletions
+2 -1
View File
@@ -65,7 +65,7 @@ export async function start(args: StartArgs): Promise<void> {
const workspacePath = path.join(workspacesDir, workspace);
fs.mkdirSync(workspacePath, { recursive: true });
fs.chmodSync(workspacePath, 0o777);
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli', '.playwright']) {
const dirPath = path.join(workspacePath, dir);
fs.mkdirSync(dirPath, { recursive: true });
fs.chmodSync(dirPath, 0o777);
@@ -76,6 +76,7 @@ export async function start(args: StartArgs): Promise<void> {
for (const dir of ['deliverables', 'scratchpad', '.playwright-cli']) {
fs.mkdirSync(path.join(shannonDir, dir), { recursive: true });
}
fs.mkdirSync(path.join(repo.hostPath, '.playwright'), { recursive: true });
const credentialsPath = getCredentialsPath();
const hasCredentials = fs.existsSync(credentialsPath);
+2 -1
View File
@@ -186,11 +186,12 @@ export function spawnWorker(opts: WorkerOptions): ChildProcess {
args.push('-v', `${opts.workspacesDir}:/app/workspaces`);
args.push('-v', `${opts.repo.hostPath}:${opts.repo.containerPath}:ro`);
// Writable overlays: shadow .shannon/ inside the :ro repo with workspace-backed dirs
// Writable overlays: shadow .shannon/ and .playwright/ inside the :ro repo with workspace-backed dirs
const workspacePath = path.join(opts.workspacesDir, opts.workspace);
args.push('-v', `${path.join(workspacePath, 'deliverables')}:${opts.repo.containerPath}/.shannon/deliverables`);
args.push('-v', `${path.join(workspacePath, 'scratchpad')}:${opts.repo.containerPath}/.shannon/scratchpad`);
args.push('-v', `${path.join(workspacePath, '.playwright-cli')}:${opts.repo.containerPath}/.shannon/.playwright-cli`);
args.push('-v', `${path.join(workspacePath, '.playwright')}:${opts.repo.containerPath}/.playwright`);
// Local mode: mount prompts for live editing
if (opts.promptsDir) {
+25 -1
View File
@@ -39,9 +39,33 @@
"type": "string",
"pattern": "^[A-Za-z2-7]+=*$",
"description": "TOTP secret for two-factor authentication (Base32 encoded, case insensitive)"
},
"email_login": {
"type": "object",
"description": "Email-based login credentials for magic-link and email-OTP flows",
"properties": {
"address": {
"type": "string",
"format": "email",
"description": "Email address for authentication"
},
"password": {
"type": "string",
"minLength": 1,
"maxLength": 255,
"description": "Password for the email account"
},
"totp_secret": {
"type": "string",
"pattern": "^[A-Za-z2-7]+=*$",
"description": "TOTP secret for email 2FA (Base32 encoded)"
}
},
"required": ["address", "password"],
"additionalProperties": false
}
},
"required": ["username", "password"],
"required": ["username"],
"additionalProperties": false
},
"login_flow": {
+19
View File
@@ -47,6 +47,25 @@ rules:
type: path
url_path: "/api/v2/user-profile"
# Email-based login (for magic-link / email-OTP flows)
# authentication:
# login_type: form
# login_url: "https://example.com/login"
# credentials:
# username: "testuser"
# email_login:
# address: "testuser@example.com"
# password: "email-password"
# totp_secret: "JBSWY3DPEHPK3PXP" # Optional TOTP for email 2FA
# login_flow:
# - "Type $username into the username field"
# - "Type $email_address into the email field"
# - "Type $email_password into the email password field"
# - "Enter $email_totp in the verification code field"
# success_condition:
# type: url_contains
# value: "/dashboard"
# Pipeline execution settings (optional)
# pipeline:
# retry_preset: subscription # 'default' or 'subscription' (6h max retry for rate limit recovery)
@@ -0,0 +1 @@
Return the structured verdict `{ "login_success": true }` and stop.
@@ -0,0 +1,26 @@
<role>
You are a credential validation agent. Your sole job is to verify that the provided login credentials work against the target application.
</role>
<objective>
Attempt to log in to the target application using the provided credentials. Report whether the login succeeded or failed.
</objective>
<target_authentication>
{{AUTH_CONTEXT}}
</target_authentication>
<cli_tools>
Use playwright-cli with session flag: `-s={{PLAYWRIGHT_SESSION}}`
</cli_tools>
<login_instructions>
{{LOGIN_INSTRUCTIONS}}
</login_instructions>
<critical>
- Do NOT explore the application beyond verifying the login.
- Do NOT modify any data or settings.
- After verifying, return your structured verdict immediately and stop.
- If login fails, include which step failed and a brief detail (mask sensitive values like passwords).
</critical>
@@ -0,0 +1,78 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
import fs from 'node:fs/promises';
import path from 'node:path';
/**
 * Outcome reported by writePlaywrightStealthConfig:
 * 'wrote' when the files were created, 'skipped-existing' when the config file was already present.
 */
export type StealthConfigWriteResult = 'wrote' | 'skipped-existing';
/**
 * JavaScript source (as a trimmed string) that writePlaywrightStealthConfig saves as
 * `.playwright/stealth-init.js`. The script overrides `navigator.webdriver` and
 * `navigator.plugins` and stubs `window.chrome.runtime`.
 *
 * The text below is emitted verbatim at runtime; its inner `//` lines are part of the
 * generated script, not comments of this module.
 *
 * NOTE(review): the object returned by buildStealthConfig does not reference this file —
 * confirm how the Playwright CLI is expected to load it.
 */
const STEALTH_INIT_SCRIPT = `
// Remove webdriver flag
Object.defineProperty(navigator, 'webdriver', { get: () => false });
// Fake plugins array (Chrome PDF Plugin, PDF Viewer, Native Client)
Object.defineProperty(navigator, 'plugins', {
get: () => {
const plugins = [
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
];
plugins.refresh = () => {};
return plugins;
},
});
// Stub window.chrome.runtime
if (!window.chrome) window.chrome = {};
if (!window.chrome.runtime) window.chrome.runtime = {};
`.trim();
/**
 * Assemble the object that is serialized into `.playwright/cli.config.json`.
 *
 * A fresh object is produced on every call. Key order is kept stable because the
 * result is passed through JSON.stringify by the caller.
 *
 * @returns Chromium launch options and browser-context options for the stealth profile.
 */
function buildStealthConfig(): object {
  // Launch flags: hide the automation hint and suppress first-run UI.
  const chromiumArgs = ['--disable-blink-features=AutomationControlled', '--no-first-run', '--no-default-browser-check'];

  const launchOptions = {
    headless: true,
    args: chromiumArgs,
    ignoreDefaultArgs: ['--enable-automation'],
  };

  // Desktop-like context: full-HD viewport, US locale, Linux Chrome user agent.
  const contextOptions = {
    viewport: { width: 1920, height: 1080 },
    locale: 'en-US',
    userAgent:
      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  };

  return { browser: 'chromium', launchOptions, contextOptions };
}
/**
 * Report whether `error` is an error object whose `code` property equals `code`
 * (the shape Node.js uses for file-system errors such as ENOENT / EEXIST).
 */
function hasErrnoCode(error: unknown, code: string): boolean {
  return typeof error === 'object' && error !== null && (error as { code?: unknown }).code === code;
}

/**
 * Write the Playwright stealth configuration into `<sourceDir>/.playwright`.
 *
 * Two files are produced: `stealth-init.js` (the init script) and `cli.config.json`.
 * The presence of `cli.config.json` is what marks the directory as already configured,
 * so it is written last and created exclusively:
 *  - an interrupted run can never leave a config behind without its init script, and
 *  - a concurrent writer that wins the race is detected instead of being overwritten.
 *
 * @param sourceDir Directory that contains (or will contain) the `.playwright` folder.
 * @returns 'skipped-existing' if `cli.config.json` is already present, otherwise 'wrote'.
 * @throws Any file-system error other than "config does not exist yet" (for example
 *         a permission error or a non-directory path component).
 */
export async function writePlaywrightStealthConfig(sourceDir: string): Promise<StealthConfigWriteResult> {
  const playwrightDir = path.join(sourceDir, '.playwright');
  const configPath = path.join(playwrightDir, 'cli.config.json');
  const initScriptPath = path.join(playwrightDir, 'stealth-init.js');

  // Skip if the config already exists. Only ENOENT means "missing"; anything else
  // (EACCES, ENOTDIR, ...) is a real problem and must not be mistaken for absence.
  try {
    await fs.stat(configPath);
    return 'skipped-existing';
  } catch (error: unknown) {
    if (!hasErrnoCode(error, 'ENOENT')) {
      throw error;
    }
  }

  await fs.mkdir(playwrightDir, { recursive: true });

  // Init script first: the config file acts as the completion marker.
  await fs.writeFile(initScriptPath, STEALTH_INIT_SCRIPT, 'utf8');

  const config = buildStealthConfig();
  try {
    // 'wx' fails with EEXIST if another writer created the config since the stat above.
    await fs.writeFile(configPath, JSON.stringify(config, null, 2), { encoding: 'utf8', flag: 'wx' });
  } catch (error: unknown) {
    if (hasErrnoCode(error, 'EEXIST')) {
      return 'skipped-existing';
    }
    throw error;
  }

  return 'wrote';
}
+10 -10
View File
@@ -381,15 +381,6 @@ const performSecurityValidation = (config: Config): void => {
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
if (pattern.test(auth.credentials.password)) {
throw new PentestError(
`authentication.credentials.password contains potentially dangerous pattern: ${pattern.source}`,
'config',
false,
{ field: 'credentials.password', pattern: pattern.source },
ErrorCode.CONFIG_VALIDATION_FAILED,
);
}
}
}
@@ -605,8 +596,17 @@ const sanitizeAuthentication = (auth: Authentication): Authentication => {
login_url: auth.login_url.trim(),
credentials: {
username: auth.credentials.username.trim(),
password: auth.credentials.password,
...(auth.credentials.password !== undefined && { password: auth.credentials.password }),
...(auth.credentials.totp_secret && { totp_secret: auth.credentials.totp_secret.trim() }),
...(auth.credentials.email_login && {
email_login: {
address: auth.credentials.email_login.address.trim(),
password: auth.credentials.email_login.password,
...(auth.credentials.email_login.totp_secret && {
totp_secret: auth.credentials.email_login.totp_secret.trim(),
}),
},
}),
},
...(auth.login_flow && { login_flow: auth.login_flow.map((step) => step.trim()) }),
success_condition: {
@@ -138,6 +138,9 @@ function classifyByErrorCode(code: ErrorCode, retryableFromError: boolean): { ty
case ErrorCode.AUTH_FAILED:
return { type: 'AuthenticationError', retryable: false };
case ErrorCode.AUTH_LOGIN_FAILED:
return { type: 'AuthLoginFailedError', retryable: false };
case ErrorCode.BILLING_ERROR:
return { type: 'BillingError', retryable: true };
@@ -76,6 +76,17 @@ async function buildLoginInstructions(
`generated TOTP code using secret "${authentication.credentials.totp_secret}"`,
);
}
// Substitute the email_login placeholders ($email_address, $email_password, $email_totp).
// Replacer functions are used instead of replacement strings: with a string,
// String.prototype.replace expands sequences such as `$&`, `$$` or `$1`, which would
// silently corrupt any credential that contains a `$`.
if (authentication.credentials.email_login) {
  const emailLogin = authentication.credentials.email_login;
  userInstructions = userInstructions.replace(/\$email_address/g, () => emailLogin.address);
  userInstructions = userInstructions.replace(/\$email_password/g, () => emailLogin.password);
  const emailTotpSecret = emailLogin.totp_secret;
  if (emailTotpSecret) {
    userInstructions = userInstructions.replace(
      /\$email_totp/g,
      () => `generated TOTP code using secret "${emailTotpSecret}"`,
    );
  }
}
}
loginInstructions = loginInstructions.replace(/{{user_instructions}}/g, userInstructions);
@@ -221,6 +232,16 @@ async function interpolateVariables(
}
}
/**
 * Turn an optional prompt-directory override into an absolute path.
 *
 * - No override (undefined or empty): the compiled-in PROMPTS_DIR is returned.
 * - Absolute override: returned unchanged.
 * - Relative override: resolved against SHANNON_WORKER_ROOT when that variable is set,
 *   otherwise against the current working directory.
 *
 * @param promptDir Override supplied by the caller, if any.
 * @returns The directory prompts should be loaded from.
 */
export function resolvePromptDir(promptDir: string | undefined): string {
  if (promptDir) {
    if (path.isAbsolute(promptDir)) {
      return promptDir;
    }
    const baseDir = process.env.SHANNON_WORKER_ROOT ?? process.cwd();
    return path.resolve(baseDir, promptDir);
  }
  return PROMPTS_DIR;
}
// Pure function: Load and interpolate prompt template
export async function loadPrompt(
promptName: string,
@@ -0,0 +1,128 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Auth-validation preflight service.
*
* Drives a real browser login before the full pipeline runs,
* catching bad credentials early and saving API budget.
*/
import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
import { z } from 'zod';
import { runClaudePrompt } from '../ai/claude-executor.js';
import type { AuditSession } from '../audit/index.js';
import type { ActivityLogger } from '../types/activity-logger.js';
import type { DistributedConfig, ProviderConfig } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import type { Result } from '../types/result.js';
import { err, ok } from '../types/result.js';
import { PentestError } from './error-handling.js';
import { loadPrompt } from './prompt-manager.js';
// Stages at which the agent may report that the login attempt failed.
// Kept as one tuple so the TypeScript type and the zod enum cannot drift apart.
const FAILURE_POINTS = ['username_or_password', 'totp_secret', 'out_of_band'] as const;

type FailurePoint = (typeof FAILURE_POINTS)[number];

// Shape of the structured verdict the validation agent must return.
const AuthValidationSchema = z.object({
  login_success: z.boolean(),
  failure_point: z.enum(FAILURE_POINTS).optional(),
  failure_detail: z.string().max(250).optional(),
});

// The same schema, exported as draft-07 JSON Schema for the agent's structured-output mode.
const AUTH_VALIDATION_OUTPUT_FORMAT: JsonSchemaOutputFormat = {
  type: 'json_schema',
  schema: z.toJSONSchema(AuthValidationSchema, { target: 'draft-07' }) as Record<string, unknown>,
};
/**
 * Everything validateAuthentication needs to run one login check.
 */
export interface AuthValidationInput {
  /** Target URL; forwarded to loadPrompt as a prompt variable. */
  webUrl: string;
  /** Repository path; forwarded to loadPrompt as a prompt variable and passed to runClaudePrompt. */
  repoPath: string;
  /** Distributed configuration handed to loadPrompt. */
  config: DistributedConfig;
  /** Forwarded unchanged to loadPrompt. */
  pipelineTestingMode: boolean;
  /** Audit session passed to runClaudePrompt. */
  auditSession: AuditSession;
  /** Logger passed to both loadPrompt and runClaudePrompt. */
  logger: ActivityLogger;
  /** Optional prompt directory forwarded to loadPrompt. */
  promptDir?: string;
  /** Optional API key forwarded to runClaudePrompt. */
  apiKey?: string;
  /** Optional provider configuration forwarded to runClaudePrompt. */
  providerConfig?: ProviderConfig;
}
/**
 * Convert a schema-validated agent verdict into a Result.
 *
 * @param parsed Verdict that already passed AuthValidationSchema.
 * @returns ok when the login succeeded; otherwise an err carrying a PentestError created
 *          with retryable=false and code AUTH_LOGIN_FAILED, whose context holds the
 *          failure point and detail. A missing failure point defaults to
 *          'username_or_password', a missing detail to 'Login failed'.
 */
function classifyResult(parsed: z.infer<typeof AuthValidationSchema>): Result<void, PentestError> {
  if (!parsed.login_success) {
    const failurePoint: FailurePoint = parsed.failure_point ?? 'username_or_password';
    const detail = parsed.failure_detail ?? 'Login failed';

    const loginError = new PentestError(
      `Authentication validation failed at "${failurePoint}": ${detail}`,
      'config',
      false,
      { failurePoint, failureDetail: detail },
      ErrorCode.AUTH_LOGIN_FAILED,
    );
    return err(loginError);
  }

  return ok(undefined);
}
/**
 * Run the auth-validation agent once and translate its verdict into a Result.
 *
 * Steps: load the 'validate-authentication' prompt, run the agent with the structured
 * output format, validate the returned object against AuthValidationSchema, then
 * classify it.
 *
 * @param input Target, configuration and execution context for the check.
 * @returns ok on a successful login. err with AUTH_LOGIN_FAILED when the agent reports
 *          a failed login, AGENT_EXECUTION_FAILED when no structured verdict came back,
 *          or OUTPUT_VALIDATION_FAILED when the verdict does not match the schema. The
 *          latter two are constructed with retryable=true.
 */
export async function validateAuthentication(input: AuthValidationInput): Promise<Result<void, PentestError>> {
  const agentKey = 'validate-authentication';
  const { auditSession, logger } = input;

  // 1. Load the validation prompt
  const prompt = await loadPrompt(
    agentKey,
    { webUrl: input.webUrl, repoPath: input.repoPath },
    input.config,
    input.pipelineTestingMode,
    logger,
    input.promptDir,
  );

  // 2. Run the agent with structured output
  const agentRun = await runClaudePrompt(
    prompt,
    input.repoPath,
    '',
    'Auth validation',
    agentKey,
    auditSession,
    logger,
    'medium',
    AUTH_VALIDATION_OUTPUT_FORMAT,
    input.apiKey,
    undefined,
    input.providerConfig,
  );

  // 3. The run must have succeeded AND produced a structured payload
  const hasVerdict = agentRun.success && agentRun.structuredOutput;
  if (!hasVerdict) {
    const missingVerdict = new PentestError(
      `Auth validation agent did not return a structured verdict: ${agentRun.error ?? 'unknown error'}`,
      'validation',
      true,
      { agentError: agentRun.error },
      ErrorCode.AGENT_EXECUTION_FAILED,
    );
    return err(missingVerdict);
  }

  // 4. Validate the payload shape, then classify the verdict
  const verdict = AuthValidationSchema.safeParse(agentRun.structuredOutput);
  if (verdict.success) {
    return classifyResult(verdict.data);
  }

  return err(
    new PentestError(
      `Auth validation output failed schema validation: ${verdict.error.message}`,
      'validation',
      true,
      { zodErrors: verdict.error.issues },
      ErrorCode.OUTPUT_VALIDATION_FAILED,
    ),
  );
}
+3
View File
@@ -151,6 +151,9 @@ function createExploitValidator(vulnType: VulnType): AgentValidator {
// Playwright session mapping - assigns each agent to a specific session for browser isolation
// Keys are promptTemplate values from AGENTS registry
export const PLAYWRIGHT_SESSION_MAPPING: Record<string, PlaywrightSession> = Object.freeze({
// Runs before any agent — non-concurrent, so agent1 is safe to share
'validate-authentication': 'agent1',
// Phase 1: Pre-reconnaissance
'pre-recon-code': 'agent1',
+132 -11
View File
@@ -18,10 +18,12 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { ApplicationFailure, Context, heartbeat } from '@temporalio/activity';
import { type StealthConfigWriteResult, writePlaywrightStealthConfig } from '../ai/playwright-config-writer.js';
import { AuditSession } from '../audit/index.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import type { SessionMetadata } from '../audit/utils.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import { distributeConfig, parseConfig, parseConfigYAML } from '../config-parser.js';
import type { CheckpointContext } from '../interfaces/checkpoint-provider.js';
import { DEFAULT_DELIVERABLES_SUBDIR, deliverablesDir } from '../paths.js';
import { getContainer, getOrCreateContainer, removeContainer } from '../services/container.js';
@@ -29,12 +31,14 @@ import { classifyErrorForTemporal, PentestError } from '../services/error-handli
import { ExploitationCheckerService } from '../services/exploitation-checker.js';
import { executeGitCommandWithRetry } from '../services/git-manager.js';
import { runPreflightChecks } from '../services/preflight.js';
import { resolvePromptDir } from '../services/prompt-manager.js';
import type { ExploitationDecision, VulnType } from '../services/queue-validation.js';
import { assembleFinalReport, injectModelIntoReport } from '../services/reporting.js';
import { validateAuthentication } from '../services/validate-authentication.js';
import { AGENTS } from '../session-manager.js';
import type { AgentName } from '../types/agents.js';
import { ALL_AGENTS } from '../types/agents.js';
import type { ContainerConfig, ProviderConfig } from '../types/config.js';
import type { Config, ContainerConfig, ProviderConfig } from '../types/config.js';
import { ErrorCode } from '../types/errors.js';
import { isErr } from '../types/result.js';
import { fileExists, readJson } from '../utils/file-io.js';
@@ -182,11 +186,7 @@ async function runAgentActivity(agentName: AgentName, input: ActivityInput): Pro
attemptNumber,
...(input.apiKey !== undefined && { apiKey: input.apiKey }),
...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
...(input.promptDir !== undefined && {
promptDir: path.isAbsolute(input.promptDir)
? input.promptDir
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
}),
...(input.promptDir !== undefined && { promptDir: resolvePromptDir(input.promptDir) }),
...(input.configYAML !== undefined && { configYAML: input.configYAML }),
},
auditSession,
@@ -373,6 +373,131 @@ export async function runPreflightValidation(input: ActivityInput): Promise<void
}
}
/**
 * Activity wrapper around writePlaywrightStealthConfig.
 *
 * Writes the stealth configuration under `input.repoPath`, logs the outcome and
 * returns it unchanged.
 *
 * @param input Activity input; only `repoPath` is read here.
 * @returns 'wrote' or 'skipped-existing', as reported by the writer.
 */
export async function syncPlaywrightStealthConfig(input: ActivityInput): Promise<StealthConfigWriteResult> {
  const activityLog = createActivityLogger();
  const { repoPath } = input;

  const outcome = await writePlaywrightStealthConfig(repoPath);
  activityLog.info(`Playwright stealth config: ${outcome}`);

  return outcome;
}
/**
 * Auth-validation preflight activity.
 *
 * Performs one real browser login through validateAuthentication so that bad
 * credentials are caught before the full pipeline starts. Returns normally when the
 * login succeeds, or when there is nothing to validate (no config supplied, or the
 * config has no authentication section).
 *
 * Deliberately does not go through runAgentActivity: this check has its own
 * structured-output flow and retry semantics.
 *
 * A heartbeat is emitted every HEARTBEAT_INTERVAL_MS while the activity runs and is
 * always stopped in the `finally` block.
 *
 * @throws ApplicationFailure for every failure — retryable or non-retryable according
 *         to classifyErrorForTemporal. Its details start with
 *         `{ phase, attemptNumber, elapsed }`; failed logins add
 *         `{ failurePoint, failureDetail }`.
 *
 * NOTE(review): the non-retryable path hands the details array to
 * ApplicationFailure.nonRetryable as a single argument. If that parameter is a rest
 * parameter (as in the Temporal SDK), the array ends up nested one level deeper than on
 * the retryable path — confirm consumers expect that shape.
 */
export async function runAuthenticationValidation(input: ActivityInput): Promise<void> {
  const startTime = Date.now();
  const attemptNumber = Context.current().info.attempt;

  // First `details` entry shared by every failure; `elapsed` is sampled when called.
  const phaseDetail = (): Record<string, unknown> => ({
    phase: 'auth-validation',
    attemptNumber,
    elapsed: Date.now() - startTime,
  });

  // Build the Temporal failure matching the classification and trim its stack trace.
  const buildFailure = (
    classified: ReturnType<typeof classifyErrorForTemporal>,
    message: string,
    details: Record<string, unknown>[],
  ): ApplicationFailure => {
    const failure = classified.retryable
      ? ApplicationFailure.create({
          message,
          type: classified.type,
          details,
        })
      : ApplicationFailure.nonRetryable(message, classified.type, details);
    truncateStackTrace(failure);
    return failure;
  };

  const heartbeatInterval = setInterval(() => {
    const elapsedSeconds = Math.floor((Date.now() - startTime) / 1000);
    heartbeat({ phase: 'auth-validation', elapsedSeconds, attempt: attemptNumber });
  }, HEARTBEAT_INTERVAL_MS);

  try {
    const logger = createActivityLogger();
    logger.info('Running authentication validation...', { attempt: attemptNumber });

    // 1. Load config to get authentication details (inline YAML wins over a path)
    let config: Config;
    if (input.configYAML) {
      config = parseConfigYAML(input.configYAML);
    } else if (input.configPath) {
      config = await parseConfig(input.configPath);
    } else {
      logger.info('No config provided, skipping auth validation');
      return;
    }

    const distributed = distributeConfig(config);
    if (!distributed.authentication) {
      logger.info('No authentication configured, skipping auth validation');
      return;
    }

    // 2. Create audit session
    const auditSession = new AuditSession(buildSessionMetadata(input));
    await auditSession.initialize(input.workflowId);

    // 3. Run validation
    const outcome = await validateAuthentication({
      webUrl: input.webUrl,
      repoPath: input.repoPath,
      config: distributed,
      pipelineTestingMode: input.pipelineTestingMode ?? false,
      auditSession,
      logger,
      promptDir: resolvePromptDir(input.promptDir),
      ...(input.apiKey !== undefined && { apiKey: input.apiKey }),
      ...(input.providerConfig !== undefined && { providerConfig: input.providerConfig }),
    });

    if (!isErr(outcome)) {
      logger.info('Authentication validation passed');
      return;
    }

    const validationError = outcome.error;
    const classified = classifyErrorForTemporal(validationError);
    const message = truncateErrorMessage(validationError.message);
    const details: Record<string, unknown>[] = [phaseDetail()];

    // Include failure point and detail for consumer error handling
    if (validationError.context.failurePoint) {
      details.push({
        failurePoint: validationError.context.failurePoint,
        failureDetail: validationError.context.failureDetail,
      });
    }

    // Thrown inside the try on purpose: the catch below re-throws ApplicationFailure as-is.
    throw buildFailure(classified, message, details);
  } catch (error) {
    if (error instanceof ApplicationFailure) {
      throw error;
    }

    // Anything else (config parsing, prompt loading, ...) is classified and wrapped.
    const classified = classifyErrorForTemporal(error);
    const rawMessage = error instanceof Error ? error.message : String(error);
    const message = truncateErrorMessage(rawMessage);
    throw buildFailure(classified, message, [phaseDetail()]);
  } finally {
    clearInterval(heartbeatInterval);
  }
}
/**
* Initialize a private git repository inside the workspace deliverables directory.
* Idempotent — skips if .git already exists (resume case).
@@ -799,11 +924,7 @@ export async function generateReportOutputActivity(input: ActivityInput): Promis
// Resolve promptDir against the worker root so providers are cwd-independent.
const resolvedInput: ActivityInput = {
...input,
...(input.promptDir !== undefined && {
promptDir: path.isAbsolute(input.promptDir)
? input.promptDir
: path.resolve(process.env.SHANNON_WORKER_ROOT ?? process.cwd(), input.promptDir),
}),
...(input.promptDir !== undefined && { promptDir: resolvePromptDir(input.promptDir) }),
};
const result = await container.reportOutputProvider.generate(resolvedInput, logger);
+27
View File
@@ -56,6 +56,7 @@ const PRODUCTION_RETRY = {
maximumAttempts: 50,
nonRetryableErrorTypes: [
'AuthenticationError',
'AuthLoginFailedError',
'PermissionError',
'InvalidRequestError',
'RequestTooLargeError',
@@ -120,6 +121,22 @@ const preflightActs = proxyActivities<typeof activities>({
retry: PREFLIGHT_RETRY,
});
// Retry policy for the browser-based auth validation: at most 3 attempts, backing off
// from 10 seconds (doubling, capped at 1 minute). The non-retryable error types are
// shared with PRODUCTION_RETRY.
const AUTH_VALIDATION_RETRY = {
  maximumAttempts: 3,
  initialInterval: '10 seconds',
  backoffCoefficient: 2,
  maximumInterval: '1 minute',
  nonRetryableErrorTypes: PRODUCTION_RETRY.nonRetryableErrorTypes,
};

// Activity proxy for auth validation: each attempt may run for up to 10 minutes.
// NOTE(review): heartbeatTimeout equals startToCloseTimeout, so a missed heartbeat can
// never fail the activity earlier than the overall timeout — confirm this is intended.
const authValidationActs = proxyActivities<typeof activities>({
  retry: AUTH_VALIDATION_RETRY,
  startToCloseTimeout: '10 minutes',
  heartbeatTimeout: '10 minutes',
});
/**
* Compute aggregated metrics from the current pipeline state.
* Called on both success and failure to provide partial metrics.
@@ -396,6 +413,16 @@ export async function pentestPipeline(input: PipelineInput): Promise<PipelineSta
await preflightActs.runPreflightValidation(activityInput);
log.info('Preflight validation passed');
// === Playwright Stealth Config ===
await a.syncPlaywrightStealthConfig(activityInput);
// === Auth Validation ===
// Browser-based credential check before committing to the full pipeline.
state.currentPhase = 'auth-validation';
state.currentAgent = 'validate-authentication';
await authValidationActs.runAuthenticationValidation(activityInput);
log.info('Auth validation passed');
// === Initialize Deliverables Git ===
await a.initDeliverableGit(activityInput);
+9 -2
View File
@@ -28,12 +28,19 @@ export interface SuccessCondition {
value: string;
}
export interface Credentials {
username: string;
/**
 * Credentials for the email account used in email-based login flows
 * (the `email_login` object of the configuration).
 */
export interface EmailLogin {
  /** Email address for authentication. */
  address: string;
  /** Password for the email account. */
  password: string;
  /** Optional TOTP secret for the email account's two-factor authentication. */
  totp_secret?: string;
}
/**
 * Login credentials from the `authentication.credentials` section of the configuration.
 * Only `username` is mandatory.
 */
export interface Credentials {
  /** Account username. */
  username: string;
  /** Account password; optional. */
  password?: string;
  /** Optional TOTP secret for two-factor authentication. */
  totp_secret?: string;
  /** Optional email-account credentials for email-based login flows. */
  email_login?: EmailLogin;
}
export interface Authentication {
login_type: LoginType;
login_url: string;
+3
View File
@@ -45,6 +45,9 @@ export enum ErrorCode {
TARGET_UNREACHABLE = 'TARGET_UNREACHABLE',
AUTH_FAILED = 'AUTH_FAILED',
BILLING_ERROR = 'BILLING_ERROR',
// Auth validation errors
AUTH_LOGIN_FAILED = 'AUTH_LOGIN_FAILED',
}
export type PentestErrorType = 'config' | 'network' | 'prompt' | 'filesystem' | 'validation' | 'billing' | 'unknown';