feat: add three-tier model system with Bedrock support

Introduce small/medium/large model tiers so agents use the appropriate
model for their task complexity. Pre-recon uses Opus (large) for deep
source code analysis, most agents use Sonnet (medium), and report uses
Haiku (small) for summarization.

- Add src/ai/models.ts with ModelTier type and resolveModel()
- Add modelTier field to AgentDefinition
- Refactor claude-executor env var passthrough into a loop
- Add Bedrock credential validation in preflight and CLI
- Pass through Bedrock and model env vars in docker-compose
This commit is contained in:
ezl-keygraph
2026-03-03 01:08:26 +05:30
parent 98e3446448
commit b62abfea4c
10 changed files with 159 additions and 21 deletions
+20 -13
View File
@@ -24,6 +24,7 @@ import { detectExecutionContext, formatErrorOutput, formatCompletionMessage } fr
import { createProgressManager } from './progress-manager.js';
import { createAuditLogger } from './audit-logger.js';
import { getActualModelName } from './router-utils.js';
import { resolveModel, type ModelTier } from './models.js';
import type { ActivityLogger } from '../types/activity-logger.js';
declare global {
@@ -202,7 +203,8 @@ export async function runClaudePrompt(
description: string = 'Claude analysis',
agentName: string | null = null,
auditSession: AuditSession | null = null,
logger: ActivityLogger
logger: ActivityLogger,
modelTier: ModelTier = 'medium'
): Promise<ClaudePromptResult> {
// 1. Initialize timing and prompt
const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
@@ -225,22 +227,27 @@ export async function runClaudePrompt(
const sdkEnv: Record<string, string> = {
CLAUDE_CODE_MAX_OUTPUT_TOKENS: process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS || '64000',
};
if (process.env.ANTHROPIC_API_KEY) {
sdkEnv.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
}
if (process.env.CLAUDE_CODE_OAUTH_TOKEN) {
sdkEnv.CLAUDE_CODE_OAUTH_TOKEN = process.env.CLAUDE_CODE_OAUTH_TOKEN;
}
if (process.env.ANTHROPIC_BASE_URL) {
sdkEnv.ANTHROPIC_BASE_URL = process.env.ANTHROPIC_BASE_URL;
}
if (process.env.ANTHROPIC_AUTH_TOKEN) {
sdkEnv.ANTHROPIC_AUTH_TOKEN = process.env.ANTHROPIC_AUTH_TOKEN;
const passthroughVars = [
'ANTHROPIC_API_KEY',
'CLAUDE_CODE_OAUTH_TOKEN',
'ANTHROPIC_BASE_URL',
'ANTHROPIC_AUTH_TOKEN',
'CLAUDE_CODE_USE_BEDROCK',
'AWS_REGION',
'AWS_BEARER_TOKEN_BEDROCK',
'ANTHROPIC_SMALL_MODEL',
'ANTHROPIC_MEDIUM_MODEL',
'ANTHROPIC_LARGE_MODEL',
];
for (const name of passthroughVars) {
if (process.env[name]) {
sdkEnv[name] = process.env[name]!;
}
}
// 5. Configure SDK options
const options = {
model: 'claude-sonnet-4-5-20250929',
model: resolveModel(modelTier),
maxTurns: 10_000,
cwd: sourceDir,
permissionMode: 'bypassPermissions' as const,
+37
View File
@@ -0,0 +1,37 @@
// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Model tier definitions and resolution.
*
* Three tiers mapped to capability levels:
* - "small" (Haiku — summarization, structured extraction)
* - "medium" (Sonnet — tool use, general analysis)
* - "large" (Opus — deep reasoning, complex analysis)
*
* Users override via ANTHROPIC_SMALL_MODEL / ANTHROPIC_MEDIUM_MODEL / ANTHROPIC_LARGE_MODEL,
* which works across all providers (direct, Bedrock, Vertex).
*/
/** Capability tier an agent can request; each tier maps to one concrete model ID. */
export type ModelTier = 'small' | 'medium' | 'large';

/** Built-in model ID for each tier, used when no environment override is set. */
const DEFAULT_MODELS: Readonly<Record<ModelTier, string>> = {
  small: 'claude-haiku-4-5-20251001',
  medium: 'claude-sonnet-4-6',
  large: 'claude-opus-4-6',
};

/** Name of the environment variable that overrides the model ID for each tier. */
const MODEL_ENV_VARS: Readonly<Record<ModelTier, string>> = {
  small: 'ANTHROPIC_SMALL_MODEL',
  medium: 'ANTHROPIC_MEDIUM_MODEL',
  large: 'ANTHROPIC_LARGE_MODEL',
};

/**
 * Resolve a model tier to a concrete model ID.
 *
 * The tier's override environment variable wins when it holds a non-blank
 * value; otherwise the built-in default for that tier is returned. The
 * override is trimmed, so a value that is empty, whitespace-only, or padded
 * with stray spaces/newlines never reaches the caller as a bogus model ID.
 *
 * @param tier - Requested tier. Defaults to `'medium'`; any value other than
 *   `'small'` or `'large'` is also treated as `'medium'`.
 * @returns The model ID to use for the tier.
 */
export function resolveModel(tier: ModelTier = 'medium'): string {
  // Callers compiled without type checking can pass an arbitrary value at
  // runtime; anything unrecognized resolves as the medium tier.
  const effectiveTier: ModelTier = tier === 'small' || tier === 'large' ? tier : 'medium';

  const override = process.env[MODEL_ENV_VARS[effectiveTier]]?.trim();

  // `||` rather than `??` on purpose: an empty override must fall through to
  // the default, not be returned as the model ID.
  return override || DEFAULT_MODELS[effectiveTier];
}
+2 -1
View File
@@ -156,7 +156,8 @@ export class AgentExecutionService {
agentName, // description
agentName,
auditSession,
logger
logger,
AGENTS[agentName].modelTier
);
// 6. Spending cap check - defense-in-depth
+24 -4
View File
@@ -24,6 +24,7 @@ import { PentestError, isRetryableError } from './error-handling.js';
import { ErrorCode } from '../types/errors.js';
import { type Result, ok, err } from '../types/result.js';
import { parseConfig } from '../config-parser.js';
import { resolveModel } from '../ai/models.js';
import type { ActivityLogger } from '../types/activity-logger.js';
// === Repository Validation ===
@@ -165,11 +166,30 @@ async function validateCredentials(
return ok(undefined);
}
// 2. Check that at least one credential is present
// 2. Bedrock mode — validate required AWS credentials are present
if (process.env.CLAUDE_CODE_USE_BEDROCK === '1') {
const required = ['AWS_REGION', 'AWS_BEARER_TOKEN_BEDROCK', 'ANTHROPIC_SMALL_MODEL', 'ANTHROPIC_MEDIUM_MODEL', 'ANTHROPIC_LARGE_MODEL'];
const missing = required.filter(v => !process.env[v]);
if (missing.length > 0) {
return err(
new PentestError(
`Bedrock mode requires the following env vars in .env: ${missing.join(', ')}`,
'config',
false,
{ missing },
ErrorCode.AUTH_FAILED
)
);
}
logger.info('Bedrock credentials OK');
return ok(undefined);
}
// 3. Check that at least one credential is present
if (!process.env.ANTHROPIC_API_KEY && !process.env.CLAUDE_CODE_OAUTH_TOKEN) {
return err(
new PentestError(
'No API credentials found. Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env',
'No API credentials found. Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in .env (or use CLAUDE_CODE_USE_BEDROCK=1 for AWS Bedrock)',
'config',
false,
{},
@@ -178,12 +198,12 @@ async function validateCredentials(
);
}
// 3. Validate via SDK query
// 4. Validate via SDK query
const authType = process.env.CLAUDE_CODE_OAUTH_TOKEN ? 'OAuth token' : 'API key';
logger.info(`Validating ${authType} via SDK...`);
try {
for await (const message of query({ prompt: 'hi', options: { model: 'claude-haiku-4-5-20251001', maxTurns: 1 } })) {
for await (const message of query({ prompt: 'hi', options: { model: resolveModel('small'), maxTurns: 1 } })) {
if (message.type === 'assistant' && message.error) {
return classifySdkError(message.error, authType);
}
+2
View File
@@ -18,6 +18,7 @@ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freez
prerequisites: [],
promptTemplate: 'pre-recon-code',
deliverableFilename: 'code_analysis_deliverable.md',
modelTier: 'large',
},
'recon': {
name: 'recon',
@@ -102,6 +103,7 @@ export const AGENTS: Readonly<Record<AgentName, AgentDefinition>> = Object.freez
prerequisites: ['injection-exploit', 'xss-exploit', 'auth-exploit', 'ssrf-exploit', 'authz-exploit'],
promptTemplate: 'report-executive',
deliverableFilename: 'comprehensive_security_assessment_report.md',
modelTier: 'small',
},
});
+1
View File
@@ -58,6 +58,7 @@ export interface AgentDefinition {
prerequisites: AgentName[];
promptTemplate: string;
deliverableFilename: string;
modelTier?: 'small' | 'medium' | 'large';
}
/**