Files
trebuchet/src/audit/metrics-tracker.ts
T
ajmallesh a960ad1182 refactor: add numbered step comments to 20 complex sequential functions
- Add // N. Description steps to temporal layer (client, activities, workflows)
- Add steps to AI layer (claude-executor: runClaudePrompt, buildMcpServers)
- Add steps to services layer (prompt-manager, config-parser, git-manager)
- Add steps to audit layer (metrics-tracker, audit-session)
- Update CLAUDE.md comment guidelines with clearer numbered-step vs section-divider guidance
2026-02-16 20:45:58 -08:00

394 lines
11 KiB
TypeScript

// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Metrics Tracker
*
* Manages session.json with comprehensive timing, cost, and validation metrics.
* Tracks attempt-level data for complete forensic trail.
*/
import {
generateSessionJsonPath,
type SessionMetadata,
} from './utils.js';
import { atomicWrite, readJson, fileExists } from '../utils/file-io.js';
import { formatTimestamp, calculatePercentage } from '../utils/formatting.js';
import { AGENT_PHASE_MAP, type PhaseName } from '../session-manager.js';
import { PentestError } from '../services/error-handling.js';
import { ErrorCode } from '../types/errors.js';
import type { AgentName, AgentEndResult } from '../types/index.js';
interface AttemptData {
attempt_number: number;
duration_ms: number;
cost_usd: number;
success: boolean;
timestamp: string;
model?: string | undefined;
error?: string | undefined;
}
interface AgentAuditMetrics {
status: 'in-progress' | 'success' | 'failed';
attempts: AttemptData[];
final_duration_ms: number;
total_cost_usd: number;
model?: string | undefined;
checkpoint?: string | undefined;
}
interface PhaseMetrics {
duration_ms: number;
duration_percentage: number;
cost_usd: number;
agent_count: number;
}
export interface ResumeAttempt {
workflowId: string;
timestamp: string;
terminatedPrevious?: string;
resumedFromCheckpoint?: string;
}
interface SessionData {
session: {
id: string;
webUrl: string;
repoPath?: string;
status: 'in-progress' | 'completed' | 'failed';
createdAt: string;
completedAt?: string;
originalWorkflowId?: string; // First workflow that created this workspace
resumeAttempts?: ResumeAttempt[]; // Track all resume attempts
};
metrics: {
total_duration_ms: number;
total_cost_usd: number;
phases: Record<string, PhaseMetrics>;
agents: Record<string, AgentAuditMetrics>;
};
}
interface ActiveTimer {
startTime: number;
attemptNumber: number;
}
/**
* MetricsTracker - Manages metrics for a session
*/
export class MetricsTracker {
private sessionMetadata: SessionMetadata;
private sessionJsonPath: string;
private data: SessionData | null = null;
private activeTimers: Map<string, ActiveTimer> = new Map();
constructor(sessionMetadata: SessionMetadata) {
this.sessionMetadata = sessionMetadata;
this.sessionJsonPath = generateSessionJsonPath(sessionMetadata);
}
/**
* Initialize session.json (idempotent)
*
* @param workflowId - Optional workflow ID to set as originalWorkflowId for new sessions
*/
async initialize(workflowId?: string): Promise<void> {
// Check if session.json already exists
const exists = await fileExists(this.sessionJsonPath);
if (exists) {
// Load existing data
this.data = await readJson<SessionData>(this.sessionJsonPath);
} else {
// Create new session.json
this.data = this.createInitialData(workflowId);
await this.save();
}
}
/**
* Create initial session.json structure
*
* @param workflowId - Optional workflow ID to set as originalWorkflowId
*/
private createInitialData(workflowId?: string): SessionData {
const sessionData: SessionData = {
session: {
id: this.sessionMetadata.id,
webUrl: this.sessionMetadata.webUrl,
status: 'in-progress',
createdAt: (this.sessionMetadata as { createdAt?: string }).createdAt || formatTimestamp(),
resumeAttempts: [],
},
metrics: {
total_duration_ms: 0,
total_cost_usd: 0,
phases: {}, // Phase-level aggregations
agents: {}, // Agent-level metrics
},
};
// Set originalWorkflowId if provided (for new workspaces)
if (workflowId) {
sessionData.session.originalWorkflowId = workflowId;
}
// Only add repoPath if it exists
if (this.sessionMetadata.repoPath) {
sessionData.session.repoPath = this.sessionMetadata.repoPath;
}
return sessionData;
}
/**
* Start tracking an agent execution
*/
startAgent(agentName: string, attemptNumber: number): void {
this.activeTimers.set(agentName, {
startTime: Date.now(),
attemptNumber,
});
}
/**
* End agent execution and update metrics
*/
async endAgent(agentName: string, result: AgentEndResult): Promise<void> {
if (!this.data) {
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// 1. Initialize agent metrics if first time seeing this agent
const existingAgent = this.data.metrics.agents[agentName];
const agent = existingAgent ?? {
status: 'in-progress' as const,
attempts: [],
final_duration_ms: 0,
total_cost_usd: 0,
};
this.data.metrics.agents[agentName] = agent;
// 2. Build attempt record with optional model/error fields
const attempt: AttemptData = {
attempt_number: result.attemptNumber,
duration_ms: result.duration_ms,
cost_usd: result.cost_usd,
success: result.success,
timestamp: formatTimestamp(),
};
if (result.model) {
attempt.model = result.model;
}
if (result.error) {
attempt.error = result.error;
}
// 3. Append attempt to history
agent.attempts.push(attempt);
// 4. Recalculate total cost across all attempts (includes failures)
agent.total_cost_usd = agent.attempts.reduce((sum, a) => sum + a.cost_usd, 0);
// 5. Update agent status based on outcome
if (result.success) {
agent.status = 'success';
agent.final_duration_ms = result.duration_ms;
// 6. Attach model and checkpoint metadata on success
if (result.model) {
agent.model = result.model;
}
if (result.checkpoint) {
agent.checkpoint = result.checkpoint;
}
} else {
if (result.isFinalAttempt) {
agent.status = 'failed';
}
}
// 7. Clear active timer
this.activeTimers.delete(agentName);
// 8. Recalculate phase and session-level aggregations
this.recalculateAggregations();
// 9. Persist to session.json
await this.save();
}
/**
* Update session status
*/
async updateSessionStatus(status: 'in-progress' | 'completed' | 'failed'): Promise<void> {
if (!this.data) return;
this.data.session.status = status;
if (status === 'completed' || status === 'failed') {
this.data.session.completedAt = formatTimestamp();
}
await this.save();
}
/**
* Add a resume attempt to the session
*
* @param workflowId - The new workflow ID for this resume attempt
* @param terminatedWorkflows - IDs of workflows that were terminated
* @param checkpointHash - Git checkpoint hash that was restored
*/
async addResumeAttempt(
workflowId: string,
terminatedWorkflows: string[],
checkpointHash?: string
): Promise<void> {
if (!this.data) {
throw new PentestError(
'MetricsTracker not initialized',
'validation',
false,
{},
ErrorCode.AGENT_EXECUTION_FAILED
);
}
// Ensure originalWorkflowId is set (backfill if missing from old sessions)
if (!this.data.session.originalWorkflowId) {
this.data.session.originalWorkflowId = this.data.session.id;
}
// Ensure resumeAttempts array exists
if (!this.data.session.resumeAttempts) {
this.data.session.resumeAttempts = [];
}
// Add new resume attempt
const resumeAttempt: ResumeAttempt = {
workflowId,
timestamp: formatTimestamp(),
};
if (terminatedWorkflows.length > 0) {
resumeAttempt.terminatedPrevious = terminatedWorkflows.join(',');
}
if (checkpointHash) {
resumeAttempt.resumedFromCheckpoint = checkpointHash;
}
this.data.session.resumeAttempts.push(resumeAttempt);
await this.save();
}
/**
* Recalculate aggregations (total duration, total cost, phases)
*/
private recalculateAggregations(): void {
if (!this.data) return;
const agents = this.data.metrics.agents;
// Only count successful agents
const successfulAgents = Object.entries(agents).filter(
([, data]) => data.status === 'success'
);
// Calculate total duration and cost
const totalDuration = successfulAgents.reduce(
(sum, [, data]) => sum + data.final_duration_ms,
0
);
const totalCost = successfulAgents.reduce((sum, [, data]) => sum + data.total_cost_usd, 0);
this.data.metrics.total_duration_ms = totalDuration;
this.data.metrics.total_cost_usd = totalCost;
// Calculate phase-level metrics
this.data.metrics.phases = this.calculatePhaseMetrics(successfulAgents);
}
/**
* Calculate phase-level metrics
*/
private calculatePhaseMetrics(
successfulAgents: Array<[string, AgentAuditMetrics]>
): Record<string, PhaseMetrics> {
const phases: Record<PhaseName, AgentAuditMetrics[]> = {
'pre-recon': [],
'recon': [],
'vulnerability-analysis': [],
'exploitation': [],
'reporting': [],
};
// Group agents by phase using imported AGENT_PHASE_MAP
for (const [agentName, agentData] of successfulAgents) {
const phase = AGENT_PHASE_MAP[agentName as AgentName];
if (phase) {
phases[phase].push(agentData);
}
}
// Calculate metrics per phase
const phaseMetrics: Record<string, PhaseMetrics> = {};
const totalDuration = this.data!.metrics.total_duration_ms;
for (const [phaseName, agentList] of Object.entries(phases)) {
if (agentList.length === 0) continue;
const phaseDuration = agentList.reduce((sum, agent) => sum + agent.final_duration_ms, 0);
const phaseCost = agentList.reduce((sum, agent) => sum + agent.total_cost_usd, 0);
phaseMetrics[phaseName] = {
duration_ms: phaseDuration,
duration_percentage: calculatePercentage(phaseDuration, totalDuration),
cost_usd: phaseCost,
agent_count: agentList.length,
};
}
return phaseMetrics;
}
/**
* Get current metrics
*/
getMetrics(): SessionData {
return JSON.parse(JSON.stringify(this.data)) as SessionData;
}
/**
* Save metrics to session.json (atomic write)
*/
private async save(): Promise<void> {
if (!this.data) return;
await atomicWrite(this.sessionJsonPath, this.data);
}
/**
* Reload metrics from disk
*/
async reload(): Promise<void> {
this.data = await readJson<SessionData>(this.sessionJsonPath);
}
}