Files
trebuchet/src/temporal/activities.ts
T
ezl-keygraph dbcb4587ee fix: update session.json status on workflow completion
logWorkflowComplete wrote to workflow.log but never called
updateSessionStatus, leaving all workspaces stuck as "in-progress"
in session.json. Also derive audit path for model injection instead
of requiring explicit outputPath.
2026-02-13 22:41:07 +05:30

721 lines
23 KiB
TypeScript

// Copyright (C) 2025 Keygraph, Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License version 3
// as published by the Free Software Foundation.
/**
* Temporal activities for Shannon agent execution.
*
* Each activity wraps a single agent execution with:
* - Heartbeat loop (2s interval) to signal worker liveness
* - Git checkpoint/rollback/commit per attempt
* - Error classification for Temporal retry behavior
* - Audit session logging
*
* Temporal handles retries based on error classification:
* - Retryable: BillingError, TransientError (429, 5xx, network)
* - Non-retryable: AuthenticationError, PermissionError, ConfigurationError, etc.
*/
import { heartbeat, ApplicationFailure, Context } from '@temporalio/activity';
import chalk from 'chalk';
// Max lengths to prevent Temporal protobuf buffer overflow
const MAX_ERROR_MESSAGE_LENGTH = 2000;
const MAX_STACK_TRACE_LENGTH = 1000;
// Max retries for output validation errors (agent didn't save deliverables)
// Lower than default 50 since this is unlikely to self-heal
const MAX_OUTPUT_VALIDATION_RETRIES = 3;
/**
* Truncate error message to prevent buffer overflow in Temporal serialization.
*/
function truncateErrorMessage(message: string): string {
if (message.length <= MAX_ERROR_MESSAGE_LENGTH) {
return message;
}
return message.slice(0, MAX_ERROR_MESSAGE_LENGTH - 20) + '\n[truncated]';
}
/**
* Truncate stack trace on an ApplicationFailure to prevent buffer overflow.
*/
function truncateStackTrace(failure: ApplicationFailure): void {
if (failure.stack && failure.stack.length > MAX_STACK_TRACE_LENGTH) {
failure.stack = failure.stack.slice(0, MAX_STACK_TRACE_LENGTH) + '\n[stack truncated]';
}
}
import {
runClaudePrompt,
validateAgentOutput,
type ClaudePromptResult,
} from '../ai/claude-executor.js';
import { loadPrompt } from '../prompts/prompt-manager.js';
import { parseConfig, distributeConfig } from '../config-parser.js';
import { classifyErrorForTemporal } from '../error-handling.js';
import {
safeValidateQueueAndDeliverable,
type VulnType,
type ExploitationDecision,
} from '../queue-validation.js';
import {
createGitCheckpoint,
commitGitSuccess,
rollbackGitWorkspace,
getGitCommitHash,
} from '../utils/git-manager.js';
import { assembleFinalReport, injectModelIntoReport } from '../phases/reporting.js';
import { getPromptNameForAgent } from '../types/agents.js';
import { AuditSession } from '../audit/index.js';
import type { WorkflowSummary } from '../audit/workflow-logger.js';
import type { AgentName } from '../types/agents.js';
import { getDeliverablePath, ALL_AGENTS } from '../types/agents.js';
import type { AgentMetrics, ResumeState } from './shared.js';
import type { DistributedConfig } from '../types/config.js';
import { copyDeliverablesToAudit, type SessionMetadata, readJson, fileExists } from '../audit/utils.js';
import type { ResumeAttempt } from '../audit/metrics-tracker.js';
import { executeGitCommandWithRetry } from '../utils/git-manager.js';
import path from 'path';
import fs from 'fs/promises';
const HEARTBEAT_INTERVAL_MS = 2000; // Must be < heartbeatTimeout (10min production, 5min testing)
/**
* Input for all agent activities.
* Matches PipelineInput but with required workflowId for audit correlation.
*/
export interface ActivityInput {
webUrl: string;
repoPath: string;
configPath?: string;
outputPath?: string;
pipelineTestingMode?: boolean;
workflowId: string;
sessionId: string; // Workspace name (for resume) or workflowId (for new runs)
}
/**
* Core activity implementation.
*
* Executes a single agent with:
* 1. Heartbeat loop for worker liveness
* 2. Config loading (if configPath provided)
* 3. Audit session initialization
* 4. Prompt loading
* 5. Git checkpoint before execution
* 6. Agent execution (single attempt)
* 7. Output validation
* 8. Git commit on success, rollback on failure
* 9. Error classification for Temporal retry
*/
async function runAgentActivity(
agentName: AgentName,
input: ActivityInput
): Promise<AgentMetrics> {
const {
webUrl,
repoPath,
configPath,
outputPath,
pipelineTestingMode = false,
workflowId,
} = input;
const startTime = Date.now();
// Get attempt number from Temporal context (tracks retries automatically)
const attemptNumber = Context.current().info.attempt;
// Heartbeat loop - signals worker is alive to Temporal server
const heartbeatInterval = setInterval(() => {
const elapsed = Math.floor((Date.now() - startTime) / 1000);
heartbeat({ agent: agentName, elapsedSeconds: elapsed, attempt: attemptNumber });
}, HEARTBEAT_INTERVAL_MS);
try {
// 1. Load config (if provided)
let distributedConfig: DistributedConfig | null = null;
if (configPath) {
try {
const config = await parseConfig(configPath);
distributedConfig = distributeConfig(config);
} catch (err) {
throw new Error(`Failed to load config ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
}
}
// 2. Build session metadata for audit
// Use sessionId (workspace name) for directory, workflowId for tracking
const sessionMetadata: SessionMetadata = {
id: input.sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
// 3. Initialize audit session (idempotent, safe across retries)
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
// 4. Load prompt
const promptName = getPromptNameForAgent(agentName);
const prompt = await loadPrompt(
promptName,
{ webUrl, repoPath },
distributedConfig,
pipelineTestingMode
);
// 5. Create git checkpoint before execution
await createGitCheckpoint(repoPath, agentName, attemptNumber);
await auditSession.startAgent(agentName, prompt, attemptNumber);
// 6. Execute agent (single attempt - Temporal handles retries)
const result: ClaudePromptResult = await runClaudePrompt(
prompt,
repoPath,
'', // context
agentName, // description
agentName,
chalk.cyan,
sessionMetadata,
auditSession,
attemptNumber
);
// 6.5. Sanity check: Detect spending cap that slipped through all detection layers
// Defense-in-depth: A successful agent execution should never have ≤2 turns with $0 cost
if (result.success && (result.turns ?? 0) <= 2 && (result.cost || 0) === 0) {
const resultText = result.result || '';
const looksLikeBillingError = /spending|cap|limit|budget|resets/i.test(resultText);
if (looksLikeBillingError) {
await rollbackGitWorkspace(repoPath, 'spending cap detected');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: 0,
success: false,
model: result.model,
error: `Spending cap likely reached: ${resultText.slice(0, 100)}`,
});
// Throw as billing error so Temporal retries with long backoff
throw new Error(`Spending cap likely reached: ${resultText.slice(0, 100)}`);
}
}
// 7. Handle execution failure
if (!result.success) {
await rollbackGitWorkspace(repoPath, 'execution failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: result.error || 'Execution failed',
});
throw new Error(result.error || 'Agent execution failed');
}
// 8. Validate output
const validationPassed = await validateAgentOutput(result, agentName, repoPath);
if (!validationPassed) {
await rollbackGitWorkspace(repoPath, 'validation failure');
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: false,
model: result.model,
error: 'Output validation failed',
});
// Limit output validation retries (unlikely to self-heal)
if (attemptNumber >= MAX_OUTPUT_VALIDATION_RETRIES) {
throw ApplicationFailure.nonRetryable(
`Agent ${agentName} failed output validation after ${attemptNumber} attempts`,
'OutputValidationError',
[{ agentName, attemptNumber, elapsed: Date.now() - startTime }]
);
}
// Let Temporal retry (will be classified as OutputValidationError)
throw new Error(`Agent ${agentName} failed output validation`);
}
// 9. Success - commit and log
const commitHash = await getGitCommitHash(repoPath);
await auditSession.endAgent(agentName, {
attemptNumber,
duration_ms: result.duration,
cost_usd: result.cost || 0,
success: true,
model: result.model,
...(commitHash && { checkpoint: commitHash }),
});
await commitGitSuccess(repoPath, agentName);
// 9.5. Copy deliverables to audit-logs (non-fatal)
try {
await copyDeliverablesToAudit(sessionMetadata, repoPath);
} catch (copyErr) {
console.error(`Failed to copy deliverables to audit-logs for ${agentName}:`, copyErr);
}
// 10. Return metrics
return {
durationMs: Date.now() - startTime,
inputTokens: null, // Not currently exposed by SDK wrapper
outputTokens: null,
costUsd: result.cost ?? null,
numTurns: result.turns ?? null,
model: result.model,
};
} catch (error) {
// Rollback git workspace before Temporal retry to ensure clean state
try {
await rollbackGitWorkspace(repoPath, 'error recovery');
} catch (rollbackErr) {
// Log but don't fail - rollback is best-effort
console.error(`Failed to rollback git workspace for ${agentName}:`, rollbackErr);
}
// If error is already an ApplicationFailure (e.g., from our retry limit logic),
// re-throw it directly without re-classifying
if (error instanceof ApplicationFailure) {
throw error;
}
// Classify error for Temporal retry behavior
const classified = classifyErrorForTemporal(error);
// Truncate message to prevent protobuf buffer overflow
const rawMessage = error instanceof Error ? error.message : String(error);
const message = truncateErrorMessage(rawMessage);
if (classified.retryable) {
// Temporal will retry with configured backoff
const failure = ApplicationFailure.create({
message,
type: classified.type,
details: [{ agentName, attemptNumber, elapsed: Date.now() - startTime }],
});
truncateStackTrace(failure);
throw failure;
} else {
// Fail immediately - no retry
const failure = ApplicationFailure.nonRetryable(message, classified.type, [
{ agentName, attemptNumber, elapsed: Date.now() - startTime },
]);
truncateStackTrace(failure);
throw failure;
}
} finally {
clearInterval(heartbeatInterval);
}
}
// === Individual Agent Activity Exports ===
// Each function is a thin wrapper around runAgentActivity with the agent name.
export async function runPreReconAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('pre-recon', input);
}
export async function runReconAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('recon', input);
}
export async function runInjectionVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('injection-vuln', input);
}
export async function runXssVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('xss-vuln', input);
}
export async function runAuthVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('auth-vuln', input);
}
export async function runSsrfVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('ssrf-vuln', input);
}
export async function runAuthzVulnAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('authz-vuln', input);
}
export async function runInjectionExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('injection-exploit', input);
}
export async function runXssExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('xss-exploit', input);
}
export async function runAuthExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('auth-exploit', input);
}
export async function runSsrfExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('ssrf-exploit', input);
}
export async function runAuthzExploitAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('authz-exploit', input);
}
export async function runReportAgent(input: ActivityInput): Promise<AgentMetrics> {
return runAgentActivity('report', input);
}
/**
* Assemble the final report by concatenating exploitation evidence files.
* This must be called BEFORE runReportAgent to create the file that the report agent will modify.
*/
export async function assembleReportActivity(input: ActivityInput): Promise<void> {
const { repoPath } = input;
console.log(chalk.blue('📝 Assembling deliverables from specialist agents...'));
try {
await assembleFinalReport(repoPath);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error assembling final report: ${err.message}`));
// Don't throw - the report agent can still create content even if no exploitation files exist
}
}
/**
* Inject model metadata into the final report.
* This must be called AFTER runReportAgent to add the model information to the Executive Summary.
*/
export async function injectReportMetadataActivity(input: ActivityInput): Promise<void> {
const { repoPath, sessionId, outputPath } = input;
const effectiveOutputPath = outputPath
? path.join(outputPath, sessionId)
: path.join('./audit-logs', sessionId);
try {
await injectModelIntoReport(repoPath, effectiveOutputPath);
} catch (error) {
const err = error as Error;
console.log(chalk.yellow(`⚠️ Error injecting model into report: ${err.message}`));
// Don't throw - this is a non-critical enhancement
}
}
/**
* Check if exploitation should run for a given vulnerability type.
* Reads the vulnerability queue file and returns the decision.
*
* This activity allows the workflow to skip exploit agents entirely
* when no vulnerabilities were found, saving API calls and time.
*
* Error handling:
* - Retryable errors (missing files, invalid JSON): re-throw for Temporal retry
* - Non-retryable errors: skip exploitation gracefully
*/
export async function checkExploitationQueue(
input: ActivityInput,
vulnType: VulnType
): Promise<ExploitationDecision> {
const { repoPath } = input;
const result = await safeValidateQueueAndDeliverable(vulnType, repoPath);
if (result.success && result.data) {
const { shouldExploit, vulnerabilityCount } = result.data;
console.log(
chalk.blue(
`🔍 ${vulnType}: ${shouldExploit ? `${vulnerabilityCount} vulnerabilities found` : 'no vulnerabilities, skipping exploitation'}`
)
);
return result.data;
}
// Validation failed - check if we should retry or skip
const error = result.error;
if (error?.retryable) {
// Re-throw retryable errors so Temporal can retry the vuln agent
console.log(chalk.yellow(`⚠️ ${vulnType}: ${error.message} (retrying)`));
throw error;
}
// Non-retryable error - skip exploitation gracefully
console.log(
chalk.yellow(`⚠️ ${vulnType}: ${error?.message ?? 'Unknown error'}, skipping exploitation`)
);
return {
shouldExploit: false,
shouldRetry: false,
vulnerabilityCount: 0,
vulnType,
};
}
// === Resume Activities ===
/**
* Session.json structure for resume state loading
*/
interface SessionJson {
session: {
id: string;
webUrl: string;
repoPath?: string;
originalWorkflowId?: string;
resumeAttempts?: ResumeAttempt[];
};
metrics: {
agents: Record<string, {
status: 'in-progress' | 'success' | 'failed';
checkpoint?: string;
}>;
};
}
/**
* Load resume state from an existing workspace.
* Validates workspace exists, URL matches, and determines which agents to skip.
*
* @throws ApplicationFailure.nonRetryable if workspace not found or URL mismatch
*/
export async function loadResumeState(
workspaceName: string,
expectedUrl: string,
expectedRepoPath: string
): Promise<ResumeState> {
const sessionPath = path.join('./audit-logs', workspaceName, 'session.json');
// Validate workspace exists
const exists = await fileExists(sessionPath);
if (!exists) {
throw ApplicationFailure.nonRetryable(
`Workspace not found: ${workspaceName}\nExpected path: ${sessionPath}`,
'WorkspaceNotFoundError'
);
}
// Load session.json
let session: SessionJson;
try {
session = await readJson<SessionJson>(sessionPath);
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
throw ApplicationFailure.nonRetryable(
`Corrupted session.json in workspace ${workspaceName}: ${errorMsg}`,
'CorruptedSessionError'
);
}
// Validate URL matches
if (session.session.webUrl !== expectedUrl) {
throw ApplicationFailure.nonRetryable(
`URL mismatch with workspace\n Workspace URL: ${session.session.webUrl}\n Provided URL: ${expectedUrl}`,
'URLMismatchError'
);
}
// Find completed agents (status === 'success' AND deliverable exists)
const completedAgents: string[] = [];
const agents = session.metrics.agents;
for (const agentName of ALL_AGENTS) {
const agentData = agents[agentName];
// Skip if agent never ran or didn't succeed
if (!agentData || agentData.status !== 'success') {
continue;
}
// Validate deliverable exists
const deliverablePath = getDeliverablePath(agentName, expectedRepoPath);
const deliverableExists = await fileExists(deliverablePath);
if (!deliverableExists) {
console.log(
chalk.yellow(`Agent ${agentName} shows success but deliverable missing, will re-run`)
);
continue;
}
// Agent completed successfully and deliverable exists
completedAgents.push(agentName);
}
// Find latest checkpoint from completed agents
const checkpoints = completedAgents
.map((name) => agents[name]?.checkpoint)
.filter((hash): hash is string => hash != null);
if (checkpoints.length === 0) {
throw ApplicationFailure.nonRetryable(
`No successful agent checkpoints found in workspace ${workspaceName}`,
'NoCheckpointsError'
);
}
// Find most recent commit among checkpoints
const checkpointHash = await findLatestCommit(expectedRepoPath, checkpoints);
const originalWorkflowId = session.session.originalWorkflowId || session.session.id;
console.log(chalk.cyan(`=== RESUME STATE ===`));
console.log(`Workspace: ${workspaceName}`);
console.log(`Completed agents: ${completedAgents.length}`);
console.log(`Checkpoint: ${checkpointHash}`);
return {
workspaceName,
originalUrl: session.session.webUrl,
completedAgents,
checkpointHash,
originalWorkflowId,
};
}
/**
* Find the most recent commit among a list of commit hashes.
* Uses git rev-list to determine which commit is newest.
*/
async function findLatestCommit(repoPath: string, commitHashes: string[]): Promise<string> {
if (commitHashes.length === 1) {
const hash = commitHashes[0];
if (!hash) {
throw new Error('Empty commit hash in array');
}
return hash;
}
// Use git rev-list to find the most recent commit among all hashes
const result = await executeGitCommandWithRetry(
['git', 'rev-list', '--max-count=1', ...commitHashes],
repoPath,
'find latest commit'
);
return result.stdout.trim();
}
/**
* Restore git workspace to a checkpoint and clean up partial deliverables.
*
* @param repoPath - Repository path
* @param checkpointHash - Git commit hash to reset to
* @param incompleteAgents - Agents that didn't complete (will have deliverables cleaned up)
*/
export async function restoreGitCheckpoint(
repoPath: string,
checkpointHash: string,
incompleteAgents: AgentName[]
): Promise<void> {
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
// Git reset to checkpoint
await executeGitCommandWithRetry(
['git', 'reset', '--hard', checkpointHash],
repoPath,
'reset to checkpoint for resume'
);
await executeGitCommandWithRetry(
['git', 'clean', '-fd'],
repoPath,
'clean untracked files for resume'
);
// Explicitly delete deliverables for incomplete agents
for (const agentName of incompleteAgents) {
const deliverablePath = getDeliverablePath(agentName, repoPath);
try {
const exists = await fileExists(deliverablePath);
if (exists) {
console.log(chalk.yellow(`Cleaning partial deliverable: ${agentName}`));
await fs.unlink(deliverablePath);
}
} catch (error) {
// Non-fatal, just log
console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`));
}
}
console.log(chalk.green('Workspace restored to clean state'));
}
/**
* Record a resume attempt in session.json.
* Tracks the new workflow ID, terminated workflows, and checkpoint hash.
*/
export async function recordResumeAttempt(
input: ActivityInput,
terminatedWorkflows: string[],
checkpointHash: string
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize();
await auditSession.addResumeAttempt(workflowId, terminatedWorkflows, checkpointHash);
}
/**
* Log phase transition to the unified workflow log.
* Called at phase boundaries for per-workflow logging.
*/
export async function logPhaseTransition(
input: ActivityInput,
phase: string,
event: 'start' | 'complete'
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
if (event === 'start') {
await auditSession.logPhaseStart(phase);
} else {
await auditSession.logPhaseComplete(phase);
}
}
/**
* Log workflow completion with full summary to the unified workflow log.
* Called at the end of the workflow to write a summary breakdown.
*/
export async function logWorkflowComplete(
input: ActivityInput,
summary: WorkflowSummary
): Promise<void> {
const { webUrl, repoPath, outputPath, sessionId, workflowId } = input;
const sessionMetadata: SessionMetadata = {
id: sessionId,
webUrl,
repoPath,
...(outputPath && { outputPath }),
};
const auditSession = new AuditSession(sessionMetadata);
await auditSession.initialize(workflowId);
await auditSession.updateSessionStatus(summary.status);
await auditSession.logWorkflowComplete(summary);
}