feat: add workflow resume from workspace via --workspace flag
When a workflow is interrupted (VM crash, Ctrl+C, Docker restart), it can now be resumed by passing the workspace name. The system reads session.json to determine which agents completed, validates that their deliverables exist on disk, restores the git checkpoint, and skips already-completed agents.

- Add --workspace CLI flag and auto-terminate conflicting workflows
- Add loadResumeState, restoreGitCheckpoint, recordResumeAttempt activities
- Add skip logic for all 5 pipeline phases including parallel execution
- Separate sessionId (persistent directory) from workflowId (execution ID)
- Track resume attempts in session.json for audit trail
- Derive AgentName type from ALL_AGENTS array to eliminate duplication
- Add getDeliverablePath mapping for deliverable validation
This commit is contained in:
@@ -46,6 +46,13 @@ interface PhaseMetrics {
|
||||
agent_count: number;
|
||||
}
|
||||
|
||||
/**
 * One entry in the session's resume audit trail (`session.resumeAttempts`).
 *
 * The optional fields are left out entirely (not set to empty values) when
 * there is nothing to report for them.
 */
export interface ResumeAttempt {
  /** ID of the new workflow execution started for this resume attempt. */
  workflowId: string;
  /** When the attempt was recorded, as a formatted timestamp string. */
  timestamp: string;
  /** Comma-joined IDs of workflows terminated for this resume; absent when none were terminated. */
  terminatedPrevious?: string;
  /** Git checkpoint hash that was restored for this resume; absent when none was supplied. */
  resumedFromCheckpoint?: string;
}
|
||||
|
||||
interface SessionData {
|
||||
session: {
|
||||
id: string;
|
||||
@@ -54,6 +61,8 @@ interface SessionData {
|
||||
status: 'in-progress' | 'completed' | 'failed';
|
||||
createdAt: string;
|
||||
completedAt?: string;
|
||||
originalWorkflowId?: string; // First workflow that created this workspace
|
||||
resumeAttempts?: ResumeAttempt[]; // Track all resume attempts
|
||||
};
|
||||
metrics: {
|
||||
total_duration_ms: number;
|
||||
@@ -95,8 +104,10 @@ export class MetricsTracker {
|
||||
|
||||
/**
|
||||
* Initialize session.json (idempotent)
|
||||
*
|
||||
* @param workflowId - Optional workflow ID to set as originalWorkflowId for new sessions
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
async initialize(workflowId?: string): Promise<void> {
|
||||
// Check if session.json already exists
|
||||
const exists = await fileExists(this.sessionJsonPath);
|
||||
|
||||
@@ -105,21 +116,24 @@ export class MetricsTracker {
|
||||
this.data = await readJson<SessionData>(this.sessionJsonPath);
|
||||
} else {
|
||||
// Create new session.json
|
||||
this.data = this.createInitialData();
|
||||
this.data = this.createInitialData(workflowId);
|
||||
await this.save();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create initial session.json structure
|
||||
*
|
||||
* @param workflowId - Optional workflow ID to set as originalWorkflowId
|
||||
*/
|
||||
private createInitialData(): SessionData {
|
||||
private createInitialData(workflowId?: string): SessionData {
|
||||
const sessionData: SessionData = {
|
||||
session: {
|
||||
id: this.sessionMetadata.id,
|
||||
webUrl: this.sessionMetadata.webUrl,
|
||||
status: 'in-progress',
|
||||
createdAt: (this.sessionMetadata as { createdAt?: string }).createdAt || formatTimestamp(),
|
||||
resumeAttempts: [],
|
||||
},
|
||||
metrics: {
|
||||
total_duration_ms: 0,
|
||||
@@ -128,6 +142,12 @@ export class MetricsTracker {
|
||||
agents: {}, // Agent-level metrics
|
||||
},
|
||||
};
|
||||
|
||||
// Set originalWorkflowId if provided (for new workspaces)
|
||||
if (workflowId) {
|
||||
sessionData.session.originalWorkflowId = workflowId;
|
||||
}
|
||||
|
||||
// Only add repoPath if it exists
|
||||
if (this.sessionMetadata.repoPath) {
|
||||
sessionData.session.repoPath = this.sessionMetadata.repoPath;
|
||||
@@ -229,6 +249,51 @@ export class MetricsTracker {
|
||||
await this.save();
|
||||
}
|
||||
|
||||
/**
 * Record a resume attempt in the session and save the result.
 *
 * Appends a new entry to `session.resumeAttempts`. Before appending, fields
 * that may be missing from older sessions are backfilled:
 * `originalWorkflowId` falls back to the session ID, and `resumeAttempts`
 * is created as an empty array.
 *
 * @param workflowId - The new workflow ID for this resume attempt
 * @param terminatedWorkflows - IDs of workflows that were terminated; stored
 *   comma-joined, and omitted from the entry when the list is empty
 * @param checkpointHash - Git checkpoint hash that was restored; omitted
 *   from the entry when not provided
 * @throws Error if the tracker holds no session data (not initialized)
 */
async addResumeAttempt(
  workflowId: string,
  terminatedWorkflows: readonly string[],
  checkpointHash?: string
): Promise<void> {
  if (!this.data) {
    throw new Error('MetricsTracker not initialized');
  }

  const { session } = this.data;

  // Ensure originalWorkflowId is set (backfill if missing from old sessions)
  if (!session.originalWorkflowId) {
    session.originalWorkflowId = session.id;
  }

  // Ensure resumeAttempts array exists
  if (!session.resumeAttempts) {
    session.resumeAttempts = [];
  }

  const resumeAttempt: ResumeAttempt = {
    workflowId,
    timestamp: formatTimestamp(),
  };

  // Optional fields are only attached when they carry information, so the
  // audit entry never contains empty strings.
  if (terminatedWorkflows.length > 0) {
    resumeAttempt.terminatedPrevious = terminatedWorkflows.join(',');
  }

  if (checkpointHash) {
    resumeAttempt.resumedFromCheckpoint = checkpointHash;
  }

  session.resumeAttempts.push(resumeAttempt);

  await this.save();
}
|
||||
|
||||
/**
|
||||
* Recalculate aggregations (total duration, total cost, phases)
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user