fix: store checkpoint as success commit hash and show cumulative metrics
- Swap commitGitSuccess/getGitCommitHash order so checkpoint in session.json points to the success commit (which contains deliverables) instead of the pre-agent marker commit
- Simplify restoreGitCheckpoint: git reset --hard now naturally preserves completed agent deliverables, removing the in-memory backup/restore
- Show cumulative cost/duration in workflow.log from session.json
- Fill in per-agent metrics for skipped agents in workflow.log breakdown
- Display cumulative cost in client output for resume runs
This commit is contained in:
@@ -18,6 +18,12 @@ git clone https://github.com/org/repo.git ./repos/my-repo
|
|||||||
./shannon start URL=<url> REPO=my-repo
|
./shannon start URL=<url> REPO=my-repo
|
||||||
./shannon start URL=<url> REPO=my-repo CONFIG=./configs/my-config.yaml
|
./shannon start URL=<url> REPO=my-repo CONFIG=./configs/my-config.yaml
|
||||||
|
|
||||||
|
# Workspaces & Resume
|
||||||
|
./shannon start URL=<url> REPO=my-repo WORKSPACE=my-audit # New named workspace
|
||||||
|
./shannon start URL=<url> REPO=my-repo WORKSPACE=my-audit # Resume (same command)
|
||||||
|
./shannon start URL=<url> REPO=my-repo WORKSPACE=<auto-name> # Resume auto-named run
|
||||||
|
./shannon workspaces # List all workspaces
|
||||||
|
|
||||||
# Monitor
|
# Monitor
|
||||||
./shannon logs # Real-time worker logs
|
./shannon logs # Real-time worker logs
|
||||||
./shannon query ID=<workflow-id> # Query workflow progress
|
./shannon query ID=<workflow-id> # Query workflow progress
|
||||||
@@ -31,7 +37,7 @@ git clone https://github.com/org/repo.git ./repos/my-repo
|
|||||||
npm run build
|
npm run build
|
||||||
```
|
```
|
||||||
|
|
||||||
**Options:** `CONFIG=<file>` (YAML config), `OUTPUT=<path>` (default: `./audit-logs/`), `PIPELINE_TESTING=true` (minimal prompts, 10s retries), `REBUILD=true` (force Docker rebuild), `ROUTER=true` (multi-model routing via [claude-code-router](https://github.com/musistudio/claude-code-router))
|
**Options:** `CONFIG=<file>` (YAML config), `OUTPUT=<path>` (default: `./audit-logs/`), `WORKSPACE=<name>` (named workspace; auto-resumes if exists), `PIPELINE_TESTING=true` (minimal prompts, 10s retries), `REBUILD=true` (force Docker rebuild), `ROUTER=true` (multi-model routing via [claude-code-router](https://github.com/musistudio/claude-code-router))
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
@@ -67,6 +73,7 @@ Durable workflow orchestration with crash recovery, queryable progress, intellig
|
|||||||
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
|
- **SDK Integration** — Uses `@anthropic-ai/claude-agent-sdk` with `maxTurns: 10_000` and `bypassPermissions` mode. Playwright MCP for browser automation, TOTP generation via MCP tool. Login flow template at `prompts/shared/login-instructions.txt` supports form, SSO, API, and basic auth
|
||||||
- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables
|
- **Audit System** — Crash-safe append-only logging in `audit-logs/{hostname}_{sessionId}/`. Tracks session metrics, per-agent logs, prompts, and deliverables
|
||||||
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool
|
- **Deliverables** — Saved to `deliverables/` in the target repo via the `save_deliverable` MCP tool
|
||||||
|
- **Workspaces & Resume** — Named workspaces via `WORKSPACE=<name>` or auto-named from URL+timestamp. Resume passes `--workspace` to the Temporal client (`src/temporal/client.ts`), which loads `session.json` to detect completed agents. `loadResumeState()` in `src/temporal/activities.ts` validates deliverable existence, restores git checkpoints, and cleans up incomplete deliverables. Workspace listing via `src/temporal/workspaces.ts`
|
||||||
|
|
||||||
## Development Notes
|
## Development Notes
|
||||||
|
|
||||||
|
|||||||
@@ -85,6 +85,7 @@ Shannon is available in two editions:
|
|||||||
- [Monitoring Progress](#monitoring-progress)
|
- [Monitoring Progress](#monitoring-progress)
|
||||||
- [Stopping Shannon](#stopping-shannon)
|
- [Stopping Shannon](#stopping-shannon)
|
||||||
- [Usage Examples](#usage-examples)
|
- [Usage Examples](#usage-examples)
|
||||||
|
- [Workspaces and Resuming](#workspaces-and-resuming)
|
||||||
- [Configuration (Optional)](#configuration-optional)
|
- [Configuration (Optional)](#configuration-optional)
|
||||||
- [[EXPERIMENTAL - UNSUPPORTED] Router Mode (Alternative Providers)](#experimental---unsupported-router-mode-alternative-providers)
|
- [[EXPERIMENTAL - UNSUPPORTED] Router Mode (Alternative Providers)](#experimental---unsupported-router-mode-alternative-providers)
|
||||||
- [Output and Results](#output-and-results)
|
- [Output and Results](#output-and-results)
|
||||||
@@ -167,8 +168,41 @@ open http://localhost:8233
|
|||||||
|
|
||||||
# Custom output directory
|
# Custom output directory
|
||||||
./shannon start URL=https://example.com REPO=repo-name OUTPUT=./my-reports
|
./shannon start URL=https://example.com REPO=repo-name OUTPUT=./my-reports
|
||||||
|
|
||||||
|
# Named workspace
|
||||||
|
./shannon start URL=https://example.com REPO=repo-name WORKSPACE=q1-audit
|
||||||
|
|
||||||
|
# List all workspaces
|
||||||
|
./shannon workspaces
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Workspaces and Resuming
|
||||||
|
|
||||||
|
Shannon supports **workspaces** that allow you to resume interrupted or failed runs without re-running completed agents.
|
||||||
|
|
||||||
|
**How it works:**
|
||||||
|
- Every run creates a workspace in `audit-logs/` (auto-named by default, e.g. `example-com_shannon-1771007534808`)
|
||||||
|
- Use `WORKSPACE=<name>` to give your run a custom name for easier reference
|
||||||
|
- To resume any run, pass its workspace name via `WORKSPACE=` — Shannon detects which agents completed successfully and picks up where it left off
|
||||||
|
- Each agent's progress is checkpointed via git commits, so resumed runs start from a clean, validated state
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start with a named workspace
|
||||||
|
./shannon start URL=https://example.com REPO=repo-name WORKSPACE=my-audit
|
||||||
|
|
||||||
|
# Resume the same workspace (skips completed agents)
|
||||||
|
./shannon start URL=https://example.com REPO=repo-name WORKSPACE=my-audit
|
||||||
|
|
||||||
|
# Resume an auto-named workspace from a previous run
|
||||||
|
./shannon start URL=https://example.com REPO=repo-name WORKSPACE=example-com_shannon-1771007534808
|
||||||
|
|
||||||
|
# List all workspaces and their status
|
||||||
|
./shannon workspaces
|
||||||
|
```
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> The `URL` must match the original workspace URL when resuming. Shannon will reject mismatched URLs to prevent cross-target contamination.
|
||||||
|
|
||||||
### Prepare Your Repository
|
### Prepare Your Repository
|
||||||
|
|
||||||
Shannon expects target repositories to be placed under the `./repos/` directory at the project root. The `REPO` flag refers to a folder name inside `./repos/`. Copy the repository you want to scan into `./repos/`, or clone it directly there:
|
Shannon expects target repositories to be placed under the `./repos/` directory at the project root. The `REPO` flag refers to a folder name inside `./repos/`. Copy the repository you want to scan into `./repos/`, or clone it directly there:
|
||||||
|
|||||||
@@ -246,7 +246,8 @@ async function runAgentActivity(
|
|||||||
throw new Error(`Agent ${agentName} failed output validation`);
|
throw new Error(`Agent ${agentName} failed output validation`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 9. Success - commit and log
|
// 9. Success - commit deliverables, then capture checkpoint hash
|
||||||
|
await commitGitSuccess(repoPath, agentName);
|
||||||
const commitHash = await getGitCommitHash(repoPath);
|
const commitHash = await getGitCommitHash(repoPath);
|
||||||
await auditSession.endAgent(agentName, {
|
await auditSession.endAgent(agentName, {
|
||||||
attemptNumber,
|
attemptNumber,
|
||||||
@@ -256,7 +257,6 @@ async function runAgentActivity(
|
|||||||
model: result.model,
|
model: result.model,
|
||||||
...(commitHash && { checkpoint: commitHash }),
|
...(commitHash && { checkpoint: commitHash }),
|
||||||
});
|
});
|
||||||
await commitGitSuccess(repoPath, agentName);
|
|
||||||
|
|
||||||
// 10. Return metrics
|
// 10. Return metrics
|
||||||
return {
|
return {
|
||||||
@@ -606,7 +606,8 @@ export async function restoreGitCheckpoint(
|
|||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
|
console.log(chalk.blue(`Restoring git workspace to ${checkpointHash}...`));
|
||||||
|
|
||||||
// Git reset to checkpoint
|
// Checkpoint hash points to the success commit (after commitGitSuccess),
|
||||||
|
// so git reset --hard naturally preserves all completed agent deliverables.
|
||||||
await executeGitCommandWithRetry(
|
await executeGitCommandWithRetry(
|
||||||
['git', 'reset', '--hard', checkpointHash],
|
['git', 'reset', '--hard', checkpointHash],
|
||||||
repoPath,
|
repoPath,
|
||||||
@@ -618,8 +619,7 @@ export async function restoreGitCheckpoint(
|
|||||||
'clean untracked files for resume'
|
'clean untracked files for resume'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Clean up any partial deliverables from incomplete agents
|
||||||
// Explicitly delete deliverables for incomplete agents
|
|
||||||
for (const agentName of incompleteAgents) {
|
for (const agentName of incompleteAgents) {
|
||||||
const deliverablePath = getDeliverablePath(agentName, repoPath);
|
const deliverablePath = getDeliverablePath(agentName, repoPath);
|
||||||
try {
|
try {
|
||||||
@@ -629,7 +629,6 @@ export async function restoreGitCheckpoint(
|
|||||||
await fs.unlink(deliverablePath);
|
await fs.unlink(deliverablePath);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Non-fatal, just log
|
|
||||||
console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`));
|
console.log(chalk.gray(`Note: Failed to delete ${deliverablePath}: ${error}`));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -709,7 +708,37 @@ export async function logWorkflowComplete(
|
|||||||
const auditSession = new AuditSession(sessionMetadata);
|
const auditSession = new AuditSession(sessionMetadata);
|
||||||
await auditSession.initialize(workflowId);
|
await auditSession.initialize(workflowId);
|
||||||
await auditSession.updateSessionStatus(summary.status);
|
await auditSession.updateSessionStatus(summary.status);
|
||||||
await auditSession.logWorkflowComplete(summary);
|
|
||||||
|
// Use cumulative metrics from session.json (includes all resume attempts)
|
||||||
|
const sessionData = await auditSession.getMetrics() as {
|
||||||
|
metrics: {
|
||||||
|
total_duration_ms: number;
|
||||||
|
total_cost_usd: number;
|
||||||
|
agents: Record<string, { final_duration_ms: number; total_cost_usd: number }>;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fill in metrics for skipped agents (completed in previous runs)
|
||||||
|
const agentMetrics = { ...summary.agentMetrics };
|
||||||
|
for (const agentName of summary.completedAgents) {
|
||||||
|
if (!agentMetrics[agentName]) {
|
||||||
|
const agentData = sessionData.metrics.agents[agentName];
|
||||||
|
if (agentData) {
|
||||||
|
agentMetrics[agentName] = {
|
||||||
|
durationMs: agentData.final_duration_ms,
|
||||||
|
costUsd: agentData.total_cost_usd,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const cumulativeSummary: WorkflowSummary = {
|
||||||
|
...summary,
|
||||||
|
totalDurationMs: sessionData.metrics.total_duration_ms,
|
||||||
|
totalCostUsd: sessionData.metrics.total_cost_usd,
|
||||||
|
agentMetrics,
|
||||||
|
};
|
||||||
|
await auditSession.logWorkflowComplete(cumulativeSummary);
|
||||||
|
|
||||||
// Copy all deliverables to audit-logs once at workflow end (non-fatal)
|
// Copy all deliverables to audit-logs once at workflow end (non-fatal)
|
||||||
try {
|
try {
|
||||||
|
|||||||
+16
-1
@@ -46,6 +46,9 @@ interface SessionJson {
|
|||||||
originalWorkflowId?: string;
|
originalWorkflowId?: string;
|
||||||
resumeAttempts?: Array<{ workflowId: string }>;
|
resumeAttempts?: Array<{ workflowId: string }>;
|
||||||
};
|
};
|
||||||
|
metrics: {
|
||||||
|
total_cost_usd: number;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
@@ -356,7 +359,19 @@ async function startPipeline(): Promise<void> {
|
|||||||
console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`));
|
console.log(chalk.gray(`Duration: ${Math.floor(result.summary.totalDurationMs / 1000)}s`));
|
||||||
console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`));
|
console.log(chalk.gray(`Agents completed: ${result.summary.agentCount}`));
|
||||||
console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`));
|
console.log(chalk.gray(`Total turns: ${result.summary.totalTurns}`));
|
||||||
console.log(chalk.gray(`Total cost: $${result.summary.totalCostUsd.toFixed(4)}`));
|
console.log(chalk.gray(`Run cost: $${result.summary.totalCostUsd.toFixed(4)}`));
|
||||||
|
|
||||||
|
// Show cumulative cost from session.json (includes all resume attempts)
|
||||||
|
if (isResume) {
|
||||||
|
try {
|
||||||
|
const session = await readJson<SessionJson>(
|
||||||
|
path.join('./audit-logs', sessionId, 'session.json')
|
||||||
|
);
|
||||||
|
console.log(chalk.gray(`Cumulative cost: $${session.metrics.total_cost_usd.toFixed(4)}`));
|
||||||
|
} catch {
|
||||||
|
// Non-fatal, skip cumulative cost display
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
clearInterval(progressInterval);
|
clearInterval(progressInterval);
|
||||||
|
|||||||
Reference in New Issue
Block a user