[codex] Bound productivity review recovery loops (#4948)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - The heartbeat/productivity review subsystem detects when assigned
work is likely stuck or churning.
> - Productivity reviews are useful, but repeated reconciliation can
create noisy refresh comments or repeated review issues around the same
source issue.
> - That makes manager follow-up harder because the signal can get
buried under duplicate review activity.
> - This pull request bounds productivity review refreshes and creation
loops while preserving the existing escalation path.
> - The benefit is a quieter recovery loop that still surfaces stuck or
high-churn work for manager attention.

## What Changed

- Added refresh throttling for open productivity review issues,
including a one-hour default interval and a maximum of three refresh
comments per open review.
- Added a rolling 24-hour creation cap so completed/closed reviews
cannot immediately recreate review issues indefinitely for the same
source issue.
- Excluded cancelled productivity reviews from the creation cap so
manager cancellations do not silently suppress future legitimate
reviews.
- Preserved productivity review timestamps in deterministic test paths
and added targeted coverage for immediate refresh suppression, refresh
caps, creation caps, and cancelled-review exclusion.

## Verification

- `pnpm run preflight:workspace-links && pnpm exec vitest run
server/src/__tests__/productivity-review-service.test.ts`
- `pnpm exec vitest run
server/src/__tests__/productivity-review-service.test.ts`
- Greptile Review: 5/5 on commit
`bcf25832d0ffae25890b2ee7eed112d1c2d114fe` with review threads resolved.
- GitHub PR checks passed on the latest head: `policy`, `verify`, `e2e`,
`Greptile Review`, and `security/snyk (cryppadotta)`.
- Verified the branch is rebased onto `public-gh/master` with no
conflicts.
- Verified the diff does not include `pnpm-lock.yaml`, database schema
changes, or migrations.

## Risks

- Low-to-medium risk: this changes automation cadence for productivity
reviews. A truly stuck issue may receive fewer repeated refresh
comments, but the original review issue remains open and assigned for
manager action.
- No migration risk: this is server logic and tests only.

> Checked [`ROADMAP.md`](ROADMAP.md) for overlapping planned core work;
this is a targeted recovery-loop fix and does not add a new roadmap
feature.

## Model Used

- OpenAI Codex coding agent, GPT-5 model family, tool-using software
engineering mode. Exact context window is not exposed in this runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots (not applicable; server-only change)
- [x] I have updated relevant documentation to reflect my changes (not
applicable; no user-facing docs or commands changed)
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta
2026-05-01 08:32:04 -05:00
committed by GitHub
parent d2dd759caa
commit 42a299fb9d
2 changed files with 266 additions and 11 deletions
@@ -16,7 +16,10 @@ import {
} from "./helpers/embedded-postgres.js";
import { MAX_ISSUE_REQUEST_DEPTH } from "@paperclipai/shared";
import {
DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS,
DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS,
PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX,
PRODUCTIVITY_REVIEW_ORIGIN_KIND,
productivityReviewService,
} from "../services/productivity-review.ts";
@@ -165,7 +168,18 @@ describeEmbeddedPostgres("productivity review service", () => {
.orderBy(issues.createdAt);
}
it("creates exactly one manager-assigned review for a no-comment run streak and refreshes it idempotently", async () => {
async function listRefreshComments(reviewIssueId: string) {
return db
.select()
.from(issueComments)
.where(and(
eq(issueComments.issueId, reviewIssueId),
sql`${issueComments.body} like ${`${PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX}%`}`,
))
.orderBy(issueComments.createdAt);
}
it("creates exactly one manager-assigned review for a no-comment run streak and rate-limits immediate refresh", async () => {
const now = new Date("2026-04-28T12:00:00.000Z");
const seeded = await seedAssignedIssue();
await insertRuns({
@@ -181,7 +195,8 @@ describeEmbeddedPostgres("productivity review service", () => {
const second = await service.reconcileProductivityReviews({ now, companyId: seeded.companyId });
expect(first.created).toBe(1);
expect(second.updated).toBe(1);
expect(second.updated).toBe(0);
expect(second.existing).toBe(1);
const reviews = await listProductivityReviews(seeded.companyId);
expect(reviews).toHaveLength(1);
expect(reviews[0]?.parentId).toBe(seeded.issueId);
@@ -191,11 +206,134 @@ describeEmbeddedPostgres("productivity review service", () => {
expect(reviews[0]?.description).toContain("Primary trigger: `no_comment_streak`");
expect(reviews[0]?.description).toContain("No-comment completed-run streak: 10");
const comments = await db
.select()
.from(issueComments)
.where(eq(issueComments.issueId, reviews[0]!.id));
expect(comments.some((comment) => comment.body.includes("Productivity review evidence refreshed"))).toBe(true);
expect(await listRefreshComments(reviews[0]!.id)).toHaveLength(0);
});
it("refreshes open productivity reviews only once per interval and caps refresh comments", async () => {
const now = new Date("2026-04-28T12:00:00.000Z");
const seeded = await seedAssignedIssue();
await insertRuns({
companyId: seeded.companyId,
agentId: seeded.coderId,
issueId: seeded.issueId,
count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
now,
});
const service = productivityReviewService(db);
await service.reconcileProductivityReviews({ now, companyId: seeded.companyId });
const [review] = await listProductivityReviews(seeded.companyId);
const firstRefreshAt = new Date(now.getTime() + DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS);
const firstRefresh = await service.reconcileProductivityReviews({
now: firstRefreshAt,
companyId: seeded.companyId,
});
const tooSoonRefresh = await service.reconcileProductivityReviews({
now: new Date(firstRefreshAt.getTime() + 30 * 60 * 1000),
companyId: seeded.companyId,
});
await service.reconcileProductivityReviews({
now: new Date(firstRefreshAt.getTime() + DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS),
companyId: seeded.companyId,
});
await service.reconcileProductivityReviews({
now: new Date(firstRefreshAt.getTime() + 2 * DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS),
companyId: seeded.companyId,
});
const cappedRefresh = await service.reconcileProductivityReviews({
now: new Date(firstRefreshAt.getTime() + 3 * DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS),
companyId: seeded.companyId,
});
expect(firstRefresh.updated).toBe(1);
expect(tooSoonRefresh.updated).toBe(0);
expect(tooSoonRefresh.existing).toBe(1);
expect(cappedRefresh.updated).toBe(0);
expect(cappedRefresh.existing).toBe(1);
expect(await listRefreshComments(review!.id)).toHaveLength(DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS);
});
it("caps productivity review creation per source issue in the rolling creation window", async () => {
const now = new Date("2026-04-28T12:00:00.000Z");
const seeded = await seedAssignedIssue();
await insertRuns({
companyId: seeded.companyId,
agentId: seeded.coderId,
issueId: seeded.issueId,
count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
now,
});
await db.insert(issues).values(
[8, 9, 10].map((hoursAgo, index) => {
const createdAt = new Date(now.getTime() - hoursAgo * 60 * 60 * 1000);
return {
id: randomUUID(),
companyId: seeded.companyId,
title: `Completed productivity review ${index + 1}`,
status: "done",
priority: "high",
originKind: PRODUCTIVITY_REVIEW_ORIGIN_KIND,
originId: seeded.issueId,
originFingerprint: `productivity-review:${seeded.issueId}`,
parentId: seeded.issueId,
issueNumber: index + 2,
identifier: `${seeded.issuePrefix}-${index + 2}`,
createdAt,
updatedAt: createdAt,
};
}),
);
const result = await productivityReviewService(db).reconcileProductivityReviews({
now,
companyId: seeded.companyId,
});
expect(result.created).toBe(0);
expect(result.creationCapped).toBe(1);
expect(await listProductivityReviews(seeded.companyId)).toHaveLength(3);
});
it("does not count cancelled productivity reviews toward the creation cap", async () => {
const now = new Date("2026-04-28T12:00:00.000Z");
const seeded = await seedAssignedIssue();
await insertRuns({
companyId: seeded.companyId,
agentId: seeded.coderId,
issueId: seeded.issueId,
count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
now,
});
await db.insert(issues).values(
[8, 9, 10].map((hoursAgo, index) => {
const createdAt = new Date(now.getTime() - hoursAgo * 60 * 60 * 1000);
return {
id: randomUUID(),
companyId: seeded.companyId,
title: `Cancelled productivity review ${index + 1}`,
status: "cancelled",
priority: "high",
originKind: PRODUCTIVITY_REVIEW_ORIGIN_KIND,
originId: seeded.issueId,
originFingerprint: `productivity-review:${seeded.issueId}`,
parentId: seeded.issueId,
issueNumber: index + 2,
identifier: `${seeded.issuePrefix}-${index + 2}`,
createdAt,
updatedAt: createdAt,
};
}),
);
const result = await productivityReviewService(db).reconcileProductivityReviews({
now,
companyId: seeded.companyId,
});
expect(result.created).toBe(1);
expect(result.creationCapped).toBe(0);
expect(await listProductivityReviews(seeded.companyId)).toHaveLength(4);
});
it("creates a long-active review without enabling a continuation hold", async () => {
+121 -4
View File
@@ -22,12 +22,17 @@ export const DEFAULT_PRODUCTIVITY_REVIEW_LONG_ACTIVE_HOURS = 6;
export const DEFAULT_PRODUCTIVITY_REVIEW_HIGH_CHURN_HOURLY = 10;
export const DEFAULT_PRODUCTIVITY_REVIEW_HIGH_CHURN_SIX_HOURS = 30;
export const DEFAULT_PRODUCTIVITY_REVIEW_RESOLVED_SNOOZE_MS = 6 * 60 * 60 * 1000;
export const DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS = 60 * 60 * 1000;
export const DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS = 3;
export const DEFAULT_PRODUCTIVITY_REVIEW_CREATION_WINDOW_MS = 24 * 60 * 60 * 1000;
export const DEFAULT_PRODUCTIVITY_REVIEW_MAX_CREATIONS_PER_WINDOW = 3;
const TERMINAL_RUN_STATUSES = ["succeeded", "failed", "cancelled", "timed_out"] as const;
const ACTIVE_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const;
const MAX_CANDIDATE_ISSUES = 250;
const MAX_RUNS_FOR_STREAK = 100;
const MAX_PARENT_WALK_DEPTH = 25;
export const PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX = "Productivity review evidence refreshed.";
type IssueRow = typeof issues.$inferSelect;
type AgentRow = typeof agents.$inferSelect;
@@ -40,6 +45,10 @@ type ProductivityReviewThresholds = {
highChurnHourly: number;
highChurnSixHours: number;
resolvedSnoozeMs: number;
refreshIntervalMs: number;
maxRefreshComments: number;
creationWindowMs: number;
maxCreationsPerWindow: number;
};
type ProductivityReviewEvidence = {
@@ -120,6 +129,11 @@ function readPositiveInteger(value: number, fallback: number) {
return Number.isFinite(value) && value > 0 ? Math.floor(value) : fallback;
}
function coerceDate(value: Date | string | null | undefined) {
if (!value) return null;
return value instanceof Date ? value : new Date(value);
}
function buildThresholds(overrides?: Partial<ProductivityReviewThresholds>): ProductivityReviewThresholds {
return {
noCommentStreakRuns: readPositiveInteger(
@@ -142,6 +156,22 @@ function buildThresholds(overrides?: Partial<ProductivityReviewThresholds>): Pro
overrides?.resolvedSnoozeMs ?? DEFAULT_PRODUCTIVITY_REVIEW_RESOLVED_SNOOZE_MS,
DEFAULT_PRODUCTIVITY_REVIEW_RESOLVED_SNOOZE_MS,
),
refreshIntervalMs: readPositiveInteger(
overrides?.refreshIntervalMs ?? DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS,
DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS,
),
maxRefreshComments: readPositiveInteger(
overrides?.maxRefreshComments ?? DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS,
DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS,
),
creationWindowMs: readPositiveInteger(
overrides?.creationWindowMs ?? DEFAULT_PRODUCTIVITY_REVIEW_CREATION_WINDOW_MS,
DEFAULT_PRODUCTIVITY_REVIEW_CREATION_WINDOW_MS,
),
maxCreationsPerWindow: readPositiveInteger(
overrides?.maxCreationsPerWindow ?? DEFAULT_PRODUCTIVITY_REVIEW_MAX_CREATIONS_PER_WINDOW,
DEFAULT_PRODUCTIVITY_REVIEW_MAX_CREATIONS_PER_WINDOW,
),
};
}
@@ -249,6 +279,69 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
.then((rows) => rows[0] ?? null);
}
async function countRecentProductivityReviews(
companyId: string,
sourceIssueId: string,
thresholds: ProductivityReviewThresholds,
now: Date,
) {
const cutoff = new Date(now.getTime() - thresholds.creationWindowMs);
return db
.select({ count: sql<number>`count(*)::int` })
.from(issues)
.where(
and(
eq(issues.companyId, companyId),
eq(issues.originKind, PRODUCTIVITY_REVIEW_ORIGIN_KIND),
eq(issues.originId, sourceIssueId),
isNull(issues.hiddenAt),
sql`${issues.status} <> 'cancelled'`,
sql`${issues.createdAt} >= ${cutoff.toISOString()}::timestamptz`,
),
)
.then((rows) => Number(rows[0]?.count ?? 0));
}
async function getRefreshCommentState(companyId: string, reviewIssueId: string) {
return db
.select({
count: sql<number>`count(*)::int`,
latestCreatedAt: sql<Date | null>`max(${issueComments.createdAt})`,
})
.from(issueComments)
.where(
and(
eq(issueComments.companyId, companyId),
eq(issueComments.issueId, reviewIssueId),
sql`${issueComments.body} like ${`${PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX}%`}`,
),
)
.then((rows) => {
const row = rows[0];
return {
count: Number(row?.count ?? 0),
latestCreatedAt: coerceDate(row?.latestCreatedAt),
};
});
}
async function addRefreshComment(
reviewIssueId: string,
body: string,
generatedAt: Date,
) {
const comment = await issuesSvc.addComment(reviewIssueId, body, {});
await db
.update(issueComments)
.set({ createdAt: generatedAt, updatedAt: generatedAt })
.where(eq(issueComments.id, comment.id));
await db
.update(issues)
.set({ updatedAt: generatedAt })
.where(eq(issues.id, reviewIssueId));
return comment;
}
async function countIssueRunsSince(companyId: string, agentId: string, issueId: string, since: Date) {
return db
.select({ count: sql<number>`count(*)::int` })
@@ -538,11 +631,19 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
async function createOrUpdateReview(
evidence: ProductivityReviewEvidence,
opts: { prefix: string },
opts: { prefix: string; thresholds: ProductivityReviewThresholds },
) {
const existing = await findOpenProductivityReview(evidence.sourceIssue.companyId, evidence.sourceIssue.id);
if (existing) {
await issuesSvc.addComment(existing.id, buildRefreshComment(evidence, opts.prefix), {});
const refreshState = await getRefreshCommentState(evidence.sourceIssue.companyId, existing.id);
const lastRefreshOrCreationAt = refreshState.latestCreatedAt ?? existing.createdAt;
if (
refreshState.count >= opts.thresholds.maxRefreshComments ||
evidence.generatedAt.getTime() - lastRefreshOrCreationAt.getTime() < opts.thresholds.refreshIntervalMs
) {
return { kind: "existing" as const, reviewIssueId: existing.id };
}
await addRefreshComment(existing.id, buildRefreshComment(evidence, opts.prefix), evidence.generatedAt);
await logActivity(db, {
companyId: evidence.sourceIssue.companyId,
actorType: "system",
@@ -563,6 +664,16 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
return { kind: "updated" as const, reviewIssueId: existing.id };
}
const recentCreationCount = await countRecentProductivityReviews(
evidence.sourceIssue.companyId,
evidence.sourceIssue.id,
opts.thresholds,
evidence.generatedAt,
);
if (recentCreationCount >= opts.thresholds.maxCreationsPerWindow) {
return { kind: "creation_capped" as const, reviewIssueId: null };
}
const ownerAgentId = await resolveReviewOwnerAgentId(evidence.sourceIssue, evidence.sourceAgent);
let review: Awaited<ReturnType<typeof issuesSvc.create>>;
try {
@@ -593,6 +704,10 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
if (!raced) throw error;
return { kind: "existing" as const, reviewIssueId: raced.id };
}
await db
.update(issues)
.set({ createdAt: evidence.generatedAt, updatedAt: evidence.generatedAt })
.where(eq(issues.id, review.id));
await logActivity(db, {
companyId: evidence.sourceIssue.companyId,
@@ -667,6 +782,7 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
updated: 0,
existing: 0,
snoozed: 0,
creationCapped: 0,
skipped: 0,
failed: 0,
reviewIssueIds: [] as string[],
@@ -703,11 +819,12 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
prefixCache.set(candidate.companyId, prefix);
}
try {
const outcome = await createOrUpdateReview(evidence, { prefix });
const outcome = await createOrUpdateReview(evidence, { prefix, thresholds });
if (outcome.kind === "created") result.created += 1;
else if (outcome.kind === "updated") result.updated += 1;
else if (outcome.kind === "creation_capped") result.creationCapped += 1;
else result.existing += 1;
result.reviewIssueIds.push(outcome.reviewIssueId);
if (outcome.reviewIssueId) result.reviewIssueIds.push(outcome.reviewIssueId);
} catch (err) {
result.failed += 1;
result.failedIssueIds.push(candidate.id);