Compare commits

...

12 Commits

Author SHA1 Message Date
Chris Farhood abdce817f3 0.1.36
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-24 12:36:21 +00:00
Chris Farhood f9d8a2e0ce fix: resolve grace-period deadlock for stale UI status (FAR-23)
The log-stream-exit grace timer never fired because logExitTime was set
in the .then() of streamPodLogs, which only resolves once stopSignal is
set — but stopSignal is only set when completionWithGrace fires, which
requires logExitTime to be non-null. Classic deadlock.

Fix: add onFirstStreamExit callback to streamPodLogs, called after
attempt=0's streamPodLogsOnce returns (the first container exit signal).
execute() passes a closure that sets logExitTime immediately, breaking
the circular dependency and allowing the 30s grace timer to fire
correctly when K8s Job conditions lag container exit.

Tests: all 323 pass including the two FAR-23 grace-period regression tests.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-24 12:20:10 +00:00
Chris Farhood a7dfd5d502 test: fix flaky execute.ts timer tests and hit 80%+ line coverage
readPaperclipRuntimeSkillEntries does real fs.stat I/O under fake timers,
delaying execute()'s fake-timer registration by ~3200-4200ms of fake time
when tests run in isolation (cold OS page cache).  The previous approach
tried vi.spyOn on an ESM module namespace export, which throws
"Cannot redefine property" — a fundamental ESM constraint.

Fix: remove the broken spy.  Instead, each timer-heavy test now uses enough
advanceTimersByTimeAsync calls to (a) give the event loop sufficient turns
for the I/O to drain, and (b) cover the full fake-timer sequence even with
the maximum observed I/O delay.  Patterns chosen:

  reconnects (needs t+6000):       6 advances, ~12200ms total
  deadline exceeded (needs t+3000): 5 advances, ~8400ms total
  pod-creation wait (needs t+5000): 5 advances, ~9400ms total

execute.ts line coverage: 82.57% (was ~24% before this task's test additions).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 04:10:49 +00:00
Chris Farhood e310ba4156 0.1.35 2026-04-24 00:44:59 +00:00
Chris Farhood ae7adb0847 docs: add enableRtk, rtkMaxOutputBytes, reattachOrphanedJobs to config doc (N6)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-24 00:01:57 +00:00
Chris Farhood d24510172e fix: remove misleading dangerouslySkipPermissions UI toggle (N5)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-24 00:01:38 +00:00
Chris Farhood 29a4e709d0 fix: sanitize agent/run/company labels to RFC 1123 (N4)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-24 00:00:56 +00:00
Chris Farhood 8a08e6a6ee fix: relabel reattached Job with current run-id and session-id (N3)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-23 23:59:05 +00:00
Chris Farhood c0dba8e904 fix: never auto-delete live K8s orphans; block on mismatch (#8)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-23 23:58:51 +00:00
Chris Farhood b91859c258 refactor: extract classifyOrphan helper with decision matrix (#8)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-23 23:58:23 +00:00
Chris Farhood f1433b05a6 fix: reserve paperclip.io/ and app.kubernetes.io/ label prefixes (N2)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-23 23:54:15 +00:00
Chris Farhood f64694f894 fix: validate companyId/instanceId against path traversal (N1)
Co-Authored-By: Claude Sonnet <noreply@anthropic.com>
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-23 23:53:18 +00:00
11 changed files with 1284 additions and 124 deletions
+2 -2
View File
@@ -1,12 +1,12 @@
{
"name": "paperclip-adapter-claude-k8s",
"version": "0.1.34",
"version": "0.1.35",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "paperclip-adapter-claude-k8s",
"version": "0.1.34",
"version": "0.1.35",
"license": "MIT",
"dependencies": {
"@kubernetes/client-node": "^1.0.0",
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "paperclip-adapter-claude-k8s",
"version": "0.1.34",
"version": "0.1.36",
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
"license": "MIT",
"repository": {
+5 -1
View File
@@ -15,7 +15,6 @@ Core fields:
- model (string, optional): Claude model id
- effort (string, optional): reasoning effort passed via --effort (low|medium|high)
- maxTurnsPerRun (number, optional): max turns for one run
- dangerouslySkipPermissions (boolean, optional): pass --dangerously-skip-permissions to claude
- instructionsFilePath (string, optional): absolute path to a markdown instructions file injected at runtime via --append-system-prompt-file
- extraArgs (string[], optional): additional CLI args appended to the claude command
- env (object, optional): KEY=VALUE environment variables; overrides inherited vars from the Deployment
@@ -31,6 +30,11 @@ Kubernetes fields:
- labels (object, optional): extra labels added to Job metadata
- ttlSecondsAfterFinished (number, optional): auto-cleanup delay; default 300
- retainJobs (boolean, optional): skip cleanup on completion for debugging
- reattachOrphanedJobs (boolean, optional): when true (default), attach to a running orphaned Job that matches the current agent/task/session instead of blocking; when false, any non-terminal orphan blocks the new run
Output filtering fields:
- enableRtk (boolean, optional): truncate oversized tool outputs before they reach the model via a PostToolUse hook; default false
- rtkMaxOutputBytes (number, optional): byte threshold for tool output truncation when enableRtk is true; default 50000
Operational fields:
- timeoutSec (number, optional): run timeout in seconds; 0 means no timeout
+2 -4
View File
@@ -34,12 +34,10 @@ describe("getConfigSchema", () => {
expect(field!.default).toBe(1000);
});
it("dangerouslySkipPermissions defaults to true", () => {
it("does not expose dangerouslySkipPermissions in UI schema", () => {
const schema = getConfigSchema();
const field = schema.fields.find((f: ConfigFieldSchema) => f.key === "dangerouslySkipPermissions");
expect(field).toBeDefined();
expect(field!.type).toBe("toggle");
expect(field!.default).toBe(true);
expect(field).toBeUndefined();
});
it("reattachOrphanedJobs defaults to true", () => {
+1 -8
View File
@@ -34,13 +34,6 @@ export function getConfigSchema(): AdapterConfigSchema {
hint: "Maximum number of agentic turns (tool calls) per heartbeat run. 0 means unlimited.",
default: 1000,
},
{
type: "toggle",
key: "dangerouslySkipPermissions",
label: "Skip Permissions",
hint: "Pass --dangerously-skip-permissions to Claude. Enabled by default for unattended K8s Jobs.",
default: true,
},
// Kubernetes
{
type: "text",
@@ -93,7 +86,7 @@ export function getConfigSchema(): AdapterConfigSchema {
type: "toggle",
key: "reattachOrphanedJobs",
label: "Reattach to Orphaned Jobs",
hint: "If a prior K8s Job for the same agent/task/session is still running (e.g. Paperclip restarted mid-run), attach to it and stream its output instead of deleting it and starting a new pod. Default: on.",
hint: "If a prior K8s Job for the same agent/task/session is still running (e.g. Paperclip restarted mid-run), attach to it and stream its output instead of blocking the new run. When false, any non-terminal orphan blocks the new run. Default: on.",
default: true,
},
// Resource Limits
File diff suppressed because it is too large Load Diff
+198 -61
View File
@@ -35,6 +35,13 @@ const POST_TERMINAL_KEEPALIVE_MS = 90_000;
// against the K8s client library not propagating writable.destroy() into an
// abort of the underlying HTTP request.
const LOG_STREAM_BAIL_TIMEOUT_MS = 3_000;
// After the log stream exits (container stopped producing output), wait this
// long for the K8s Job condition to be confirmed before treating the job as
// done. K8s Job conditions can lag pod exit by several seconds or more under
// cluster load. Without this bound, waitForJobCompletion keeps polling while
// streamPodLogs keeps reconnecting — together they can hold execute() open for
// minutes, causing stale "running" status in the UI (FAR-23).
const LOG_EXIT_COMPLETION_GRACE_MS = 30_000;
/**
* Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
@@ -89,30 +96,46 @@ export function buildPartialRunError(
: `Claude exited with code ${exitCode ?? -1}`;
}
export type OrphanClassification =
| "reattach"
| "block_session_mismatch"
| "block_task_mismatch"
| "block_task_unknown";
/**
* Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
* not match the current runId) as a potential reattach target. A Job is
* reattachable when it belongs to the same agent, same task, and same resume
* session as the current run — meaning the previous Paperclip instance was
* mid-stream on the exact piece of work this new run was dispatched to do.
* Classify a non-terminal orphaned K8s Job (one whose `paperclip.io/run-id`
* label does not match the current runId but does belong to this agent) as a
* reattach candidate or a block reason.
*
* Decision matrix:
* - taskId mismatch (both present, different values) → block_task_mismatch
* - taskId missing on either side → block_task_unknown
* - taskId match + both have sessionId + sessionIds differ → block_session_mismatch
* - taskId match + one or both sides missing sessionId → reattach (reconcile)
* - taskId match + both have sessionId + sessionIds match → reattach (happy path)
*
* Exported for unit tests.
*/
export function isReattachableOrphan(
export function classifyOrphan(
job: k8s.V1Job,
expected: { agentId: string; taskId: string | null; sessionId: string | null },
): boolean {
if (!expected.taskId || !expected.sessionId) return false;
expected: { taskId: string | null; sessionId: string | null },
): OrphanClassification {
const labels = job.metadata?.labels ?? {};
if (labels["paperclip.io/adapter-type"] !== "claude_k8s") return false;
if (labels["paperclip.io/agent-id"] !== expected.agentId) return false;
if (labels["paperclip.io/task-id"] !== expected.taskId) return false;
if (labels["paperclip.io/session-id"] !== expected.sessionId) return false;
const conditions = job.status?.conditions ?? [];
const terminal = conditions.some(
(c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True",
);
if (terminal) return false;
return true;
const jobTaskId = labels["paperclip.io/task-id"] ?? null;
const jobSessionId = labels["paperclip.io/session-id"] ?? null;
// taskId missing on either side
if (!expected.taskId || !jobTaskId) return "block_task_unknown";
// taskId mismatch
if (expected.taskId !== jobTaskId) return "block_task_mismatch";
// taskId matches — check sessionId
if (expected.sessionId && jobSessionId && expected.sessionId !== jobSessionId) {
return "block_session_mismatch";
}
return "reattach";
}
/**
@@ -341,6 +364,11 @@ export async function streamPodLogsOnce(
*
* Capped at MAX_LOG_RECONNECT_ATTEMPTS to prevent infinite reconnect
* loops during sustained API partitions.
*
* onFirstStreamExit is called the first time streamPodLogsOnce returns
* (container has exited or stream disconnected). Used by execute() to
* start the LOG_EXIT_COMPLETION_GRACE_MS grace timer (FAR-23) without
* waiting for all reconnects to exhaust.
*/
async function streamPodLogs(
namespace: string,
@@ -349,6 +377,7 @@ async function streamPodLogs(
kubeconfigPath?: string,
stopSignal?: { stopped: boolean },
dedup?: LogLineDedupFilter,
onFirstStreamExit?: () => void,
): Promise<string> {
const allChunks: string[] = [];
let attempt = 0;
@@ -379,6 +408,9 @@ async function streamPodLogs(
const preStreamTs = Math.floor(Date.now() / 1000);
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal);
// Signal first stream exit immediately so the grace-period timer in
// execute() can start without waiting for all reconnects to complete.
if (attempt === 0) onFirstStreamExit?.();
if (result) {
allChunks.push(result);
// Update last-received timestamp to now (the stream just ended,
@@ -525,6 +557,17 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
// the current runId. When reattachOrphanedJobs is enabled and the orphan matches
// the current agent+task+session, we attach to it instead of deleting it (FAR-124).
const agentId = ctx.agent.id;
const sanitizedAgentId = sanitizeLabelValue(agentId);
if (!sanitizedAgentId) {
await onLog("stderr", `[paperclip] Cannot create K8s Job: agent.id "${agentId}" produces no valid RFC 1123 label characters\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Agent ID "${agentId}" cannot be sanitized to a valid Kubernetes label`,
errorCode: "k8s_agent_id_invalid",
};
}
const selfPod = await getSelfPodInfo(kubeconfigPath);
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
@@ -538,7 +581,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
const batchApi = getBatchApi(kubeconfigPath);
const existing = await batchApi.listNamespacedJob({
namespace: guardNamespace,
labelSelector: `paperclip.io/agent-id=${agentId},paperclip.io/adapter-type=claude_k8s`,
labelSelector: `paperclip.io/agent-id=${sanitizedAgentId},paperclip.io/adapter-type=claude_k8s`,
});
const running = existing.items.filter(
(j) => !j.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True"),
@@ -553,45 +596,72 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
(j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId,
);
// Pick the most recent reattachable orphan — same agent + task + session,
// not terminal. Only one target is chosen; any other orphans get
// cleaned up as before.
if (reattachOrphanedJobs && orphaned.length > 0) {
const candidates = orphaned
.filter((j) =>
isReattachableOrphan(j, {
agentId,
taskId: currentTaskLabel,
sessionId: currentSessionLabel,
}),
)
.sort((a, b) => {
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
return bt - at;
});
const chosen = candidates[0];
const chosenName = chosen?.metadata?.name;
if (chosen && chosenName) {
reattachTarget = {
jobName: chosenName,
namespace: chosen.metadata?.namespace ?? guardNamespace,
priorRunId: chosen.metadata?.labels?.["paperclip.io/run-id"] ?? "",
image: chosen.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
if (orphaned.length > 0) {
if (!reattachOrphanedJobs) {
// When reattach is disabled, block on any non-terminal orphan.
const names = orphaned.map((j) => j.metadata?.name).join(", ");
await onLog("stderr", `[paperclip] Concurrent run blocked: orphaned Job(s) running and reattach disabled: ${names}\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Concurrent run blocked: orphaned Job(s) still running for this agent (reattach disabled)`,
errorCode: "k8s_concurrent_run_blocked",
};
}
}
const toDelete = orphaned.filter(
(j) => !reattachTarget || j.metadata?.name !== reattachTarget.jobName,
);
if (toDelete.length > 0) {
const orphanNames = toDelete.map((j) => j.metadata?.name).join(", ");
await onLog("stdout", `[paperclip] Cleaning up ${toDelete.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
for (const j of toDelete) {
const name = j.metadata?.name;
if (name) {
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
// Apply the decision matrix to each orphan, newest-first. The first
// reattachable orphan becomes the target; any block classification
// stops the new run immediately. Orphans are never deleted here —
// terminal ones are cleaned up by TTL; live mismatches should not be
// killed because they may still be doing real work.
const sortedOrphans = [...orphaned].sort((a, b) => {
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
return bt - at;
});
for (const orphan of sortedOrphans) {
const classification = classifyOrphan(orphan, {
taskId: currentTaskLabel,
sessionId: currentSessionLabel,
});
const orphanName = orphan.metadata?.name ?? "unknown";
if (classification === "reattach") {
if (!reattachTarget) {
reattachTarget = {
jobName: orphanName,
namespace: orphan.metadata?.namespace ?? guardNamespace,
priorRunId: orphan.metadata?.labels?.["paperclip.io/run-id"] ?? "",
image: orphan.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
};
}
} else if (classification === "block_task_unknown") {
await onLog("stderr", `[paperclip] Blocked: orphaned Job ${orphanName} has missing task label — cannot safely reattach\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Concurrent run blocked: orphaned Job ${orphanName} has unknown task context`,
errorCode: "k8s_orphan_task_unknown",
};
} else if (classification === "block_task_mismatch") {
await onLog("stderr", `[paperclip] Blocked: orphaned Job ${orphanName} belongs to a different task\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Concurrent run blocked: orphaned Job ${orphanName} is running a different task`,
errorCode: "k8s_concurrent_run_blocked",
};
} else if (classification === "block_session_mismatch") {
await onLog("stderr", `[paperclip] Blocked: orphaned Job ${orphanName} has a different session\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Concurrent run blocked: orphaned Job ${orphanName} has a mismatched session`,
errorCode: "k8s_orphan_session_mismatch",
};
}
}
}
@@ -690,6 +760,26 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
}
await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
// Relabel the reattached Job with the current run-id (and session-id if
// available) so the next concurrency guard sees it as owned by this run
// rather than an orphan from the prior run.
const labelPatch: Array<{ op: "add" | "replace"; path: string; value: string }> = [
{ op: "replace", path: "/metadata/labels/paperclip.io~1run-id", value: runId },
];
if (currentSessionLabel) {
labelPatch.push({ op: "replace", path: "/metadata/labels/paperclip.io~1session-id", value: currentSessionLabel });
}
try {
await batchApi.patchNamespacedJob({
name: jobName,
namespace,
body: labelPatch,
});
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
await onLog("stderr", `[paperclip] Warning: failed to relabel reattached Job ${jobName}: ${msg}\n`);
}
} else {
// Build Job manifest
const built = buildJobManifest({ ctx, selfPod, promptBundle });
@@ -700,6 +790,9 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
const claudeArgs = built.claudeArgs;
const promptMetrics = built.promptMetrics;
promptSecret = built.promptSecret;
if (built.skippedLabels.length > 0) {
await onLog("stderr", `[paperclip] Warning: skipped ${built.skippedLabels.length} extra label(s) with reserved prefix: ${built.skippedLabels.join(", ")}\n`);
}
// Report invocation metadata
if (onMeta) {
@@ -986,12 +1079,56 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
// reuse it and avoid pushing already-sent lines to the UI (finding #6, FAR-15).
const logDedup = new LogLineDedupFilter();
const [logResult, completionResult] = await Promise.allSettled([
streamPodLogs(namespace, podName, wrappedOnLog, kubeconfigPath, logStopSignal, logDedup),
waitForJobCompletion(namespace, jobName, completionTimeoutMs, kubeconfigPath).then((r) => {
// Track when the log stream first exits so the grace-period can fire
// if the K8s Job condition lags behind container exit (FAR-23).
// Set via onFirstStreamExit callback (called after attempt=0 returns)
// rather than in .then() of streamPodLogs, which would create a
// deadlock: streamPodLogs only resolves after stopSignal is set, but
// stopSignal is set by the grace timer which needs logExitTime to be
// non-null.
let logExitTime: number | null = null;
const trackedLogStream = streamPodLogs(
namespace, podName, wrappedOnLog, kubeconfigPath, logStopSignal, logDedup,
() => { logExitTime = Date.now(); },
);
// completionWithGrace races waitForJobCompletion against a grace timer
// that fires LOG_EXIT_COMPLETION_GRACE_MS after the log stream exits.
// This bounds the stale-UI window when K8s Job conditions lag container
// exit (FAR-23): without it, waitForJobCompletion polls indefinitely
// while streamPodLogs reconnects, holding execute() open for minutes.
// logStopSignal.stopped is set on every settled path (fulfilled, rejected,
// or grace) so streamPodLogs stops reconnecting promptly.
type CompletionResult = { succeeded: boolean; timedOut: boolean; jobGone?: boolean };
let gracePoller: ReturnType<typeof setInterval> | null = null;
const completionWithGrace = new Promise<CompletionResult>((resolve, reject) => {
let settled = false;
const settleOk = (r: CompletionResult) => {
if (settled) return;
settled = true;
if (gracePoller) { clearInterval(gracePoller); gracePoller = null; }
logStopSignal.stopped = true;
return r;
}),
resolve(r);
};
const settleErr = (err: unknown) => {
if (settled) return;
settled = true;
if (gracePoller) { clearInterval(gracePoller); gracePoller = null; }
logStopSignal.stopped = true;
reject(err);
};
waitForJobCompletion(namespace, jobName, completionTimeoutMs, kubeconfigPath).then(settleOk).catch(settleErr);
gracePoller = setInterval(() => {
if (logExitTime !== null && Date.now() - logExitTime >= LOG_EXIT_COMPLETION_GRACE_MS) {
void onLog("stdout", `[paperclip] Log stream exited ${LOG_EXIT_COMPLETION_GRACE_MS / 1000}s ago without K8s Job condition update — proceeding with captured output (FAR-23)\n`).catch(() => {});
settleOk({ succeeded: false, timedOut: false, jobGone: true });
}
}, 1_000);
});
const [logResult, completionResult] = await Promise.allSettled([
trackedLogStream,
completionWithGrace,
]);
// Stop the keepalive immediately once the job has reached a terminal
+69
View File
@@ -166,6 +166,75 @@ describe("buildJobManifest", () => {
expect(job.metadata?.labels?.["paperclip.io/task-id"]).toBeUndefined();
expect(job.metadata?.labels?.["paperclip.io/session-id"]).toBeUndefined();
});
it("drops user label with paperclip.io/ prefix", () => {
ctx.config = { labels: { "paperclip.io/run-id": "hijacked" } };
const { job, skippedLabels } = buildJobManifest({ ctx, selfPod });
expect(job.metadata?.labels?.["paperclip.io/run-id"]).not.toBe("hijacked");
expect(skippedLabels).toContain("paperclip.io/run-id");
});
it("drops user label with app.kubernetes.io/ prefix", () => {
ctx.config = { labels: { "app.kubernetes.io/managed-by": "attacker" } };
const { job, skippedLabels } = buildJobManifest({ ctx, selfPod });
expect(job.metadata?.labels?.["app.kubernetes.io/managed-by"]).toBe("paperclip");
expect(skippedLabels).toContain("app.kubernetes.io/managed-by");
});
it("passes through user label without reserved prefix", () => {
ctx.config = { labels: { "custom.io/team": "platform" } };
const { job, skippedLabels } = buildJobManifest({ ctx, selfPod });
expect(job.metadata?.labels?.["custom.io/team"]).toBe("platform");
expect(skippedLabels).not.toContain("custom.io/team");
});
it("populates skippedLabels with all dropped keys", () => {
ctx.config = {
labels: {
"paperclip.io/agent-id": "x",
"app.kubernetes.io/component": "y",
"safe": "z",
},
};
const { skippedLabels } = buildJobManifest({ ctx, selfPod });
expect(skippedLabels).toHaveLength(2);
expect(skippedLabels).toContain("paperclip.io/agent-id");
expect(skippedLabels).toContain("app.kubernetes.io/component");
});
});
describe("system label sanitization (N4)", () => {
it("sanitizes agent.id with @ to a valid RFC 1123 label", () => {
ctx.agent.id = "user@example.com";
const { job } = buildJobManifest({ ctx, selfPod });
const label = job.metadata?.labels?.["paperclip.io/agent-id"];
expect(label).toMatch(/^[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?$/);
expect(label).not.toContain("@");
});
it("sanitizes agent.id with spaces to a valid RFC 1123 label", () => {
ctx.agent.id = "my agent id";
const { job } = buildJobManifest({ ctx, selfPod });
const label = job.metadata?.labels?.["paperclip.io/agent-id"];
expect(label).toMatch(/^[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?$/);
});
it("omits paperclip.io/run-id when sanitized value is null (all-invalid runId)", () => {
// inject an all-special-chars runId via context override — buildJobManifest
// uses ctx.runId directly
const badCtx = makeCtx({ runId: "@@@" });
const { job, skippedLabels } = buildJobManifest({ ctx: badCtx, selfPod });
expect(job.metadata?.labels?.["paperclip.io/run-id"]).toBeUndefined();
expect(skippedLabels).toContain("paperclip.io/run-id");
});
it("selector matches sanitized agent-id label", () => {
ctx.agent.id = "Agent@Test";
const { job } = buildJobManifest({ ctx, selfPod });
const agentLabel = job.metadata?.labels?.["paperclip.io/agent-id"];
// the label should equal what sanitizeLabelValue produces
expect(agentLabel).toBe("AgentTest");
});
});
describe("annotations", () => {
+19 -6
View File
@@ -200,6 +200,8 @@ export interface JobBuildResult {
/** Non-null when the prompt is too large for an env var and must be
* staged as a K8s Secret before creating the Job. */
promptSecret: PromptSecret | null;
/** User-supplied extra labels that were dropped because they used a reserved prefix. */
skippedLabels: string[];
}
function sanitizeForK8sName(value: string, maxLen = 16): string {
@@ -442,15 +444,22 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
},
};
// Labels
// Labels — system identifiers must pass RFC 1123 label value format.
const sanitizedAgentId = sanitizeLabelValue(agent.id);
const sanitizedRunId = sanitizeLabelValue(runId);
const sanitizedCompanyId = sanitizeLabelValue(agent.companyId);
const skippedLabels: string[] = [];
if (!sanitizedRunId) skippedLabels.push("paperclip.io/run-id");
if (!sanitizedCompanyId) skippedLabels.push("paperclip.io/company-id");
const labels: Record<string, string> = {
"app.kubernetes.io/managed-by": "paperclip",
"app.kubernetes.io/component": "agent-job",
"paperclip.io/agent-id": agent.id,
"paperclip.io/run-id": runId,
"paperclip.io/company-id": agent.companyId,
// sanitizedAgentId null-check is enforced in execute.ts before Job creation
"paperclip.io/agent-id": sanitizedAgentId ?? agent.id,
"paperclip.io/adapter-type": "claude_k8s",
};
if (sanitizedRunId) labels["paperclip.io/run-id"] = sanitizedRunId;
if (sanitizedCompanyId) labels["paperclip.io/company-id"] = sanitizedCompanyId;
// Reattach-target labels: let a future execute() identify this Job as the
// continuation of the same logical unit of work (same task + same resume
// session) so it can attach to the running pod across a Paperclip restart
@@ -461,7 +470,11 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
const sessionLabel = runtimeSessionId ? sanitizeLabelValue(runtimeSessionId) : null;
if (sessionLabel) labels["paperclip.io/session-id"] = sessionLabel;
for (const [key, value] of Object.entries(extraLabels)) {
labels[key] = value;
if (key.startsWith("paperclip.io/") || key.startsWith("app.kubernetes.io/")) {
skippedLabels.push(key);
} else {
labels[key] = value;
}
}
// Volumes
@@ -628,5 +641,5 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
},
};
return { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret };
return { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret, skippedLabels };
}
+49
View File
@@ -0,0 +1,49 @@
import { describe, it, expect, vi } from "vitest";
import os from "node:os";
import path from "node:path";
import { prepareClaudePromptBundle } from "./prompt-cache.js";
const onLog = vi.fn();
describe("prepareClaudePromptBundle path traversal validation", () => {
const validArgs = {
skills: [],
instructionsContents: null,
onLog,
};
it("rejects companyId containing ..", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: ".." })).rejects.toThrow(/companyId/);
});
it("rejects companyId containing ../x", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "../x" })).rejects.toThrow(/companyId/);
});
it("rejects companyId containing /", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "a/b" })).rejects.toThrow(/companyId/);
});
it("rejects companyId containing backslash", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "a\\b" })).rejects.toThrow(/companyId/);
});
it("rejects companyId containing null byte", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "a\0b" })).rejects.toThrow(/companyId/);
});
it("rejects empty companyId", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "" })).rejects.toThrow(/companyId/);
});
it("rejects whitespace-only companyId", async () => {
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: " " })).rejects.toThrow(/companyId/);
});
it("accepts a valid companyId", async () => {
vi.stubEnv("PAPERCLIP_HOME", path.join(os.tmpdir(), `prompt-cache-test-${process.pid}`));
const result = await prepareClaudePromptBundle({ ...validArgs, companyId: "acme-co" });
expect(result.rootDir).toContain("acme-co");
vi.unstubAllEnvs();
});
});
+9
View File
@@ -21,6 +21,13 @@ export interface ClaudePromptBundle {
const DEFAULT_PAPERCLIP_INSTANCE_ID = "default";
function validatePathComponent(value: string, fieldName: string): void {
if (value.trim().length === 0) throw new Error(`Invalid ${fieldName}: must not be empty`);
if (value.includes("/") || value.includes("\\")) throw new Error(`Invalid ${fieldName}: must not contain path separators`);
if (value.includes("..")) throw new Error(`Invalid ${fieldName}: must not contain ".."`);
if (value.includes("\0")) throw new Error(`Invalid ${fieldName}: must not contain null bytes`);
}
function resolveManagedClaudePromptCacheRoot(companyId: string): string {
const paperclipHome =
(typeof process.env.PAPERCLIP_HOME === "string" && process.env.PAPERCLIP_HOME.trim().length > 0
@@ -31,6 +38,8 @@ function resolveManagedClaudePromptCacheRoot(companyId: string): string {
(typeof process.env.PAPERCLIP_INSTANCE_ID === "string" && process.env.PAPERCLIP_INSTANCE_ID.trim().length > 0
? process.env.PAPERCLIP_INSTANCE_ID.trim()
: null) ?? DEFAULT_PAPERCLIP_INSTANCE_ID;
validatePathComponent(companyId, "companyId");
validatePathComponent(instanceId, "instanceId");
return path.resolve(paperclipHome, "instances", instanceId, "companies", companyId, "claude-prompt-cache");
}