Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e310ba4156 | |||
| ae7adb0847 | |||
| d24510172e | |||
| 29a4e709d0 | |||
| 8a08e6a6ee | |||
| c0dba8e904 | |||
| b91859c258 | |||
| f1433b05a6 | |||
| f64694f894 |
Generated
+2
-2
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.34",
|
"version": "0.1.35",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.34",
|
"version": "0.1.35",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@kubernetes/client-node": "^1.0.0",
|
"@kubernetes/client-node": "^1.0.0",
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.34",
|
"version": "0.1.35",
|
||||||
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"repository": {
|
"repository": {
|
||||||
|
|||||||
+5
-1
@@ -15,7 +15,6 @@ Core fields:
|
|||||||
- model (string, optional): Claude model id
|
- model (string, optional): Claude model id
|
||||||
- effort (string, optional): reasoning effort passed via --effort (low|medium|high)
|
- effort (string, optional): reasoning effort passed via --effort (low|medium|high)
|
||||||
- maxTurnsPerRun (number, optional): max turns for one run
|
- maxTurnsPerRun (number, optional): max turns for one run
|
||||||
- dangerouslySkipPermissions (boolean, optional): pass --dangerously-skip-permissions to claude
|
|
||||||
- instructionsFilePath (string, optional): absolute path to a markdown instructions file injected at runtime via --append-system-prompt-file
|
- instructionsFilePath (string, optional): absolute path to a markdown instructions file injected at runtime via --append-system-prompt-file
|
||||||
- extraArgs (string[], optional): additional CLI args appended to the claude command
|
- extraArgs (string[], optional): additional CLI args appended to the claude command
|
||||||
- env (object, optional): KEY=VALUE environment variables; overrides inherited vars from the Deployment
|
- env (object, optional): KEY=VALUE environment variables; overrides inherited vars from the Deployment
|
||||||
@@ -31,6 +30,11 @@ Kubernetes fields:
|
|||||||
- labels (object, optional): extra labels added to Job metadata
|
- labels (object, optional): extra labels added to Job metadata
|
||||||
- ttlSecondsAfterFinished (number, optional): auto-cleanup delay; default 300
|
- ttlSecondsAfterFinished (number, optional): auto-cleanup delay; default 300
|
||||||
- retainJobs (boolean, optional): skip cleanup on completion for debugging
|
- retainJobs (boolean, optional): skip cleanup on completion for debugging
|
||||||
|
- reattachOrphanedJobs (boolean, optional): when true (default), attach to a running orphaned Job that matches the current agent/task/session instead of blocking; when false, any non-terminal orphan blocks the new run
|
||||||
|
|
||||||
|
Output filtering fields:
|
||||||
|
- enableRtk (boolean, optional): truncate oversized tool outputs before they reach the model via a PostToolUse hook; default false
|
||||||
|
- rtkMaxOutputBytes (number, optional): byte threshold for tool output truncation when enableRtk is true; default 50000
|
||||||
|
|
||||||
Operational fields:
|
Operational fields:
|
||||||
- timeoutSec (number, optional): run timeout in seconds; 0 means no timeout
|
- timeoutSec (number, optional): run timeout in seconds; 0 means no timeout
|
||||||
|
|||||||
@@ -34,12 +34,10 @@ describe("getConfigSchema", () => {
|
|||||||
expect(field!.default).toBe(1000);
|
expect(field!.default).toBe(1000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("dangerouslySkipPermissions defaults to true", () => {
|
it("does not expose dangerouslySkipPermissions in UI schema", () => {
|
||||||
const schema = getConfigSchema();
|
const schema = getConfigSchema();
|
||||||
const field = schema.fields.find((f: ConfigFieldSchema) => f.key === "dangerouslySkipPermissions");
|
const field = schema.fields.find((f: ConfigFieldSchema) => f.key === "dangerouslySkipPermissions");
|
||||||
expect(field).toBeDefined();
|
expect(field).toBeUndefined();
|
||||||
expect(field!.type).toBe("toggle");
|
|
||||||
expect(field!.default).toBe(true);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("reattachOrphanedJobs defaults to true", () => {
|
it("reattachOrphanedJobs defaults to true", () => {
|
||||||
|
|||||||
@@ -34,13 +34,6 @@ export function getConfigSchema(): AdapterConfigSchema {
|
|||||||
hint: "Maximum number of agentic turns (tool calls) per heartbeat run. 0 means unlimited.",
|
hint: "Maximum number of agentic turns (tool calls) per heartbeat run. 0 means unlimited.",
|
||||||
default: 1000,
|
default: 1000,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
type: "toggle",
|
|
||||||
key: "dangerouslySkipPermissions",
|
|
||||||
label: "Skip Permissions",
|
|
||||||
hint: "Pass --dangerously-skip-permissions to Claude. Enabled by default for unattended K8s Jobs.",
|
|
||||||
default: true,
|
|
||||||
},
|
|
||||||
// Kubernetes
|
// Kubernetes
|
||||||
{
|
{
|
||||||
type: "text",
|
type: "text",
|
||||||
@@ -93,7 +86,7 @@ export function getConfigSchema(): AdapterConfigSchema {
|
|||||||
type: "toggle",
|
type: "toggle",
|
||||||
key: "reattachOrphanedJobs",
|
key: "reattachOrphanedJobs",
|
||||||
label: "Reattach to Orphaned Jobs",
|
label: "Reattach to Orphaned Jobs",
|
||||||
hint: "If a prior K8s Job for the same agent/task/session is still running (e.g. Paperclip restarted mid-run), attach to it and stream its output instead of deleting it and starting a new pod. Default: on.",
|
hint: "If a prior K8s Job for the same agent/task/session is still running (e.g. Paperclip restarted mid-run), attach to it and stream its output instead of blocking the new run. When false, any non-terminal orphan blocks the new run. Default: on.",
|
||||||
default: true,
|
default: true,
|
||||||
},
|
},
|
||||||
// Resource Limits
|
// Resource Limits
|
||||||
|
|||||||
+54
-35
@@ -15,7 +15,7 @@ vi.mock("./k8s-client.js", () => ({
|
|||||||
resetCache: vi.fn(),
|
resetCache: vi.fn(),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
const { isK8s404, buildPartialRunError, isReattachableOrphan, describePodTerminatedError, streamPodLogsOnce } = await import("./execute.js");
|
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, streamPodLogsOnce, execute } = await import("./execute.js");
|
||||||
|
|
||||||
function makeJob(opts: {
|
function makeJob(opts: {
|
||||||
runId?: string;
|
runId?: string;
|
||||||
@@ -146,59 +146,59 @@ describe("buildPartialRunError", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("isReattachableOrphan", () => {
|
describe("classifyOrphan", () => {
|
||||||
const agentId = "agent-abc";
|
|
||||||
const taskId = "task-xyz";
|
const taskId = "task-xyz";
|
||||||
const sessionId = "sess-123";
|
const sessionId = "sess-123";
|
||||||
|
|
||||||
it("returns true when agent/task/session all match and Job is not terminal", () => {
|
// --- Happy path: reattach ---
|
||||||
const job = makeJob({ agentId, taskId, sessionId, runId: "old-run" });
|
it("returns reattach when taskId matches and both sessionIds match", () => {
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(true);
|
const job = makeJob({ taskId, sessionId });
|
||||||
|
expect(classifyOrphan(job, { taskId, sessionId })).toBe("reattach");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when the Job is already Complete", () => {
|
it("returns reattach when taskId matches and expected sessionId is null (missing on current side)", () => {
|
||||||
const job = makeJob({ agentId, taskId, sessionId, runId: "old-run", terminal: true });
|
const job = makeJob({ taskId, sessionId });
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
expect(classifyOrphan(job, { taskId, sessionId: null })).toBe("reattach");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when expected taskId is null (caller couldn't derive one)", () => {
|
it("returns reattach when taskId matches and job has no session-id label (missing on job side)", () => {
|
||||||
const job = makeJob({ agentId, taskId, sessionId });
|
const job = makeJob({ taskId });
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId: null, sessionId })).toBe(false);
|
expect(classifyOrphan(job, { taskId, sessionId })).toBe("reattach");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when expected sessionId is null", () => {
|
it("returns reattach when taskId matches and neither side has a sessionId", () => {
|
||||||
const job = makeJob({ agentId, taskId, sessionId });
|
const job = makeJob({ taskId });
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId: null })).toBe(false);
|
expect(classifyOrphan(job, { taskId, sessionId: null })).toBe("reattach");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when agent id doesn't match", () => {
|
// --- Block: task unknown ---
|
||||||
const job = makeJob({ agentId: "agent-other", taskId, sessionId });
|
it("returns block_task_unknown when expected taskId is null", () => {
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
const job = makeJob({ taskId, sessionId });
|
||||||
|
expect(classifyOrphan(job, { taskId: null, sessionId })).toBe("block_task_unknown");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when task id doesn't match", () => {
|
it("returns block_task_unknown when job has no task-id label", () => {
|
||||||
const job = makeJob({ agentId, taskId: "task-other", sessionId });
|
const job = makeJob({ sessionId });
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
expect(classifyOrphan(job, { taskId, sessionId })).toBe("block_task_unknown");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when session id doesn't match", () => {
|
// --- Block: task mismatch ---
|
||||||
const job = makeJob({ agentId, taskId, sessionId: "sess-other" });
|
it("returns block_task_mismatch when both sides have taskId but they differ", () => {
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
const job = makeJob({ taskId: "task-other", sessionId });
|
||||||
|
expect(classifyOrphan(job, { taskId, sessionId })).toBe("block_task_mismatch");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when the Job is from a different adapter type", () => {
|
// --- Block: session mismatch ---
|
||||||
const job = makeJob({ agentId, taskId, sessionId, adapterType: "claude_local" });
|
it("returns block_session_mismatch when taskId matches but sessionIds differ", () => {
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
const job = makeJob({ taskId, sessionId: "sess-other" });
|
||||||
|
expect(classifyOrphan(job, { taskId, sessionId })).toBe("block_session_mismatch");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns false when Job has no task-id label (labels were introduced in FAR-124)", () => {
|
// --- Terminal orphan (caller filters these before classifyOrphan) ---
|
||||||
const job = makeJob({ agentId, sessionId });
|
it("returns reattach for terminal job (caller is responsible for filtering terminals)", () => {
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
const job = makeJob({ taskId, sessionId, terminal: true });
|
||||||
});
|
// classifyOrphan does not check terminal status — that is the caller's job
|
||||||
|
expect(classifyOrphan(job, { taskId, sessionId })).toBe("reattach");
|
||||||
it("returns false when Job has no session-id label", () => {
|
|
||||||
const job = makeJob({ agentId, taskId });
|
|
||||||
expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -261,6 +261,25 @@ describe("describePodTerminatedError", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("execute: all-invalid agent.id (N4)", () => {
|
||||||
|
it("returns hard error without creating a Job when agent.id sanitizes to null", async () => {
|
||||||
|
const logs: string[] = [];
|
||||||
|
const result = await execute({
|
||||||
|
runId: "run-001",
|
||||||
|
agent: { id: "@@@", companyId: "co1", name: "Bad Agent", adapterType: "claude_k8s", adapterConfig: {} },
|
||||||
|
runtime: { sessionId: null, sessionParams: null, sessionDisplayId: null, taskKey: null },
|
||||||
|
config: {},
|
||||||
|
context: {},
|
||||||
|
onLog: async (_stream, msg) => { logs.push(msg); },
|
||||||
|
});
|
||||||
|
expect(result.errorCode).toBe("k8s_agent_id_invalid");
|
||||||
|
expect(result.errorMessage).toContain("@@@");
|
||||||
|
// getSelfPodInfo must NOT have been called (early return before K8s calls)
|
||||||
|
const { getSelfPodInfo } = await import("./k8s-client.js");
|
||||||
|
expect(getSelfPodInfo).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// Regression: FAR-10 hardening — streamPodLogsOnce must not hang forever when
|
// Regression: FAR-10 hardening — streamPodLogsOnce must not hang forever when
|
||||||
// the K8s client's logApi.log call never resolves. When stopSignal fires, the
|
// the K8s client's logApi.log call never resolves. When stopSignal fires, the
|
||||||
// bail timer must force-return within LOG_STREAM_BAIL_TIMEOUT_MS (3s in the
|
// bail timer must force-return within LOG_STREAM_BAIL_TIMEOUT_MS (3s in the
|
||||||
|
|||||||
+133
-56
@@ -89,30 +89,46 @@ export function buildPartialRunError(
|
|||||||
: `Claude exited with code ${exitCode ?? -1}`;
|
: `Claude exited with code ${exitCode ?? -1}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type OrphanClassification =
|
||||||
|
| "reattach"
|
||||||
|
| "block_session_mismatch"
|
||||||
|
| "block_task_mismatch"
|
||||||
|
| "block_task_unknown";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
|
* Classify a non-terminal orphaned K8s Job (one whose `paperclip.io/run-id`
|
||||||
* not match the current runId) as a potential reattach target. A Job is
|
* label does not match the current runId but does belong to this agent) as a
|
||||||
* reattachable when it belongs to the same agent, same task, and same resume
|
* reattach candidate or a block reason.
|
||||||
* session as the current run — meaning the previous Paperclip instance was
|
*
|
||||||
* mid-stream on the exact piece of work this new run was dispatched to do.
|
* Decision matrix:
|
||||||
|
* - taskId mismatch (both present, different values) → block_task_mismatch
|
||||||
|
* - taskId missing on either side → block_task_unknown
|
||||||
|
* - taskId match + both have sessionId + sessionIds differ → block_session_mismatch
|
||||||
|
* - taskId match + one or both sides missing sessionId → reattach (reconcile)
|
||||||
|
* - taskId match + both have sessionId + sessionIds match → reattach (happy path)
|
||||||
|
*
|
||||||
* Exported for unit tests.
|
* Exported for unit tests.
|
||||||
*/
|
*/
|
||||||
export function isReattachableOrphan(
|
export function classifyOrphan(
|
||||||
job: k8s.V1Job,
|
job: k8s.V1Job,
|
||||||
expected: { agentId: string; taskId: string | null; sessionId: string | null },
|
expected: { taskId: string | null; sessionId: string | null },
|
||||||
): boolean {
|
): OrphanClassification {
|
||||||
if (!expected.taskId || !expected.sessionId) return false;
|
|
||||||
const labels = job.metadata?.labels ?? {};
|
const labels = job.metadata?.labels ?? {};
|
||||||
if (labels["paperclip.io/adapter-type"] !== "claude_k8s") return false;
|
const jobTaskId = labels["paperclip.io/task-id"] ?? null;
|
||||||
if (labels["paperclip.io/agent-id"] !== expected.agentId) return false;
|
const jobSessionId = labels["paperclip.io/session-id"] ?? null;
|
||||||
if (labels["paperclip.io/task-id"] !== expected.taskId) return false;
|
|
||||||
if (labels["paperclip.io/session-id"] !== expected.sessionId) return false;
|
// taskId missing on either side
|
||||||
const conditions = job.status?.conditions ?? [];
|
if (!expected.taskId || !jobTaskId) return "block_task_unknown";
|
||||||
const terminal = conditions.some(
|
|
||||||
(c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True",
|
// taskId mismatch
|
||||||
);
|
if (expected.taskId !== jobTaskId) return "block_task_mismatch";
|
||||||
if (terminal) return false;
|
|
||||||
return true;
|
// taskId matches — check sessionId
|
||||||
|
if (expected.sessionId && jobSessionId && expected.sessionId !== jobSessionId) {
|
||||||
|
return "block_session_mismatch";
|
||||||
|
}
|
||||||
|
|
||||||
|
return "reattach";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -525,6 +541,17 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// the current runId. When reattachOrphanedJobs is enabled and the orphan matches
|
// the current runId. When reattachOrphanedJobs is enabled and the orphan matches
|
||||||
// the current agent+task+session, we attach to it instead of deleting it (FAR-124).
|
// the current agent+task+session, we attach to it instead of deleting it (FAR-124).
|
||||||
const agentId = ctx.agent.id;
|
const agentId = ctx.agent.id;
|
||||||
|
const sanitizedAgentId = sanitizeLabelValue(agentId);
|
||||||
|
if (!sanitizedAgentId) {
|
||||||
|
await onLog("stderr", `[paperclip] Cannot create K8s Job: agent.id "${agentId}" produces no valid RFC 1123 label characters\n`);
|
||||||
|
return {
|
||||||
|
exitCode: null,
|
||||||
|
signal: null,
|
||||||
|
timedOut: false,
|
||||||
|
errorMessage: `Agent ID "${agentId}" cannot be sanitized to a valid Kubernetes label`,
|
||||||
|
errorCode: "k8s_agent_id_invalid",
|
||||||
|
};
|
||||||
|
}
|
||||||
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
||||||
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
||||||
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
|
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
|
||||||
@@ -538,7 +565,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
const batchApi = getBatchApi(kubeconfigPath);
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
const existing = await batchApi.listNamespacedJob({
|
const existing = await batchApi.listNamespacedJob({
|
||||||
namespace: guardNamespace,
|
namespace: guardNamespace,
|
||||||
labelSelector: `paperclip.io/agent-id=${agentId},paperclip.io/adapter-type=claude_k8s`,
|
labelSelector: `paperclip.io/agent-id=${sanitizedAgentId},paperclip.io/adapter-type=claude_k8s`,
|
||||||
});
|
});
|
||||||
const running = existing.items.filter(
|
const running = existing.items.filter(
|
||||||
(j) => !j.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True"),
|
(j) => !j.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True"),
|
||||||
@@ -553,45 +580,72 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
(j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId,
|
(j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Pick the most recent reattachable orphan — same agent + task + session,
|
if (orphaned.length > 0) {
|
||||||
// not terminal. Only one target is chosen; any other orphans get
|
if (!reattachOrphanedJobs) {
|
||||||
// cleaned up as before.
|
// When reattach is disabled, block on any non-terminal orphan.
|
||||||
if (reattachOrphanedJobs && orphaned.length > 0) {
|
const names = orphaned.map((j) => j.metadata?.name).join(", ");
|
||||||
const candidates = orphaned
|
await onLog("stderr", `[paperclip] Concurrent run blocked: orphaned Job(s) running and reattach disabled: ${names}\n`);
|
||||||
.filter((j) =>
|
return {
|
||||||
isReattachableOrphan(j, {
|
exitCode: null,
|
||||||
agentId,
|
signal: null,
|
||||||
taskId: currentTaskLabel,
|
timedOut: false,
|
||||||
sessionId: currentSessionLabel,
|
errorMessage: `Concurrent run blocked: orphaned Job(s) still running for this agent (reattach disabled)`,
|
||||||
}),
|
errorCode: "k8s_concurrent_run_blocked",
|
||||||
)
|
|
||||||
.sort((a, b) => {
|
|
||||||
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
|
|
||||||
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
|
|
||||||
return bt - at;
|
|
||||||
});
|
|
||||||
const chosen = candidates[0];
|
|
||||||
const chosenName = chosen?.metadata?.name;
|
|
||||||
if (chosen && chosenName) {
|
|
||||||
reattachTarget = {
|
|
||||||
jobName: chosenName,
|
|
||||||
namespace: chosen.metadata?.namespace ?? guardNamespace,
|
|
||||||
priorRunId: chosen.metadata?.labels?.["paperclip.io/run-id"] ?? "",
|
|
||||||
image: chosen.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const toDelete = orphaned.filter(
|
// Apply the decision matrix to each orphan, newest-first. The first
|
||||||
(j) => !reattachTarget || j.metadata?.name !== reattachTarget.jobName,
|
// reattachable orphan becomes the target; any block classification
|
||||||
);
|
// stops the new run immediately. Orphans are never deleted here —
|
||||||
if (toDelete.length > 0) {
|
// terminal ones are cleaned up by TTL; live mismatches should not be
|
||||||
const orphanNames = toDelete.map((j) => j.metadata?.name).join(", ");
|
// killed because they may still be doing real work.
|
||||||
await onLog("stdout", `[paperclip] Cleaning up ${toDelete.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
|
const sortedOrphans = [...orphaned].sort((a, b) => {
|
||||||
for (const j of toDelete) {
|
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
|
||||||
const name = j.metadata?.name;
|
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
|
||||||
if (name) {
|
return bt - at;
|
||||||
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
|
});
|
||||||
|
for (const orphan of sortedOrphans) {
|
||||||
|
const classification = classifyOrphan(orphan, {
|
||||||
|
taskId: currentTaskLabel,
|
||||||
|
sessionId: currentSessionLabel,
|
||||||
|
});
|
||||||
|
const orphanName = orphan.metadata?.name ?? "unknown";
|
||||||
|
if (classification === "reattach") {
|
||||||
|
if (!reattachTarget) {
|
||||||
|
reattachTarget = {
|
||||||
|
jobName: orphanName,
|
||||||
|
namespace: orphan.metadata?.namespace ?? guardNamespace,
|
||||||
|
priorRunId: orphan.metadata?.labels?.["paperclip.io/run-id"] ?? "",
|
||||||
|
image: orphan.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} else if (classification === "block_task_unknown") {
|
||||||
|
await onLog("stderr", `[paperclip] Blocked: orphaned Job ${orphanName} has missing task label — cannot safely reattach\n`);
|
||||||
|
return {
|
||||||
|
exitCode: null,
|
||||||
|
signal: null,
|
||||||
|
timedOut: false,
|
||||||
|
errorMessage: `Concurrent run blocked: orphaned Job ${orphanName} has unknown task context`,
|
||||||
|
errorCode: "k8s_orphan_task_unknown",
|
||||||
|
};
|
||||||
|
} else if (classification === "block_task_mismatch") {
|
||||||
|
await onLog("stderr", `[paperclip] Blocked: orphaned Job ${orphanName} belongs to a different task\n`);
|
||||||
|
return {
|
||||||
|
exitCode: null,
|
||||||
|
signal: null,
|
||||||
|
timedOut: false,
|
||||||
|
errorMessage: `Concurrent run blocked: orphaned Job ${orphanName} is running a different task`,
|
||||||
|
errorCode: "k8s_concurrent_run_blocked",
|
||||||
|
};
|
||||||
|
} else if (classification === "block_session_mismatch") {
|
||||||
|
await onLog("stderr", `[paperclip] Blocked: orphaned Job ${orphanName} has a different session\n`);
|
||||||
|
return {
|
||||||
|
exitCode: null,
|
||||||
|
signal: null,
|
||||||
|
timedOut: false,
|
||||||
|
errorMessage: `Concurrent run blocked: orphaned Job ${orphanName} has a mismatched session`,
|
||||||
|
errorCode: "k8s_orphan_session_mismatch",
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -690,6 +744,26 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
}
|
}
|
||||||
|
|
||||||
await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
|
await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
|
||||||
|
|
||||||
|
// Relabel the reattached Job with the current run-id (and session-id if
|
||||||
|
// available) so the next concurrency guard sees it as owned by this run
|
||||||
|
// rather than an orphan from the prior run.
|
||||||
|
const labelPatch: Array<{ op: "add" | "replace"; path: string; value: string }> = [
|
||||||
|
{ op: "replace", path: "/metadata/labels/paperclip.io~1run-id", value: runId },
|
||||||
|
];
|
||||||
|
if (currentSessionLabel) {
|
||||||
|
labelPatch.push({ op: "replace", path: "/metadata/labels/paperclip.io~1session-id", value: currentSessionLabel });
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
await batchApi.patchNamespacedJob({
|
||||||
|
name: jobName,
|
||||||
|
namespace,
|
||||||
|
body: labelPatch,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
await onLog("stderr", `[paperclip] Warning: failed to relabel reattached Job ${jobName}: ${msg}\n`);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Build Job manifest
|
// Build Job manifest
|
||||||
const built = buildJobManifest({ ctx, selfPod, promptBundle });
|
const built = buildJobManifest({ ctx, selfPod, promptBundle });
|
||||||
@@ -700,6 +774,9 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
const claudeArgs = built.claudeArgs;
|
const claudeArgs = built.claudeArgs;
|
||||||
const promptMetrics = built.promptMetrics;
|
const promptMetrics = built.promptMetrics;
|
||||||
promptSecret = built.promptSecret;
|
promptSecret = built.promptSecret;
|
||||||
|
if (built.skippedLabels.length > 0) {
|
||||||
|
await onLog("stderr", `[paperclip] Warning: skipped ${built.skippedLabels.length} extra label(s) with reserved prefix: ${built.skippedLabels.join(", ")}\n`);
|
||||||
|
}
|
||||||
|
|
||||||
// Report invocation metadata
|
// Report invocation metadata
|
||||||
if (onMeta) {
|
if (onMeta) {
|
||||||
|
|||||||
@@ -166,6 +166,75 @@ describe("buildJobManifest", () => {
|
|||||||
expect(job.metadata?.labels?.["paperclip.io/task-id"]).toBeUndefined();
|
expect(job.metadata?.labels?.["paperclip.io/task-id"]).toBeUndefined();
|
||||||
expect(job.metadata?.labels?.["paperclip.io/session-id"]).toBeUndefined();
|
expect(job.metadata?.labels?.["paperclip.io/session-id"]).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("drops user label with paperclip.io/ prefix", () => {
|
||||||
|
ctx.config = { labels: { "paperclip.io/run-id": "hijacked" } };
|
||||||
|
const { job, skippedLabels } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/run-id"]).not.toBe("hijacked");
|
||||||
|
expect(skippedLabels).toContain("paperclip.io/run-id");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("drops user label with app.kubernetes.io/ prefix", () => {
|
||||||
|
ctx.config = { labels: { "app.kubernetes.io/managed-by": "attacker" } };
|
||||||
|
const { job, skippedLabels } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["app.kubernetes.io/managed-by"]).toBe("paperclip");
|
||||||
|
expect(skippedLabels).toContain("app.kubernetes.io/managed-by");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("passes through user label without reserved prefix", () => {
|
||||||
|
ctx.config = { labels: { "custom.io/team": "platform" } };
|
||||||
|
const { job, skippedLabels } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["custom.io/team"]).toBe("platform");
|
||||||
|
expect(skippedLabels).not.toContain("custom.io/team");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("populates skippedLabels with all dropped keys", () => {
|
||||||
|
ctx.config = {
|
||||||
|
labels: {
|
||||||
|
"paperclip.io/agent-id": "x",
|
||||||
|
"app.kubernetes.io/component": "y",
|
||||||
|
"safe": "z",
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const { skippedLabels } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(skippedLabels).toHaveLength(2);
|
||||||
|
expect(skippedLabels).toContain("paperclip.io/agent-id");
|
||||||
|
expect(skippedLabels).toContain("app.kubernetes.io/component");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("system label sanitization (N4)", () => {
|
||||||
|
it("sanitizes agent.id with @ to a valid RFC 1123 label", () => {
|
||||||
|
ctx.agent.id = "user@example.com";
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
const label = job.metadata?.labels?.["paperclip.io/agent-id"];
|
||||||
|
expect(label).toMatch(/^[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?$/);
|
||||||
|
expect(label).not.toContain("@");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("sanitizes agent.id with spaces to a valid RFC 1123 label", () => {
|
||||||
|
ctx.agent.id = "my agent id";
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
const label = job.metadata?.labels?.["paperclip.io/agent-id"];
|
||||||
|
expect(label).toMatch(/^[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?$/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("omits paperclip.io/run-id when sanitized value is null (all-invalid runId)", () => {
|
||||||
|
// inject an all-special-chars runId via context override — buildJobManifest
|
||||||
|
// uses ctx.runId directly
|
||||||
|
const badCtx = makeCtx({ runId: "@@@" });
|
||||||
|
const { job, skippedLabels } = buildJobManifest({ ctx: badCtx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/run-id"]).toBeUndefined();
|
||||||
|
expect(skippedLabels).toContain("paperclip.io/run-id");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("selector matches sanitized agent-id label", () => {
|
||||||
|
ctx.agent.id = "Agent@Test";
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
const agentLabel = job.metadata?.labels?.["paperclip.io/agent-id"];
|
||||||
|
// the label should equal what sanitizeLabelValue produces
|
||||||
|
expect(agentLabel).toBe("AgentTest");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("annotations", () => {
|
describe("annotations", () => {
|
||||||
|
|||||||
@@ -200,6 +200,8 @@ export interface JobBuildResult {
|
|||||||
/** Non-null when the prompt is too large for an env var and must be
|
/** Non-null when the prompt is too large for an env var and must be
|
||||||
* staged as a K8s Secret before creating the Job. */
|
* staged as a K8s Secret before creating the Job. */
|
||||||
promptSecret: PromptSecret | null;
|
promptSecret: PromptSecret | null;
|
||||||
|
/** User-supplied extra labels that were dropped because they used a reserved prefix. */
|
||||||
|
skippedLabels: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
function sanitizeForK8sName(value: string, maxLen = 16): string {
|
function sanitizeForK8sName(value: string, maxLen = 16): string {
|
||||||
@@ -442,15 +444,22 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
// Labels
|
// Labels — system identifiers must pass RFC 1123 label value format.
|
||||||
|
const sanitizedAgentId = sanitizeLabelValue(agent.id);
|
||||||
|
const sanitizedRunId = sanitizeLabelValue(runId);
|
||||||
|
const sanitizedCompanyId = sanitizeLabelValue(agent.companyId);
|
||||||
|
const skippedLabels: string[] = [];
|
||||||
|
if (!sanitizedRunId) skippedLabels.push("paperclip.io/run-id");
|
||||||
|
if (!sanitizedCompanyId) skippedLabels.push("paperclip.io/company-id");
|
||||||
const labels: Record<string, string> = {
|
const labels: Record<string, string> = {
|
||||||
"app.kubernetes.io/managed-by": "paperclip",
|
"app.kubernetes.io/managed-by": "paperclip",
|
||||||
"app.kubernetes.io/component": "agent-job",
|
"app.kubernetes.io/component": "agent-job",
|
||||||
"paperclip.io/agent-id": agent.id,
|
// sanitizedAgentId null-check is enforced in execute.ts before Job creation
|
||||||
"paperclip.io/run-id": runId,
|
"paperclip.io/agent-id": sanitizedAgentId ?? agent.id,
|
||||||
"paperclip.io/company-id": agent.companyId,
|
|
||||||
"paperclip.io/adapter-type": "claude_k8s",
|
"paperclip.io/adapter-type": "claude_k8s",
|
||||||
};
|
};
|
||||||
|
if (sanitizedRunId) labels["paperclip.io/run-id"] = sanitizedRunId;
|
||||||
|
if (sanitizedCompanyId) labels["paperclip.io/company-id"] = sanitizedCompanyId;
|
||||||
// Reattach-target labels: let a future execute() identify this Job as the
|
// Reattach-target labels: let a future execute() identify this Job as the
|
||||||
// continuation of the same logical unit of work (same task + same resume
|
// continuation of the same logical unit of work (same task + same resume
|
||||||
// session) so it can attach to the running pod across a Paperclip restart
|
// session) so it can attach to the running pod across a Paperclip restart
|
||||||
@@ -461,7 +470,11 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
|||||||
const sessionLabel = runtimeSessionId ? sanitizeLabelValue(runtimeSessionId) : null;
|
const sessionLabel = runtimeSessionId ? sanitizeLabelValue(runtimeSessionId) : null;
|
||||||
if (sessionLabel) labels["paperclip.io/session-id"] = sessionLabel;
|
if (sessionLabel) labels["paperclip.io/session-id"] = sessionLabel;
|
||||||
for (const [key, value] of Object.entries(extraLabels)) {
|
for (const [key, value] of Object.entries(extraLabels)) {
|
||||||
labels[key] = value;
|
if (key.startsWith("paperclip.io/") || key.startsWith("app.kubernetes.io/")) {
|
||||||
|
skippedLabels.push(key);
|
||||||
|
} else {
|
||||||
|
labels[key] = value;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Volumes
|
// Volumes
|
||||||
@@ -628,5 +641,5 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
return { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret };
|
return { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret, skippedLabels };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
import { describe, it, expect, vi } from "vitest";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { prepareClaudePromptBundle } from "./prompt-cache.js";
|
||||||
|
|
||||||
|
const onLog = vi.fn();
|
||||||
|
|
||||||
|
describe("prepareClaudePromptBundle path traversal validation", () => {
|
||||||
|
const validArgs = {
|
||||||
|
skills: [],
|
||||||
|
instructionsContents: null,
|
||||||
|
onLog,
|
||||||
|
};
|
||||||
|
|
||||||
|
it("rejects companyId containing ..", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: ".." })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects companyId containing ../x", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "../x" })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects companyId containing /", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "a/b" })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects companyId containing backslash", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "a\\b" })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects companyId containing null byte", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "a\0b" })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects empty companyId", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: "" })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects whitespace-only companyId", async () => {
|
||||||
|
await expect(prepareClaudePromptBundle({ ...validArgs, companyId: " " })).rejects.toThrow(/companyId/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("accepts a valid companyId", async () => {
|
||||||
|
vi.stubEnv("PAPERCLIP_HOME", path.join(os.tmpdir(), `prompt-cache-test-${process.pid}`));
|
||||||
|
const result = await prepareClaudePromptBundle({ ...validArgs, companyId: "acme-co" });
|
||||||
|
expect(result.rootDir).toContain("acme-co");
|
||||||
|
vi.unstubAllEnvs();
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -21,6 +21,13 @@ export interface ClaudePromptBundle {
|
|||||||
|
|
||||||
const DEFAULT_PAPERCLIP_INSTANCE_ID = "default";
|
const DEFAULT_PAPERCLIP_INSTANCE_ID = "default";
|
||||||
|
|
||||||
|
function validatePathComponent(value: string, fieldName: string): void {
|
||||||
|
if (value.trim().length === 0) throw new Error(`Invalid ${fieldName}: must not be empty`);
|
||||||
|
if (value.includes("/") || value.includes("\\")) throw new Error(`Invalid ${fieldName}: must not contain path separators`);
|
||||||
|
if (value.includes("..")) throw new Error(`Invalid ${fieldName}: must not contain ".."`);
|
||||||
|
if (value.includes("\0")) throw new Error(`Invalid ${fieldName}: must not contain null bytes`);
|
||||||
|
}
|
||||||
|
|
||||||
function resolveManagedClaudePromptCacheRoot(companyId: string): string {
|
function resolveManagedClaudePromptCacheRoot(companyId: string): string {
|
||||||
const paperclipHome =
|
const paperclipHome =
|
||||||
(typeof process.env.PAPERCLIP_HOME === "string" && process.env.PAPERCLIP_HOME.trim().length > 0
|
(typeof process.env.PAPERCLIP_HOME === "string" && process.env.PAPERCLIP_HOME.trim().length > 0
|
||||||
@@ -31,6 +38,8 @@ function resolveManagedClaudePromptCacheRoot(companyId: string): string {
|
|||||||
(typeof process.env.PAPERCLIP_INSTANCE_ID === "string" && process.env.PAPERCLIP_INSTANCE_ID.trim().length > 0
|
(typeof process.env.PAPERCLIP_INSTANCE_ID === "string" && process.env.PAPERCLIP_INSTANCE_ID.trim().length > 0
|
||||||
? process.env.PAPERCLIP_INSTANCE_ID.trim()
|
? process.env.PAPERCLIP_INSTANCE_ID.trim()
|
||||||
: null) ?? DEFAULT_PAPERCLIP_INSTANCE_ID;
|
: null) ?? DEFAULT_PAPERCLIP_INSTANCE_ID;
|
||||||
|
validatePathComponent(companyId, "companyId");
|
||||||
|
validatePathComponent(instanceId, "instanceId");
|
||||||
return path.resolve(paperclipHome, "instances", instanceId, "companies", companyId, "claude-prompt-cache");
|
return path.resolve(paperclipHome, "instances", instanceId, "companies", companyId, "claude-prompt-cache");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user