chore: update lockfile

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Gandalf the Greybeard
2026-04-23 22:45:31 +00:00
parent f41ae818ef
commit 4210f51937
9 changed files with 535 additions and 111 deletions
+33
View File
@@ -1,3 +1,36 @@
import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils"; import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
import type * as k8s from "@kubernetes/client-node";
/**
* Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
* Works for both v0.x (response.statusCode) and v1.0+ (response.status, message).
* Exported for unit tests.
*/
export declare function isK8s404(err: unknown): boolean;
/**
* Build the error message when Claude's stdout contains no result event.
* Skips system/init event lines so the UI doesn't display the raw init JSON.
* Exported for unit tests.
*/
export declare function buildPartialRunError(exitCode: number | null, model: string, stdout: string): string;
/**
* Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
* not match the current runId) as a potential reattach target. A Job is
* reattachable when it belongs to the same agent, same task, and same resume
* session as the current run — meaning the previous Paperclip instance was
* mid-stream on the exact piece of work this new run was dispatched to do.
* Exported for unit tests.
*/
export declare function isReattachableOrphan(job: k8s.V1Job, expected: {
agentId: string;
taskId: string | null;
sessionId: string | null;
}): boolean;
/**
* Build an error message for a pod that reached phase=Failed before or
* instead of streaming logs. Includes the claude container's terminated exit
* code and reason when available so operators can diagnose crashes without
* needing kubectl. Exported for unit tests.
*/
export declare function describePodTerminatedError(podName: string, phase: string, containerStatuses: k8s.V1ContainerStatus[]): string;
export declare function execute(ctx: AdapterExecutionContext): Promise<AdapterExecutionResult>; export declare function execute(ctx: AdapterExecutionContext): Promise<AdapterExecutionResult>;
//# sourceMappingURL=execute.d.ts.map //# sourceMappingURL=execute.d.ts.map
+1 -1
View File
@@ -1 +1 @@
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../../src/server/execute.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAiUlG,wBAAsB,OAAO,CAAC,GAAG,EAAE,uBAAuB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAoc3F"} {"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../../src/server/execute.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAWlG,OAAO,KAAK,KAAK,GAAG,MAAM,yBAAyB,CAAC;AAYpD;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAO9C;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GACb,MAAM,CA4BR;AAED;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,GAAG,EAAE,GAAG,CAAC,KAAK,EACd,QAAQ,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAC7E,OAAO,CAaT;AAED;;;;;GAKG;AACH,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,iBAAiB,EAAE,GAAG,CAAC,iBAAiB,EAAE,GACzC,MAAM,CASR;AAkWD,wBAAsB,OAAO,CAAC,GAAG,EAAE,uBAAuB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAkkB3F"}
+370 -104
View File
@@ -1,12 +1,110 @@
import { asString, asNumber, asBoolean, parseObject } from "@paperclipai/adapter-utils/server-utils"; import { asString, asNumber, asBoolean, parseObject } from "@paperclipai/adapter-utils/server-utils";
import { parseClaudeStreamJson, describeClaudeFailure, isClaudeMaxTurnsResult, isClaudeUnknownSessionError, } from "./parse.js"; import { parseClaudeStreamJson, describeClaudeFailure, isClaudeMaxTurnsResult, isClaudeUnknownSessionError, } from "./parse.js";
import { getSelfPodInfo, getBatchApi, getCoreApi, getLogApi } from "./k8s-client.js"; import { getSelfPodInfo, getBatchApi, getCoreApi, getLogApi } from "./k8s-client.js";
import { buildJobManifest } from "./job-manifest.js"; import { buildJobManifest, sanitizeLabelValue } from "./job-manifest.js";
import { LogLineDedupFilter } from "./log-dedup.js";
import { Writable } from "node:stream"; import { Writable } from "node:stream";
const POLL_INTERVAL_MS = 2000; const POLL_INTERVAL_MS = 2000;
const KEEPALIVE_INTERVAL_MS = 15_000; const KEEPALIVE_INTERVAL_MS = 15_000;
const LOG_STREAM_RECONNECT_DELAY_MS = 3_000; const LOG_STREAM_RECONNECT_DELAY_MS = 3_000;
const MAX_LOG_RECONNECT_ATTEMPTS = 50; const MAX_LOG_RECONNECT_ATTEMPTS = 50;
// How long to keep refreshing onSpawn after the Job reaches a terminal state.
// Covers the cleanup path (delete job, parse stdout) so a slow K8s API call
// doesn't trip the 5-minute reaper staleness window.
const POST_TERMINAL_KEEPALIVE_MS = 90_000;
/**
* Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
* Works for both v0.x (response.statusCode) and v1.0+ (response.status, message).
* Exported for unit tests.
*/
export function isK8s404(err) {
if (!(err instanceof Error))
return false;
const e = err;
const resp = e.response;
if (resp?.statusCode === 404 || resp?.status === 404)
return true;
if (e.statusCode === 404)
return true;
return /HTTP-Code:\s*404\b/.test(err.message);
}
/**
* Build the error message when Claude's stdout contains no result event.
* Skips system/init event lines so the UI doesn't display the raw init JSON.
* Exported for unit tests.
*/
export function buildPartialRunError(exitCode, model, stdout) {
if (exitCode === 0)
return "Failed to parse Claude JSON output";
// Walk stdout lines, skip system events, return the first real content line.
const firstContentLine = stdout.split(/\r?\n/)
.map((l) => l.trim())
.find((l) => {
if (!l)
return false;
try {
const obj = JSON.parse(l);
if (typeof obj === "object" && obj !== null && obj.type === "system")
return false;
}
catch {
// not JSON — treat as content
}
return true;
}) ?? "";
// If we only have system/init events and nothing else, surface the model
// name so the operator can diagnose missing credentials or unsupported model.
const initOnlyOutput = stdout.trim() !== "" && model !== "" && !firstContentLine;
if (initOnlyOutput) {
const modelHint = model ? ` (model: ${model})` : "";
return `Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`;
}
return firstContentLine
? `Claude exited with code ${exitCode ?? -1}: ${firstContentLine}`
: `Claude exited with code ${exitCode ?? -1}`;
}
/**
* Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
* not match the current runId) as a potential reattach target. A Job is
* reattachable when it belongs to the same agent, same task, and same resume
* session as the current run — meaning the previous Paperclip instance was
* mid-stream on the exact piece of work this new run was dispatched to do.
* Exported for unit tests.
*/
export function isReattachableOrphan(job, expected) {
if (!expected.taskId || !expected.sessionId)
return false;
const labels = job.metadata?.labels ?? {};
if (labels["paperclip.io/adapter-type"] !== "claude_k8s")
return false;
if (labels["paperclip.io/agent-id"] !== expected.agentId)
return false;
if (labels["paperclip.io/task-id"] !== expected.taskId)
return false;
if (labels["paperclip.io/session-id"] !== expected.sessionId)
return false;
const conditions = job.status?.conditions ?? [];
const terminal = conditions.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True");
if (terminal)
return false;
return true;
}
/**
* Build an error message for a pod that reached phase=Failed before or
* instead of streaming logs. Includes the claude container's terminated exit
* code and reason when available so operators can diagnose crashes without
* needing kubectl. Exported for unit tests.
*/
export function describePodTerminatedError(podName, phase, containerStatuses) {
const mainCs = containerStatuses.find((cs) => cs.name === "claude");
const terminated = mainCs?.state?.terminated;
if (terminated) {
const code = terminated.exitCode ?? "unknown";
const reason = terminated.reason ?? terminated.message ?? "no reason";
return `Pod ${podName} reached phase=${phase}: claude exited ${code} (${reason})`;
}
return `Pod ${podName} reached phase=${phase}`;
}
/** /**
* Wait for the Job's pod to reach a terminal or running state. * Wait for the Job's pod to reach a terminal or running state.
* Returns the pod name once logs can be streamed, or throws on failure. * Returns the pod name once logs can be streamed, or throws on failure.
@@ -52,14 +150,22 @@ async function waitForPod(namespace, jobName, timeoutMs, onLog, kubeconfigPath)
details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`); details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`);
else if (cs.state?.running) else if (cs.state?.running)
details.push(`${cs.name}: running`); details.push(`${cs.name}: running`);
else if (cs.state?.terminated)
details.push(`${cs.name}: terminated (exit ${cs.state.terminated.exitCode ?? "?"}, ${cs.state.terminated.reason ?? "no reason"})`);
} }
await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`); await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`);
lastStatus = statusKey; lastStatus = statusKey;
} }
// Ready to stream logs // Ready to stream logs
if (phase === "Running" || phase === "Succeeded" || phase === "Failed") { if (phase === "Running" || phase === "Succeeded") {
return podName; return podName;
} }
// phase=Failed means the pod crashed before we could stream logs.
// Throwing here routes the caller into the error path with a structured
// message instead of entering the log-streaming path with a dead pod.
if (phase === "Failed") {
throw new Error(describePodTerminatedError(podName, phase, containerStatuses));
}
// Init containers done + main running (phase may still say Pending briefly) // Init containers done + main running (phase may still say Pending briefly)
const allInitsDone = initStatuses.length > 0 && initStatuses.every((s) => s.state?.terminated?.exitCode === 0); const allInitsDone = initStatuses.length > 0 && initStatuses.every((s) => s.state?.terminated?.exitCode === 0);
const mainRunning = containerStatuses.some((s) => s.state?.running); const mainRunning = containerStatuses.some((s) => s.state?.running);
@@ -104,16 +210,32 @@ async function waitForPod(namespace, jobName, timeoutMs, onLog, kubeconfigPath)
* Stream pod logs once via follow. Returns accumulated stdout when the * Stream pod logs once via follow. Returns accumulated stdout when the
* stream ends (container exit, API disconnect, or abort signal). * stream ends (container exit, API disconnect, or abort signal).
*/ */
async function streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds) { async function streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal) {
const logApi = getLogApi(kubeconfigPath); const logApi = getLogApi(kubeconfigPath);
const chunks = []; const chunks = [];
const writable = new Writable({ const writable = new Writable({
write(chunk, _encoding, callback) { write(chunk, _encoding, callback) {
const text = chunk.toString("utf-8"); const text = chunk.toString("utf-8");
chunks.push(text); chunks.push(text);
void onLog("stdout", text).then(() => callback(), callback); const emitted = dedup ? dedup.filter(text) : text;
if (!emitted) {
callback();
return;
}
void onLog("stdout", emitted).then(() => callback(), callback);
}, },
}); });
// When the job completion signal fires, destroy the writable to abort the
// in-flight follow stream. Without this, logApi.log can hang indefinitely
// when the pod terminates without closing the HTTP connection cleanly.
let stopPoller = null;
if (stopSignal) {
stopPoller = setInterval(() => {
if (stopSignal.stopped && !writable.destroyed) {
writable.destroy();
}
}, 200);
}
try { try {
await logApi.log(namespace, podName, "claude", writable, { await logApi.log(namespace, podName, "claude", writable, {
follow: true, follow: true,
@@ -122,8 +244,12 @@ async function streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinc
}); });
} }
catch { catch {
// follow may fail if the container already exited or the API // follow may fail if the container already exited, the API connection
// connection dropped — not fatal, caller decides whether to retry. // dropped, or we aborted via writable.destroy() — not fatal.
}
finally {
if (stopPoller)
clearInterval(stopPoller);
} }
return chunks.join(""); return chunks.join("");
} }
@@ -143,6 +269,9 @@ async function streamPodLogs(namespace, podName, onLog, kubeconfigPath, stopSign
// reconnects use a tight window instead of an ever-growing one anchored // reconnects use a tight window instead of an ever-growing one anchored
// at stream start. This is the primary fix for FAR-105 duplicative logs. // at stream start. This is the primary fix for FAR-105 duplicative logs.
let lastLogReceivedAt = Math.floor(Date.now() / 1000); let lastLogReceivedAt = Math.floor(Date.now() / 1000);
// Shared across reconnects so replayed lines inside the `sinceSeconds`
// overlap window are dropped before they reach the streaming UI (FAR-123).
const dedup = new LogLineDedupFilter();
while (!stopSignal?.stopped) { while (!stopSignal?.stopped) {
if (attempt >= MAX_LOG_RECONNECT_ATTEMPTS) { if (attempt >= MAX_LOG_RECONNECT_ATTEMPTS) {
await onLog("stderr", `[paperclip] Log stream: max reconnect attempts (${MAX_LOG_RECONNECT_ATTEMPTS}) reached — giving up.\n`); await onLog("stderr", `[paperclip] Log stream: max reconnect attempts (${MAX_LOG_RECONNECT_ATTEMPTS}) reached — giving up.\n`);
@@ -158,7 +287,7 @@ async function streamPodLogs(namespace, podName, onLog, kubeconfigPath, stopSign
await onLog("stdout", `[paperclip] Log stream disconnected — reconnecting (attempt ${attempt}/${MAX_LOG_RECONNECT_ATTEMPTS})...\n`); await onLog("stdout", `[paperclip] Log stream disconnected — reconnecting (attempt ${attempt}/${MAX_LOG_RECONNECT_ATTEMPTS})...\n`);
} }
const preStreamTs = Math.floor(Date.now() / 1000); const preStreamTs = Math.floor(Date.now() / 1000);
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds); const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal);
if (result) { if (result) {
allChunks.push(result); allChunks.push(result);
// Update last-received timestamp to now (the stream just ended, // Update last-received timestamp to now (the stream just ended,
@@ -177,6 +306,11 @@ async function streamPodLogs(namespace, podName, onLog, kubeconfigPath, stopSign
// Brief pause before reconnecting to avoid tight loops. // Brief pause before reconnecting to avoid tight loops.
await new Promise((resolve) => setTimeout(resolve, LOG_STREAM_RECONNECT_DELAY_MS)); await new Promise((resolve) => setTimeout(resolve, LOG_STREAM_RECONNECT_DELAY_MS));
} }
// Flush any buffered partial line so the final assistant/result chunk
// isn't dropped when the stream ends mid-line.
const tail = dedup.flush();
if (tail)
await onLog("stdout", tail);
return allChunks.join(""); return allChunks.join("");
} }
/** /**
@@ -199,13 +333,27 @@ async function readPodLogs(namespace, podName, kubeconfigPath) {
} }
/** /**
* Wait for the Job to reach a terminal state (Complete or Failed). * Wait for the Job to reach a terminal state (Complete or Failed).
* Returns the Job's final status. * Returns the Job's final status. A 404 (job deleted by TTL or externally)
* is treated as a soft terminal: succeeded=false, timedOut=false, jobGone=true.
* The caller should log this and fall through to stdout parsing.
*/ */
async function waitForJobCompletion(namespace, jobName, timeoutMs, kubeconfigPath) { async function waitForJobCompletion(namespace, jobName, timeoutMs, kubeconfigPath) {
const batchApi = getBatchApi(kubeconfigPath); const batchApi = getBatchApi(kubeconfigPath);
const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : 0; const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : 0;
while (deadline === 0 || Date.now() < deadline) { while (deadline === 0 || Date.now() < deadline) {
const job = await batchApi.readNamespacedJob({ name: jobName, namespace }); let job;
try {
job = await batchApi.readNamespacedJob({ name: jobName, namespace });
}
catch (err) {
if (isK8s404(err)) {
// Job was deleted (TTL garbage collection or external deletion) before
// we detected its terminal condition. The container must have already
// exited for TTL to fire, so log streaming will have captured the output.
return { succeeded: false, timedOut: false, jobGone: true };
}
throw err;
}
const conditions = job.status?.conditions ?? []; const conditions = job.status?.conditions ?? [];
const complete = conditions.find((c) => c.type === "Complete" && c.status === "True"); const complete = conditions.find((c) => c.type === "Complete" && c.status === "True");
if (complete) if (complete)
@@ -261,10 +409,18 @@ export async function execute(ctx) {
// Guard: claude_k8s must not run concurrently for the same agent (shared PVC/session). // Guard: claude_k8s must not run concurrently for the same agent (shared PVC/session).
// After a server restart, orphaned K8s Jobs from previous (now-failed) runs may // After a server restart, orphaned K8s Jobs from previous (now-failed) runs may
// still be running. We detect those by comparing the Job's run-id label against // still be running. We detect those by comparing the Job's run-id label against
// the current runId and clean them up so this execution can proceed. // the current runId. When reattachOrphanedJobs is enabled and the orphan matches
// the current agent+task+session, we attach to it instead of deleting it (FAR-124).
const agentId = ctx.agent.id; const agentId = ctx.agent.id;
const selfPod = await getSelfPodInfo(kubeconfigPath); const selfPod = await getSelfPodInfo(kubeconfigPath);
const guardNamespace = asString(config.namespace, "") || selfPod.namespace; const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
const runtimeSessionParams = parseObject(runtime.sessionParams);
const currentSessionIdRaw = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
const currentSessionLabel = currentSessionIdRaw ? sanitizeLabelValue(currentSessionIdRaw) : null;
const currentTaskIdRaw = asString(ctx.context.taskId, "") || asString(ctx.context.issueId, "");
const currentTaskLabel = currentTaskIdRaw ? sanitizeLabelValue(currentTaskIdRaw) : null;
let reattachTarget = null;
try { try {
const batchApi = getBatchApi(kubeconfigPath); const batchApi = getBatchApi(kubeconfigPath);
const existing = await batchApi.listNamespacedJob({ const existing = await batchApi.listNamespacedJob({
@@ -277,10 +433,37 @@ export async function execute(ctx) {
// concurrent jobs (same runId — shouldn't happen but guard defensively). // concurrent jobs (same runId — shouldn't happen but guard defensively).
const orphaned = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") !== runId); const orphaned = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") !== runId);
const samRun = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId); const samRun = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId);
if (orphaned.length > 0) { // Pick the most recent reattachable orphan — same agent + task + session,
const orphanNames = orphaned.map((j) => j.metadata?.name).join(", "); // not terminal. Only one target is chosen; any other orphans get
await onLog("stdout", `[paperclip] Cleaning up ${orphaned.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`); // cleaned up as before.
for (const j of orphaned) { if (reattachOrphanedJobs && orphaned.length > 0) {
const candidates = orphaned
.filter((j) => isReattachableOrphan(j, {
agentId,
taskId: currentTaskLabel,
sessionId: currentSessionLabel,
}))
.sort((a, b) => {
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
return bt - at;
});
const chosen = candidates[0];
const chosenName = chosen?.metadata?.name;
if (chosen && chosenName) {
reattachTarget = {
jobName: chosenName,
namespace: chosen.metadata?.namespace ?? guardNamespace,
priorRunId: chosen.metadata?.labels?.["paperclip.io/run-id"] ?? "",
image: chosen.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
};
}
}
const toDelete = orphaned.filter((j) => !reattachTarget || j.metadata?.name !== reattachTarget.jobName);
if (toDelete.length > 0) {
const orphanNames = toDelete.map((j) => j.metadata?.name).join(", ");
await onLog("stdout", `[paperclip] Cleaning up ${toDelete.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
for (const j of toDelete) {
const name = j.metadata?.name; const name = j.metadata?.name;
if (name) { if (name) {
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath); await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
@@ -317,81 +500,114 @@ export async function execute(ctx) {
errorCode: "k8s_concurrency_guard_unreachable", errorCode: "k8s_concurrency_guard_unreachable",
}; };
} }
// Build Job manifest
const { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret } = buildJobManifest({
ctx,
selfPod,
});
// Report invocation metadata
if (onMeta) {
await onMeta({
adapterType: "claude_k8s",
command: `kubectl job/${jobName}`,
cwd: namespace,
commandArgs: claudeArgs,
commandNotes: [
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
`Namespace: ${namespace}`,
`Timeout: ${timeoutSec}s`,
],
prompt,
...(promptMetrics ? { promptMetrics } : {}),
context: ctx.context,
});
}
// If the prompt is large, create a Secret to hold it (avoids the ~1 MiB
// PodSpec limit). The Secret is cleaned up in the finally block.
const coreApi = getCoreApi(kubeconfigPath); const coreApi = getCoreApi(kubeconfigPath);
if (promptSecret) { const batchApi = getBatchApi(kubeconfigPath);
try { let jobName;
await coreApi.createNamespacedSecret({ let namespace;
namespace: promptSecret.namespace, let promptSecret = null;
body: { if (reattachTarget) {
apiVersion: "v1", jobName = reattachTarget.jobName;
kind: "Secret", namespace = reattachTarget.namespace;
metadata: { // Announce reattach metadata. Prompt and args aren't known here — they
name: promptSecret.name, // belong to the prior run that created this pod and are already present
namespace: promptSecret.namespace, // on the running container.
labels: { if (onMeta) {
"app.kubernetes.io/managed-by": "paperclip", await onMeta({
"paperclip.io/adapter-type": "claude_k8s", adapterType: "claude_k8s",
"paperclip.io/run-id": runId, command: `kubectl job/${jobName}`,
}, cwd: namespace,
}, commandArgs: [],
stringData: promptSecret.data, commandNotes: [
}, `Image: ${reattachTarget.image}`,
`Namespace: ${namespace}`,
`Reattached from prior run: ${reattachTarget.priorRunId || "unknown"}`,
`Timeout: ${timeoutSec}s`,
],
prompt: "",
context: ctx.context,
}); });
await onLog("stdout", `[paperclip] Created prompt Secret: ${promptSecret.name} (${Math.round(Buffer.byteLength(prompt, "utf-8") / 1024)} KiB)\n`); }
await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
}
else {
// Build Job manifest
const built = buildJobManifest({ ctx, selfPod });
const job = built.job;
jobName = built.jobName;
namespace = built.namespace;
const prompt = built.prompt;
const claudeArgs = built.claudeArgs;
const promptMetrics = built.promptMetrics;
promptSecret = built.promptSecret;
// Report invocation metadata
if (onMeta) {
await onMeta({
adapterType: "claude_k8s",
command: `kubectl job/${jobName}`,
cwd: namespace,
commandArgs: claudeArgs,
commandNotes: [
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
`Namespace: ${namespace}`,
`Timeout: ${timeoutSec}s`,
],
prompt,
...(promptMetrics ? { promptMetrics } : {}),
context: ctx.context,
});
}
// If the prompt is large, create a Secret to hold it (avoids the ~1 MiB
// PodSpec limit). The Secret is cleaned up in the finally block.
if (promptSecret) {
try {
await coreApi.createNamespacedSecret({
namespace: promptSecret.namespace,
body: {
apiVersion: "v1",
kind: "Secret",
metadata: {
name: promptSecret.name,
namespace: promptSecret.namespace,
labels: {
"app.kubernetes.io/managed-by": "paperclip",
"paperclip.io/adapter-type": "claude_k8s",
"paperclip.io/run-id": runId,
},
},
stringData: promptSecret.data,
},
});
await onLog("stdout", `[paperclip] Created prompt Secret: ${promptSecret.name} (${Math.round(Buffer.byteLength(prompt, "utf-8") / 1024)} KiB)\n`);
}
catch (err) {
const msg = err instanceof Error ? err.message : String(err);
await onLog("stderr", `[paperclip] Failed to create prompt Secret: ${msg}\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Failed to create prompt Secret: ${msg}`,
errorCode: "k8s_prompt_secret_create_failed",
};
}
}
// Create the Job
try {
await batchApi.createNamespacedJob({ namespace, body: job });
} }
catch (err) { catch (err) {
const msg = err instanceof Error ? err.message : String(err); const msg = err instanceof Error ? err.message : String(err);
await onLog("stderr", `[paperclip] Failed to create prompt Secret: ${msg}\n`); await onLog("stderr", `[paperclip] Failed to create K8s Job: ${msg}\n`);
return { return {
exitCode: null, exitCode: null,
signal: null, signal: null,
timedOut: false, timedOut: false,
errorMessage: `Failed to create prompt Secret: ${msg}`, errorMessage: `Failed to create Kubernetes Job: ${msg}`,
errorCode: "k8s_prompt_secret_create_failed", errorCode: "k8s_job_create_failed",
}; };
} }
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
} }
// Create the Job
const batchApi = getBatchApi(kubeconfigPath);
try {
await batchApi.createNamespacedJob({ namespace, body: job });
}
catch (err) {
const msg = err instanceof Error ? err.message : String(err);
await onLog("stderr", `[paperclip] Failed to create K8s Job: ${msg}\n`);
return {
exitCode: null,
signal: null,
timedOut: false,
errorMessage: `Failed to create Kubernetes Job: ${msg}`,
errorCode: "k8s_job_create_failed",
};
}
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
let stdout = ""; let stdout = "";
let exitCode = null; let exitCode = null;
let jobTimedOut = false; let jobTimedOut = false;
@@ -404,8 +620,24 @@ export async function execute(ctx) {
const scheduleTimeoutMs = 120_000; // 2 minutes for scheduling const scheduleTimeoutMs = 120_000; // 2 minutes for scheduling
let podName; let podName;
try { try {
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath); if (reattachTarget) {
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`); // Pod is already running from the prior run — look it up directly.
const podList = await coreApi.listNamespacedPod({
namespace,
labelSelector: `job-name=${jobName}`,
});
const pod = podList.items[0];
const name = pod?.metadata?.name;
if (!name) {
throw new Error(`Reattach target Job ${jobName} has no pod`);
}
podName = name;
await onLog("stdout", `[paperclip] Reattached to pod ${podName}\n`);
}
else {
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
}
// Notify the server that execution has started. This sets // Notify the server that execution has started. This sets
// processStartedAt and refreshes updatedAt in the DB, which the // processStartedAt and refreshes updatedAt in the DB, which the
// stale-run reaper (reapOrphanedRuns) uses to decide liveness. // stale-run reaper (reapOrphanedRuns) uses to decide liveness.
@@ -419,13 +651,14 @@ export async function execute(ctx) {
} }
catch (err) { catch (err) {
const msg = err instanceof Error ? err.message : String(err); const msg = err instanceof Error ? err.message : String(err);
await onLog("stderr", `[paperclip] Pod scheduling failed: ${msg}\n`); const phase = reattachTarget ? "reattach" : "scheduling";
await onLog("stderr", `[paperclip] Pod ${phase} failed: ${msg}\n`);
return { return {
exitCode: null, exitCode: null,
signal: null, signal: null,
timedOut: false, timedOut: false,
errorMessage: `Pod scheduling failed: ${msg}`, errorMessage: `Pod ${phase} failed: ${msg}`,
errorCode: "k8s_pod_schedule_failed", errorCode: reattachTarget ? "k8s_pod_reattach_failed" : "k8s_pod_schedule_failed",
}; };
} }
// Stream logs and wait for completion concurrently. // Stream logs and wait for completion concurrently.
@@ -457,18 +690,32 @@ export async function execute(ctx) {
let lastLogAt = Date.now(); let lastLogAt = Date.now();
let keepaliveTick = 0; let keepaliveTick = 0;
let keepaliveJobTerminal = false; let keepaliveJobTerminal = false;
let keepaliveJobTerminalAt = null;
keepaliveTimer = setInterval(() => { keepaliveTimer = setInterval(() => {
// Fire-and-forget the async work; setInterval callbacks must be // Fire-and-forget the async work; setInterval callbacks must be
// synchronous or the timer will drift. // synchronous or the timer will drift.
void (async () => { void (async () => {
if (keepaliveJobTerminal) if (keepaliveJobTerminal) {
// Post-terminal window: keep refreshing onSpawn during cleanup
// (job deletion, log parsing, K8s API calls) so the reaper doesn't
// fire a false process_lost while execute() is still running.
if (ctx.onSpawn &&
keepaliveJobTerminalAt !== null &&
Date.now() - keepaliveJobTerminalAt <= POST_TERMINAL_KEEPALIVE_MS) {
keepaliveTick++;
if (keepaliveTick % 6 === 0) {
void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => { });
}
}
return; return;
}
// Verify the Job is still alive before announcing or refreshing. // Verify the Job is still alive before announcing or refreshing.
try { try {
const job = await batchApi.readNamespacedJob({ name: jobName, namespace }); const job = await batchApi.readNamespacedJob({ name: jobName, namespace });
const terminal = job.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True"); const terminal = job.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True");
if (terminal) { if (terminal) {
keepaliveJobTerminal = true; keepaliveJobTerminal = true;
keepaliveJobTerminalAt = Date.now();
return; return;
} }
} }
@@ -477,10 +724,9 @@ export async function execute(ctx) {
// connection resets should NOT permanently disable the keepalive — // connection resets should NOT permanently disable the keepalive —
// the next tick will re-check and the reaper uses the staleness // the next tick will re-check and the reaper uses the staleness
// window as a safety net. // window as a safety net.
const statusCode = err?.response?.statusCode if (isK8s404(err)) {
?? err?.statusCode;
if (statusCode === 404) {
keepaliveJobTerminal = true; keepaliveJobTerminal = true;
keepaliveJobTerminalAt = Date.now();
return; return;
} }
// Log transient errors but leave keepaliveJobTerminal false so // Log transient errors but leave keepaliveJobTerminal false so
@@ -525,23 +771,44 @@ export async function execute(ctx) {
if (logResult.status === "fulfilled") { if (logResult.status === "fulfilled") {
stdout = logResult.value; stdout = logResult.value;
} }
// If the follow stream missed output (container exited quickly), do a // One-shot log fallback: handles two failure modes with a single read.
// one-shot log read as fallback before the pod is cleaned up. // Mode 1 — empty stream: the follow stream returned nothing (fast exit before connection).
if (!stdout.trim()) { // Mode 2 — partial stream: we have some output but no result event (follow stream raced
await onLog("stdout", `[paperclip] Log stream returned empty — reading pod logs directly...\n`); // with container exit and captured only the init line before the connection dropped).
stdout = await readPodLogs(namespace, podName, kubeconfigPath); // A one-shot readPodLogs is more reliable for already-terminated containers and reads
if (stdout.trim()) { // from the beginning of the log, giving us the full output.
// We use a cheap string scan for the result-event guard (avoids a full JSON parse here;
// the authoritative parse happens once below after all fallbacks complete).
const hasResultEvent = stdout.includes('"type":"result"');
const needsOneShot = !stdout.trim() || (stdout.trim() && !hasResultEvent);
if (needsOneShot) {
if (!stdout.trim()) {
await onLog("stdout", `[paperclip] Log stream returned empty — reading pod logs directly...\n`);
}
const oneShotLogs = await readPodLogs(namespace, podName, kubeconfigPath);
if (!stdout.trim() && oneShotLogs.trim()) {
stdout = oneShotLogs;
await onLog("stdout", stdout); await onLog("stdout", stdout);
} }
else if (oneShotLogs && oneShotLogs.length > stdout.length) {
await onLog("stdout", `[paperclip] Log stream captured partial output — supplemental one-shot read returned more content.\n`);
stdout = oneShotLogs;
}
} }
if (completionResult.status === "fulfilled") { if (completionResult.status === "fulfilled") {
jobTimedOut = completionResult.value.timedOut; jobTimedOut = completionResult.value.timedOut;
if (completionResult.value.jobGone) {
// Job was deleted by TTL or externally before we observed the Complete/Failed
// condition. The container must have exited first (TTL only fires after
// completion), so log streaming has captured the full output — continue
// to stdout parsing rather than returning an error.
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
}
} }
else { else {
// waitForJobCompletion threw — re-check job state to avoid returning // waitForJobCompletion threw an unexpected error — re-check job state to
// while the job is still running (which would cause UI staleness and // avoid returning while the job is still running. Use a bounded timeout
// concurrency errors on retry). Use a bounded timeout (60s) so we // (60s) so we don't hang the heartbeat indefinitely if the K8s API is degraded.
// don't hang the heartbeat indefinitely if the K8s API is degraded.
jobTimedOut = false; jobTimedOut = false;
const RECHECK_TIMEOUT_MS = 60_000; const RECHECK_TIMEOUT_MS = 60_000;
const actualState = await waitForJobCompletion(namespace, jobName, RECHECK_TIMEOUT_MS, kubeconfigPath); const actualState = await waitForJobCompletion(namespace, jobName, RECHECK_TIMEOUT_MS, kubeconfigPath);
@@ -550,6 +817,11 @@ export async function execute(ctx) {
// Return an error so the UI knows the run is not done. // Return an error so the UI knows the run is not done.
jobTimedOut = true; jobTimedOut = true;
} }
else if (actualState.jobGone) {
// Job was deleted before we could confirm terminal state — same as the
// fulfilled+jobGone case above: proceed with captured output.
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
}
else if (!actualState.succeeded) { else if (!actualState.succeeded) {
// Job still not terminal — the completion error was likely transient. // Job still not terminal — the completion error was likely transient.
// Return an error so the UI knows the run is not done, rather than // Return an error so the UI knows the run is not done, rather than
@@ -615,16 +887,11 @@ export async function execute(ctx) {
}; };
} }
if (!parsed) { if (!parsed) {
const stderrLine = stdout.split(/\r?\n/).map((l) => l.trim()).find(Boolean) ?? "";
return { return {
exitCode, exitCode,
signal: null, signal: null,
timedOut: false, timedOut: false,
errorMessage: exitCode === 0 errorMessage: buildPartialRunError(exitCode, parsedStream.model, stdout),
? "Failed to parse Claude JSON output"
: stderrLine
? `Claude exited with code ${exitCode ?? -1}: ${stderrLine}`
: `Claude exited with code ${exitCode ?? -1}`,
resultJson: { stdout }, resultJson: { stdout },
}; };
} }
@@ -636,8 +903,7 @@ export async function execute(ctx) {
outputTokens: asNumber(usageObj.output_tokens, 0), outputTokens: asNumber(usageObj.output_tokens, 0),
}; };
})(); })();
const runtimeSessionParams = parseObject(runtime.sessionParams); const fallbackSessionId = currentSessionIdRaw;
const fallbackSessionId = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
const resolvedSessionId = parsedStream.sessionId const resolvedSessionId = parsedStream.sessionId
?? (asString(parsed.session_id, fallbackSessionId) || fallbackSessionId); ?? (asString(parsed.session_id, fallbackSessionId) || fallbackSessionId);
const model = asString(config.model, ""); const model = asString(config.model, "");
+1 -1
View File
File diff suppressed because one or more lines are too long
+20
View File
@@ -1,5 +1,19 @@
import type * as k8s from "@kubernetes/client-node"; import type * as k8s from "@kubernetes/client-node";
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils"; import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
/**
* Build the shell command prefix that installs a native Node.js PostToolUse
* hook into Claude Code's settings. The hook truncates oversized tool outputs
* before they reach the model — replacing the RTK binary init-container
* approach with a self-contained Node.js implementation.
*
* Both scripts are base64-encoded so they can be embedded in a sh -c command
* string without any quoting or escaping issues.
*
* @param maxOutputBytes Byte threshold above which tool output is truncated.
* @returns A shell command string (suitable for "&&"-chaining
* before the claude invocation).
*/
export declare function buildRtkSetupCommands(maxOutputBytes: number): string;
import type { SelfPodInfo } from "./k8s-client.js"; import type { SelfPodInfo } from "./k8s-client.js";
export interface JobBuildInput { export interface JobBuildInput {
ctx: AdapterExecutionContext; ctx: AdapterExecutionContext;
@@ -24,5 +38,11 @@ export interface JobBuildResult {
* staged as a K8s Secret before creating the Job. */ * staged as a K8s Secret before creating the Job. */
promptSecret: PromptSecret | null; promptSecret: PromptSecret | null;
} }
/**
* Sanitize a string for use as a Kubernetes label value (RFC 1123 subset:
* `[a-zA-Z0-9]([-_.a-zA-Z0-9]*[a-zA-Z0-9])?`, max 63 chars). Returns `null`
* when no usable characters remain — the caller should omit the label.
*/
export declare function sanitizeLabelValue(value: string, maxLen?: number): string | null;
export declare function buildJobManifest(input: JobBuildInput): JobBuildResult; export declare function buildJobManifest(input: JobBuildInput): JobBuildResult;
//# sourceMappingURL=job-manifest.d.ts.map //# sourceMappingURL=job-manifest.d.ts.map
+1 -1
View File
@@ -1 +1 @@
{"version":3,"file":"job-manifest.d.ts","sourceRoot":"","sources":["../../src/server/job-manifest.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,GAAG,MAAM,yBAAyB,CAAC;AACpD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAC;AAgD1E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AA6CnD,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,uBAAuB,CAAC;IAC7B,OAAO,EAAE,WAAW,CAAC;CACtB;AAED;;+EAE+E;AAC/E,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC;0DACsD;IACtD,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;CACnC;AAuHD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,aAAa,GAAG,cAAc,CAkRrE"} {"version":3,"file":"job-manifest.d.ts","sourceRoot":"","sources":["../../src/server/job-manifest.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,GAAG,MAAM,yBAAyB,CAAC;AACpD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAC;AAY1E;;;;;;;;;;;;GAYG;AACH,wBAAgB,qBAAqB,CAAC,cAAc,EAAE,MAAM,GAAG,MAAM,CAiEpE;AAsCD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AA6CnD,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,uBAAuB,CAAC;IAC7B,OAAO,EAAE,WAAW,CAAC;CACtB;AAED;;+EAE+E;AAC/E,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC;0DACsD;IACtD,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;CACnC;AAMD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,SAAK,GAAG,MAAM,GAAG,IAAI,CAI5E;AAmHD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,aAAa,GAAG,cAAc,CAmSrE"}
+106 -1
View File
@@ -1,5 +1,81 @@
import { asString, asNumber, asBoolean, asStringArray, parseObject, buildPaperclipEnv, renderTemplate, } from "@paperclipai/adapter-utils/server-utils"; import { asString, asNumber, asBoolean, asStringArray, parseObject, buildPaperclipEnv, renderTemplate, } from "@paperclipai/adapter-utils/server-utils";
import { createHash } from "node:crypto"; import { createHash } from "node:crypto";
/**
* Build the shell command prefix that installs a native Node.js PostToolUse
* hook into Claude Code's settings. The hook truncates oversized tool outputs
* before they reach the model — replacing the RTK binary init-container
* approach with a self-contained Node.js implementation.
*
* Both scripts are base64-encoded so they can be embedded in a sh -c command
* string without any quoting or escaping issues.
*
* @param maxOutputBytes Byte threshold above which tool output is truncated.
* @returns A shell command string (suitable for "&&"-chaining
* before the claude invocation).
*/
export function buildRtkSetupCommands(maxOutputBytes) {
// --- Filter script ----------------------------------------------------------
// This script runs as the PostToolUse hook inside every K8s Job pod.
// Claude Code writes the hook event as JSON to the script's stdin; the script
// truncates the tool_response/tool_result content when it exceeds the
// threshold and writes the (possibly modified) JSON to stdout.
//
// Field-name coverage:
// • tool_response — documented hook event format for PostToolUse
// • tool_result — alternative name seen in some Claude Code versions
// Content may be a plain string or an array of typed blocks (text/image/…).
const filterScript = [
`const c=[];`,
`process.stdin.on('data',d=>c.push(d));`,
`process.stdin.on('end',()=>{`,
`const raw=Buffer.concat(c).toString('utf-8');`,
`let o;try{o=JSON.parse(raw);}catch{process.stdout.write(raw);return;}`,
`const MAX=${maxOutputBytes};`,
`function trunc(s){`,
`if(typeof s!=='string')return s;`,
`const b=Buffer.from(s,'utf-8');`,
`if(b.length<=MAX)return s;`,
`return b.slice(0,MAX).toString('utf-8')+'\\n[...'+(b.length-MAX)+' bytes truncated by paperclip-rtk]';`,
`}`,
`const tr=o&&(o.tool_response||o.tool_result);`,
`if(tr){`,
`if(typeof tr.content==='string'){tr.content=trunc(tr.content);}`,
`else if(Array.isArray(tr.content)){`,
`tr.content=tr.content.map(function(b){`,
`if(b&&typeof b==='object'&&typeof b.text==='string'){`,
`return Object.assign({},b,{text:trunc(b.text)});`,
`}return b;`,
`});`,
`}`,
`}`,
`process.stdout.write(JSON.stringify(o));`,
`});`,
].join("");
// --- Settings script --------------------------------------------------------
// Reads the existing ~/.claude/settings.json (if any), merges in the RTK
// PostToolUse hook, and writes the file back. All other settings sections
// are preserved; only PostToolUse is replaced so we own the full hook list
// for this run.
const settingsScript = [
`const fs=require('fs'),pt=require('path');`,
`const p=pt.join(process.env.HOME,'.claude','settings.json');`,
`let s={};try{s=JSON.parse(fs.readFileSync(p,'utf-8'));}catch(e){}`,
`s.hooks=s.hooks||{};`,
`s.hooks.PostToolUse=[{matcher:'.*',hooks:[{type:'command',command:'node /tmp/.rtk-filter.js'}]}];`,
`fs.mkdirSync(pt.dirname(p),{recursive:true});`,
`fs.writeFileSync(p,JSON.stringify(s));`,
].join("");
// Encode as base64 so the strings can be embedded directly in a shell command
// without any quoting concerns (base64 alphabet: A-Za-z0-9+/=).
const filterB64 = Buffer.from(filterScript, "utf-8").toString("base64");
const settingsB64 = Buffer.from(settingsScript, "utf-8").toString("base64");
return [
// Write the filter script
`node -e "require('fs').writeFileSync('/tmp/.rtk-filter.js',Buffer.from('${filterB64}','base64').toString('utf-8'))"`,
// Install the Claude Code PostToolUse hook (merge into existing settings)
`node -e "eval(Buffer.from('${settingsB64}','base64').toString('utf-8'))"`,
].join(" && ");
}
/** Prompts above this size (bytes) are staged via a Secret instead of an /** Prompts above this size (bytes) are staged via a Secret instead of an
* init container env var, protecting against the ~1 MiB PodSpec limit. */ * init container env var, protecting against the ~1 MiB PodSpec limit. */
const LARGE_PROMPT_THRESHOLD_BYTES = 256 * 1024; const LARGE_PROMPT_THRESHOLD_BYTES = 256 * 1024;
@@ -91,6 +167,16 @@ function parseKeyValueConfig(raw) {
function sanitizeForK8sName(value, maxLen = 16) { function sanitizeForK8sName(value, maxLen = 16) {
return value.toLowerCase().replace(/[^a-z0-9-]/g, "").slice(0, maxLen); return value.toLowerCase().replace(/[^a-z0-9-]/g, "").slice(0, maxLen);
} }
/**
* Sanitize a string for use as a Kubernetes label value (RFC 1123 subset:
* `[a-zA-Z0-9]([-_.a-zA-Z0-9]*[a-zA-Z0-9])?`, max 63 chars). Returns `null`
* when no usable characters remain — the caller should omit the label.
*/
export function sanitizeLabelValue(value, maxLen = 63) {
const cleaned = value.replace(/[^a-zA-Z0-9._-]/g, "").slice(0, maxLen);
const trimmed = cleaned.replace(/^[^a-zA-Z0-9]+/, "").replace(/[^a-zA-Z0-9]+$/, "");
return trimmed.length > 0 ? trimmed : null;
}
/** /**
* Build a short deterministic hash suffix from the raw inputs to avoid * Build a short deterministic hash suffix from the raw inputs to avoid
* collisions when sanitized slugs happen to be identical. * collisions when sanitized slugs happen to be identical.
@@ -202,6 +288,8 @@ export function buildJobManifest(input) {
const nodeSelector = parseKeyValueConfig(config.nodeSelector); const nodeSelector = parseKeyValueConfig(config.nodeSelector);
const tolerations = Array.isArray(config.tolerations) ? config.tolerations : []; const tolerations = Array.isArray(config.tolerations) ? config.tolerations : [];
const extraLabels = parseKeyValueConfig(config.labels); const extraLabels = parseKeyValueConfig(config.labels);
const enableRtk = asBoolean(config.enableRtk, false);
const rtkMaxOutputBytes = asNumber(config.rtkMaxOutputBytes, 50000);
// Resolve working directory — use workspace cwd, fall back to /paperclip // Resolve working directory — use workspace cwd, fall back to /paperclip
const workspaceContext = parseObject(context.paperclipWorkspace); const workspaceContext = parseObject(context.paperclipWorkspace);
const workspaceCwd = asString(workspaceContext.cwd, ""); const workspaceCwd = asString(workspaceContext.cwd, "");
@@ -289,6 +377,17 @@ export function buildJobManifest(input) {
"paperclip.io/company-id": agent.companyId, "paperclip.io/company-id": agent.companyId,
"paperclip.io/adapter-type": "claude_k8s", "paperclip.io/adapter-type": "claude_k8s",
}; };
// Reattach-target labels: let a future execute() identify this Job as the
// continuation of the same logical unit of work (same task + same resume
// session) so it can attach to the running pod across a Paperclip restart
// instead of deleting it and starting over (FAR-124).
const taskIdRaw = asString(context.taskId, "") || asString(context.issueId, "");
const taskLabel = taskIdRaw ? sanitizeLabelValue(taskIdRaw) : null;
if (taskLabel)
labels["paperclip.io/task-id"] = taskLabel;
const sessionLabel = runtimeSessionId ? sanitizeLabelValue(runtimeSessionId) : null;
if (sessionLabel)
labels["paperclip.io/session-id"] = sessionLabel;
for (const [key, value] of Object.entries(extraLabels)) { for (const [key, value] of Object.entries(extraLabels)) {
labels[key] = value; labels[key] = value;
} }
@@ -345,7 +444,13 @@ export function buildJobManifest(input) {
}; };
// Build the claude command string for the main container // Build the claude command string for the main container
const claudeArgsEscaped = claudeArgs.map((a) => `'${a.replace(/'/g, "'\\''")}'`).join(" "); const claudeArgsEscaped = claudeArgs.map((a) => `'${a.replace(/'/g, "'\\''")}'`).join(" ");
const mainCommand = `cat /tmp/prompt/prompt.txt | claude ${claudeArgsEscaped}`; const claudeInvocation = `cat /tmp/prompt/prompt.txt | claude ${claudeArgsEscaped}`;
// When RTK output filtering is enabled, prepend the Node.js hook setup.
// This writes a filter script and a Claude Code settings file that installs
// it as a PostToolUse hook — no external binary or init container required.
const mainCommand = enableRtk
? `${buildRtkSetupCommands(rtkMaxOutputBytes)} && ${claudeInvocation}`
: claudeInvocation;
// Decide prompt delivery strategy: env var (small) or Secret volume (large). // Decide prompt delivery strategy: env var (small) or Secret volume (large).
const promptBytes = Buffer.byteLength(prompt, "utf-8"); const promptBytes = Buffer.byteLength(prompt, "utf-8");
const useLargePromptPath = promptBytes > LARGE_PROMPT_THRESHOLD_BYTES; const useLargePromptPath = promptBytes > LARGE_PROMPT_THRESHOLD_BYTES;
+1 -1
View File
File diff suppressed because one or more lines are too long
+2 -2
View File
@@ -1,12 +1,12 @@
{ {
"name": "paperclip-adapter-claude-k8s", "name": "paperclip-adapter-claude-k8s",
"version": "0.1.31", "version": "0.1.32",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "paperclip-adapter-claude-k8s", "name": "paperclip-adapter-claude-k8s",
"version": "0.1.31", "version": "0.1.32",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@kubernetes/client-node": "^1.0.0", "@kubernetes/client-node": "^1.0.0",