Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e86b14a677 | |||
| 98f3821f91 | |||
| 21a02da00f | |||
| 346f5cc1df | |||
| ef73586a41 | |||
| 9f79efdf36 | |||
| 4210f51937 | |||
| f41ae818ef | |||
| baf7e2d44d | |||
| 77ed2004f8 | |||
| 69d0f4972f | |||
| c7706d742f | |||
| 8937fb2804 | |||
| 77e9aa9b37 | |||
| 683ea2d8b1 | |||
| dd859c74a8 | |||
| b3c1519cf5 | |||
| 78fd702ccb | |||
| 0bc1bb1dd1 | |||
| c8968598e4 |
@@ -29,24 +29,21 @@ jobs:
|
|||||||
needs: test
|
needs: test
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: github.ref == 'refs/heads/master' && github.event_name == 'push'
|
if: github.ref == 'refs/heads/master' && github.event_name == 'push'
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: "22"
|
node-version: "22"
|
||||||
|
registry-url: "https://registry.npmjs.org"
|
||||||
cache: "npm"
|
cache: "npm"
|
||||||
|
|
||||||
- run: npm ci
|
- run: npm ci
|
||||||
|
|
||||||
- run: npm run build
|
- run: npm run build
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
|
||||||
with:
|
|
||||||
node-version: "22"
|
|
||||||
registry-url: "https://registry.npmjs.org"
|
|
||||||
cache: "npm"
|
|
||||||
|
|
||||||
- name: Publish (skip if version already exists)
|
- name: Publish (skip if version already exists)
|
||||||
run: |
|
run: |
|
||||||
PKG_NAME=$(node -p "require('./package.json').name")
|
PKG_NAME=$(node -p "require('./package.json').name")
|
||||||
@@ -54,7 +51,7 @@ jobs:
|
|||||||
if npm view "${PKG_NAME}@${PKG_VERSION}" version 2>/dev/null; then
|
if npm view "${PKG_NAME}@${PKG_VERSION}" version 2>/dev/null; then
|
||||||
echo "Version ${PKG_VERSION} already published — skipping."
|
echo "Version ${PKG_VERSION} already published — skipping."
|
||||||
else
|
else
|
||||||
npm publish --access public
|
npm publish --provenance --access public
|
||||||
fi
|
fi
|
||||||
env:
|
env:
|
||||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
|||||||
Vendored
+33
@@ -1,3 +1,36 @@
|
|||||||
import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
|
import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
|
||||||
|
import type * as k8s from "@kubernetes/client-node";
|
||||||
|
/**
|
||||||
|
* Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
|
||||||
|
* Works for both v0.x (response.statusCode) and v1.0+ (response.status, message).
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export declare function isK8s404(err: unknown): boolean;
|
||||||
|
/**
|
||||||
|
* Build the error message when Claude's stdout contains no result event.
|
||||||
|
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export declare function buildPartialRunError(exitCode: number | null, model: string, stdout: string): string;
|
||||||
|
/**
|
||||||
|
* Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
|
||||||
|
* not match the current runId) as a potential reattach target. A Job is
|
||||||
|
* reattachable when it belongs to the same agent, same task, and same resume
|
||||||
|
* session as the current run — meaning the previous Paperclip instance was
|
||||||
|
* mid-stream on the exact piece of work this new run was dispatched to do.
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export declare function isReattachableOrphan(job: k8s.V1Job, expected: {
|
||||||
|
agentId: string;
|
||||||
|
taskId: string | null;
|
||||||
|
sessionId: string | null;
|
||||||
|
}): boolean;
|
||||||
|
/**
|
||||||
|
* Build an error message for a pod that reached phase=Failed before or
|
||||||
|
* instead of streaming logs. Includes the claude container's terminated exit
|
||||||
|
* code and reason when available so operators can diagnose crashes without
|
||||||
|
* needing kubectl. Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export declare function describePodTerminatedError(podName: string, phase: string, containerStatuses: k8s.V1ContainerStatus[]): string;
|
||||||
export declare function execute(ctx: AdapterExecutionContext): Promise<AdapterExecutionResult>;
|
export declare function execute(ctx: AdapterExecutionContext): Promise<AdapterExecutionResult>;
|
||||||
//# sourceMappingURL=execute.d.ts.map
|
//# sourceMappingURL=execute.d.ts.map
|
||||||
Vendored
+1
-1
@@ -1 +1 @@
|
|||||||
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../../src/server/execute.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAiUlG,wBAAsB,OAAO,CAAC,GAAG,EAAE,uBAAuB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAoc3F"}
|
{"version":3,"file":"execute.d.ts","sourceRoot":"","sources":["../../src/server/execute.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,sBAAsB,EAAE,MAAM,4BAA4B,CAAC;AAWlG,OAAO,KAAK,KAAK,GAAG,MAAM,yBAAyB,CAAC;AAYpD;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAO9C;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GACb,MAAM,CA4BR;AAED;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,GAAG,EAAE,GAAG,CAAC,KAAK,EACd,QAAQ,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAC7E,OAAO,CAaT;AAED;;;;;GAKG;AACH,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,iBAAiB,EAAE,GAAG,CAAC,iBAAiB,EAAE,GACzC,MAAM,CASR;AAkWD,wBAAsB,OAAO,CAAC,GAAG,EAAE,uBAAuB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAkkB3F"}
|
||||||
Vendored
+370
-104
@@ -1,12 +1,110 @@
|
|||||||
import { asString, asNumber, asBoolean, parseObject } from "@paperclipai/adapter-utils/server-utils";
|
import { asString, asNumber, asBoolean, parseObject } from "@paperclipai/adapter-utils/server-utils";
|
||||||
import { parseClaudeStreamJson, describeClaudeFailure, isClaudeMaxTurnsResult, isClaudeUnknownSessionError, } from "./parse.js";
|
import { parseClaudeStreamJson, describeClaudeFailure, isClaudeMaxTurnsResult, isClaudeUnknownSessionError, } from "./parse.js";
|
||||||
import { getSelfPodInfo, getBatchApi, getCoreApi, getLogApi } from "./k8s-client.js";
|
import { getSelfPodInfo, getBatchApi, getCoreApi, getLogApi } from "./k8s-client.js";
|
||||||
import { buildJobManifest } from "./job-manifest.js";
|
import { buildJobManifest, sanitizeLabelValue } from "./job-manifest.js";
|
||||||
|
import { LogLineDedupFilter } from "./log-dedup.js";
|
||||||
import { Writable } from "node:stream";
|
import { Writable } from "node:stream";
|
||||||
const POLL_INTERVAL_MS = 2000;
|
const POLL_INTERVAL_MS = 2000;
|
||||||
const KEEPALIVE_INTERVAL_MS = 15_000;
|
const KEEPALIVE_INTERVAL_MS = 15_000;
|
||||||
const LOG_STREAM_RECONNECT_DELAY_MS = 3_000;
|
const LOG_STREAM_RECONNECT_DELAY_MS = 3_000;
|
||||||
const MAX_LOG_RECONNECT_ATTEMPTS = 50;
|
const MAX_LOG_RECONNECT_ATTEMPTS = 50;
|
||||||
|
// How long to keep refreshing onSpawn after the Job reaches a terminal state.
|
||||||
|
// Covers the cleanup path (delete job, parse stdout) so a slow K8s API call
|
||||||
|
// doesn't trip the 5-minute reaper staleness window.
|
||||||
|
const POST_TERMINAL_KEEPALIVE_MS = 90_000;
|
||||||
|
/**
 * Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
 *
 * Only `Error` instances are considered; any other thrown value yields false.
 * An error is treated as a 404 when any of the following holds:
 *   - `err.response.statusCode === 404` (v0.x style)
 *   - `err.response.status === 404`
 *   - `err.statusCode === 404`
 *   - `err.code === 404` (numeric status field carried by the v1.x ApiException)
 *   - `err.message` contains `HTTP-Code: 404`
 *
 * Exported for unit tests.
 *
 * @param err - The thrown value to inspect.
 * @returns true when the error carries a 404 status, false otherwise.
 */
export function isK8s404(err) {
    // Un-narrowed alias: the status fields below are ad-hoc properties that
    // are not part of the base Error shape.
    const raw = err;
    if (!(err instanceof Error))
        return false;
    const resp = raw.response;
    if (resp?.statusCode === 404 || resp?.status === 404)
        return true;
    // Prefer structured status fields over message parsing. Strict equality
    // keeps string codes such as "ENOENT" from ever matching.
    if (raw.statusCode === 404 || raw.code === 404)
        return true;
    // Last resort: the status embedded in the message text. The trailing \b
    // prevents "4040" from matching.
    return /HTTP-Code:\s*404\b/.test(raw.message);
}
|
||||||
|
/**
 * Build the error message when Claude's stdout contains no result event.
 *
 * Lines that parse as a JSON object with `type === "system"` are skipped so
 * the UI doesn't display the raw init JSON. Lines that are not valid JSON
 * count as content.
 *
 * Exported for unit tests.
 *
 * @param exitCode - Process exit code, or null when unknown (reported as -1).
 * @param model - Model name; used only in the "started but no result" hint.
 * @param stdout - Raw captured stdout, one event per line.
 * @returns A human-readable error message.
 */
export function buildPartialRunError(exitCode, model, stdout) {
    // A clean exit with no result event means the output could not be parsed.
    if (exitCode === 0)
        return "Failed to parse Claude JSON output";
    // Walk stdout lines, skip blanks and system events, keep the first real
    // content line.
    const firstContentLine = stdout.split(/\r?\n/)
        .map((line) => line.trim())
        .find((line) => {
        if (!line)
            return false;
        try {
            const parsed = JSON.parse(line);
            if (typeof parsed === "object" && parsed !== null && parsed.type === "system")
                return false;
        }
        catch {
            // not JSON — treat as content
        }
        return true;
    }) ?? "";
    // Output consisted solely of system/init events: surface the model name so
    // the operator can diagnose missing credentials or an unsupported model.
    // The hint is only emitted when a model name is known, so it is always
    // included verbatim here.
    if (stdout.trim() !== "" && model !== "" && !firstContentLine) {
        return `Claude started but did not produce a result (model: ${model}) — check API credentials, model support, and adapter config`;
    }
    const code = exitCode ?? -1;
    return firstContentLine
        ? `Claude exited with code ${code}: ${firstContentLine}`
        : `Claude exited with code ${code}`;
}
|
||||||
|
/**
 * Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
 * not match the current runId) as a potential reattach target. A Job is
 * reattachable when it belongs to the same agent, same task, and same resume
 * session as the current run — meaning the previous Paperclip instance was
 * mid-stream on the exact piece of work this new run was dispatched to do.
 *
 * A Job is rejected when:
 *   - `expected.taskId` or `expected.sessionId` is empty/null (nothing to match on),
 *   - it is already being deleted (`metadata.deletionTimestamp` is set),
 *   - its adapter-type label is not `claude_k8s`,
 *   - its agent-id, task-id or session-id label differs from `expected`
 *     (values are compared verbatim against the label strings),
 *   - it has a `Complete` or `Failed` condition with status `"True"`.
 *
 * Exported for unit tests.
 *
 * @param job - The candidate Job object.
 * @param expected - Identity of the current run: `agentId`, `taskId`, `sessionId`.
 * @returns true when the Job can be reattached to, false otherwise.
 */
export function isReattachableOrphan(job, expected) {
    if (!expected.taskId || !expected.sessionId)
        return false;
    // A Job that is already being deleted cannot be a stable target, so leave
    // it to the normal orphan cleanup.
    if (job.metadata?.deletionTimestamp)
        return false;
    const labels = job.metadata?.labels ?? {};
    const identityMatches = labels["paperclip.io/adapter-type"] === "claude_k8s"
        && labels["paperclip.io/agent-id"] === expected.agentId
        && labels["paperclip.io/task-id"] === expected.taskId
        && labels["paperclip.io/session-id"] === expected.sessionId;
    if (!identityMatches)
        return false;
    // Finished Jobs (Complete/Failed) have nothing left to stream.
    const conditions = job.status?.conditions ?? [];
    const terminal = conditions.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True");
    return !terminal;
}
|
||||||
|
/**
 * Build an error message for a pod that reached phase=Failed before or
 * instead of streaming logs. Includes the claude container's terminated exit
 * code and reason when available so operators can diagnose crashes without
 * needing kubectl. Exported for unit tests.
 *
 * @param podName - Name of the pod, echoed in the message.
 * @param phase - Pod phase, echoed in the message.
 * @param containerStatuses - Container statuses of the pod. A missing
 *   (null/undefined) list is treated as empty so this helper never throws
 *   while an error is being reported.
 * @returns `Pod <name> reached phase=<phase>`, followed by
 *   `: claude exited <code> (<reason>)` when the container named "claude"
 *   has a terminated state.
 */
export function describePodTerminatedError(podName, phase, containerStatuses) {
    const summary = `Pod ${podName} reached phase=${phase}`;
    // Only the main "claude" container is inspected.
    const claudeStatus = (containerStatuses ?? []).find((cs) => cs.name === "claude");
    const terminated = claudeStatus?.state?.terminated;
    if (!terminated)
        return summary;
    // `??` (not `||`) so an exit code of 0 is still reported as 0.
    const code = terminated.exitCode ?? "unknown";
    const reason = terminated.reason ?? terminated.message ?? "no reason";
    return `${summary}: claude exited ${code} (${reason})`;
}
|
||||||
/**
|
/**
|
||||||
* Wait for the Job's pod to reach a terminal or running state.
|
* Wait for the Job's pod to reach a terminal or running state.
|
||||||
* Returns the pod name once logs can be streamed, or throws on failure.
|
* Returns the pod name once logs can be streamed, or throws on failure.
|
||||||
@@ -52,14 +150,22 @@ async function waitForPod(namespace, jobName, timeoutMs, onLog, kubeconfigPath)
|
|||||||
details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`);
|
details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`);
|
||||||
else if (cs.state?.running)
|
else if (cs.state?.running)
|
||||||
details.push(`${cs.name}: running`);
|
details.push(`${cs.name}: running`);
|
||||||
|
else if (cs.state?.terminated)
|
||||||
|
details.push(`${cs.name}: terminated (exit ${cs.state.terminated.exitCode ?? "?"}, ${cs.state.terminated.reason ?? "no reason"})`);
|
||||||
}
|
}
|
||||||
await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`);
|
await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`);
|
||||||
lastStatus = statusKey;
|
lastStatus = statusKey;
|
||||||
}
|
}
|
||||||
// Ready to stream logs
|
// Ready to stream logs
|
||||||
if (phase === "Running" || phase === "Succeeded" || phase === "Failed") {
|
if (phase === "Running" || phase === "Succeeded") {
|
||||||
return podName;
|
return podName;
|
||||||
}
|
}
|
||||||
|
// phase=Failed means the pod crashed before we could stream logs.
|
||||||
|
// Throwing here routes the caller into the error path with a structured
|
||||||
|
// message instead of entering the log-streaming path with a dead pod.
|
||||||
|
if (phase === "Failed") {
|
||||||
|
throw new Error(describePodTerminatedError(podName, phase, containerStatuses));
|
||||||
|
}
|
||||||
// Init containers done + main running (phase may still say Pending briefly)
|
// Init containers done + main running (phase may still say Pending briefly)
|
||||||
const allInitsDone = initStatuses.length > 0 && initStatuses.every((s) => s.state?.terminated?.exitCode === 0);
|
const allInitsDone = initStatuses.length > 0 && initStatuses.every((s) => s.state?.terminated?.exitCode === 0);
|
||||||
const mainRunning = containerStatuses.some((s) => s.state?.running);
|
const mainRunning = containerStatuses.some((s) => s.state?.running);
|
||||||
@@ -104,16 +210,32 @@ async function waitForPod(namespace, jobName, timeoutMs, onLog, kubeconfigPath)
|
|||||||
* Stream pod logs once via follow. Returns accumulated stdout when the
|
* Stream pod logs once via follow. Returns accumulated stdout when the
|
||||||
* stream ends (container exit, API disconnect, or abort signal).
|
* stream ends (container exit, API disconnect, or abort signal).
|
||||||
*/
|
*/
|
||||||
async function streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds) {
|
async function streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal) {
|
||||||
const logApi = getLogApi(kubeconfigPath);
|
const logApi = getLogApi(kubeconfigPath);
|
||||||
const chunks = [];
|
const chunks = [];
|
||||||
const writable = new Writable({
|
const writable = new Writable({
|
||||||
write(chunk, _encoding, callback) {
|
write(chunk, _encoding, callback) {
|
||||||
const text = chunk.toString("utf-8");
|
const text = chunk.toString("utf-8");
|
||||||
chunks.push(text);
|
chunks.push(text);
|
||||||
void onLog("stdout", text).then(() => callback(), callback);
|
const emitted = dedup ? dedup.filter(text) : text;
|
||||||
|
if (!emitted) {
|
||||||
|
callback();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
void onLog("stdout", emitted).then(() => callback(), callback);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
// When the job completion signal fires, destroy the writable to abort the
|
||||||
|
// in-flight follow stream. Without this, logApi.log can hang indefinitely
|
||||||
|
// when the pod terminates without closing the HTTP connection cleanly.
|
||||||
|
let stopPoller = null;
|
||||||
|
if (stopSignal) {
|
||||||
|
stopPoller = setInterval(() => {
|
||||||
|
if (stopSignal.stopped && !writable.destroyed) {
|
||||||
|
writable.destroy();
|
||||||
|
}
|
||||||
|
}, 200);
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
await logApi.log(namespace, podName, "claude", writable, {
|
await logApi.log(namespace, podName, "claude", writable, {
|
||||||
follow: true,
|
follow: true,
|
||||||
@@ -122,8 +244,12 @@ async function streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinc
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
catch {
|
catch {
|
||||||
// follow may fail if the container already exited or the API
|
// follow may fail if the container already exited, the API connection
|
||||||
// connection dropped — not fatal, caller decides whether to retry.
|
// dropped, or we aborted via writable.destroy() — not fatal.
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
if (stopPoller)
|
||||||
|
clearInterval(stopPoller);
|
||||||
}
|
}
|
||||||
return chunks.join("");
|
return chunks.join("");
|
||||||
}
|
}
|
||||||
@@ -143,6 +269,9 @@ async function streamPodLogs(namespace, podName, onLog, kubeconfigPath, stopSign
|
|||||||
// reconnects use a tight window instead of an ever-growing one anchored
|
// reconnects use a tight window instead of an ever-growing one anchored
|
||||||
// at stream start. This is the primary fix for FAR-105 duplicative logs.
|
// at stream start. This is the primary fix for FAR-105 duplicative logs.
|
||||||
let lastLogReceivedAt = Math.floor(Date.now() / 1000);
|
let lastLogReceivedAt = Math.floor(Date.now() / 1000);
|
||||||
|
// Shared across reconnects so replayed lines inside the `sinceSeconds`
|
||||||
|
// overlap window are dropped before they reach the streaming UI (FAR-123).
|
||||||
|
const dedup = new LogLineDedupFilter();
|
||||||
while (!stopSignal?.stopped) {
|
while (!stopSignal?.stopped) {
|
||||||
if (attempt >= MAX_LOG_RECONNECT_ATTEMPTS) {
|
if (attempt >= MAX_LOG_RECONNECT_ATTEMPTS) {
|
||||||
await onLog("stderr", `[paperclip] Log stream: max reconnect attempts (${MAX_LOG_RECONNECT_ATTEMPTS}) reached — giving up.\n`);
|
await onLog("stderr", `[paperclip] Log stream: max reconnect attempts (${MAX_LOG_RECONNECT_ATTEMPTS}) reached — giving up.\n`);
|
||||||
@@ -158,7 +287,7 @@ async function streamPodLogs(namespace, podName, onLog, kubeconfigPath, stopSign
|
|||||||
await onLog("stdout", `[paperclip] Log stream disconnected — reconnecting (attempt ${attempt}/${MAX_LOG_RECONNECT_ATTEMPTS})...\n`);
|
await onLog("stdout", `[paperclip] Log stream disconnected — reconnecting (attempt ${attempt}/${MAX_LOG_RECONNECT_ATTEMPTS})...\n`);
|
||||||
}
|
}
|
||||||
const preStreamTs = Math.floor(Date.now() / 1000);
|
const preStreamTs = Math.floor(Date.now() / 1000);
|
||||||
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds);
|
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal);
|
||||||
if (result) {
|
if (result) {
|
||||||
allChunks.push(result);
|
allChunks.push(result);
|
||||||
// Update last-received timestamp to now (the stream just ended,
|
// Update last-received timestamp to now (the stream just ended,
|
||||||
@@ -177,6 +306,11 @@ async function streamPodLogs(namespace, podName, onLog, kubeconfigPath, stopSign
|
|||||||
// Brief pause before reconnecting to avoid tight loops.
|
// Brief pause before reconnecting to avoid tight loops.
|
||||||
await new Promise((resolve) => setTimeout(resolve, LOG_STREAM_RECONNECT_DELAY_MS));
|
await new Promise((resolve) => setTimeout(resolve, LOG_STREAM_RECONNECT_DELAY_MS));
|
||||||
}
|
}
|
||||||
|
// Flush any buffered partial line so the final assistant/result chunk
|
||||||
|
// isn't dropped when the stream ends mid-line.
|
||||||
|
const tail = dedup.flush();
|
||||||
|
if (tail)
|
||||||
|
await onLog("stdout", tail);
|
||||||
return allChunks.join("");
|
return allChunks.join("");
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@@ -199,13 +333,27 @@ async function readPodLogs(namespace, podName, kubeconfigPath) {
|
|||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Wait for the Job to reach a terminal state (Complete or Failed).
|
* Wait for the Job to reach a terminal state (Complete or Failed).
|
||||||
* Returns the Job's final status.
|
* Returns the Job's final status. A 404 (job deleted by TTL or externally)
|
||||||
|
* is treated as a soft terminal: succeeded=false, timedOut=false, jobGone=true.
|
||||||
|
* The caller should log this and fall through to stdout parsing.
|
||||||
*/
|
*/
|
||||||
async function waitForJobCompletion(namespace, jobName, timeoutMs, kubeconfigPath) {
|
async function waitForJobCompletion(namespace, jobName, timeoutMs, kubeconfigPath) {
|
||||||
const batchApi = getBatchApi(kubeconfigPath);
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : 0;
|
const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : 0;
|
||||||
while (deadline === 0 || Date.now() < deadline) {
|
while (deadline === 0 || Date.now() < deadline) {
|
||||||
const job = await batchApi.readNamespacedJob({ name: jobName, namespace });
|
let job;
|
||||||
|
try {
|
||||||
|
job = await batchApi.readNamespacedJob({ name: jobName, namespace });
|
||||||
|
}
|
||||||
|
catch (err) {
|
||||||
|
if (isK8s404(err)) {
|
||||||
|
// Job was deleted (TTL garbage collection or external deletion) before
|
||||||
|
// we detected its terminal condition. The container must have already
|
||||||
|
// exited for TTL to fire, so log streaming will have captured the output.
|
||||||
|
return { succeeded: false, timedOut: false, jobGone: true };
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
const conditions = job.status?.conditions ?? [];
|
const conditions = job.status?.conditions ?? [];
|
||||||
const complete = conditions.find((c) => c.type === "Complete" && c.status === "True");
|
const complete = conditions.find((c) => c.type === "Complete" && c.status === "True");
|
||||||
if (complete)
|
if (complete)
|
||||||
@@ -261,10 +409,18 @@ export async function execute(ctx) {
|
|||||||
// Guard: claude_k8s must not run concurrently for the same agent (shared PVC/session).
|
// Guard: claude_k8s must not run concurrently for the same agent (shared PVC/session).
|
||||||
// After a server restart, orphaned K8s Jobs from previous (now-failed) runs may
|
// After a server restart, orphaned K8s Jobs from previous (now-failed) runs may
|
||||||
// still be running. We detect those by comparing the Job's run-id label against
|
// still be running. We detect those by comparing the Job's run-id label against
|
||||||
// the current runId and clean them up so this execution can proceed.
|
// the current runId. When reattachOrphanedJobs is enabled and the orphan matches
|
||||||
|
// the current agent+task+session, we attach to it instead of deleting it (FAR-124).
|
||||||
const agentId = ctx.agent.id;
|
const agentId = ctx.agent.id;
|
||||||
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
||||||
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
||||||
|
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
|
||||||
|
const runtimeSessionParams = parseObject(runtime.sessionParams);
|
||||||
|
const currentSessionIdRaw = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
|
||||||
|
const currentSessionLabel = currentSessionIdRaw ? sanitizeLabelValue(currentSessionIdRaw) : null;
|
||||||
|
const currentTaskIdRaw = asString(ctx.context.taskId, "") || asString(ctx.context.issueId, "");
|
||||||
|
const currentTaskLabel = currentTaskIdRaw ? sanitizeLabelValue(currentTaskIdRaw) : null;
|
||||||
|
let reattachTarget = null;
|
||||||
try {
|
try {
|
||||||
const batchApi = getBatchApi(kubeconfigPath);
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
const existing = await batchApi.listNamespacedJob({
|
const existing = await batchApi.listNamespacedJob({
|
||||||
@@ -277,10 +433,37 @@ export async function execute(ctx) {
|
|||||||
// concurrent jobs (same runId — shouldn't happen but guard defensively).
|
// concurrent jobs (same runId — shouldn't happen but guard defensively).
|
||||||
const orphaned = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") !== runId);
|
const orphaned = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") !== runId);
|
||||||
const samRun = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId);
|
const samRun = running.filter((j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId);
|
||||||
if (orphaned.length > 0) {
|
// Pick the most recent reattachable orphan — same agent + task + session,
|
||||||
const orphanNames = orphaned.map((j) => j.metadata?.name).join(", ");
|
// not terminal. Only one target is chosen; any other orphans get
|
||||||
await onLog("stdout", `[paperclip] Cleaning up ${orphaned.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
|
// cleaned up as before.
|
||||||
for (const j of orphaned) {
|
if (reattachOrphanedJobs && orphaned.length > 0) {
|
||||||
|
const candidates = orphaned
|
||||||
|
.filter((j) => isReattachableOrphan(j, {
|
||||||
|
agentId,
|
||||||
|
taskId: currentTaskLabel,
|
||||||
|
sessionId: currentSessionLabel,
|
||||||
|
}))
|
||||||
|
.sort((a, b) => {
|
||||||
|
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
|
||||||
|
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
|
||||||
|
return bt - at;
|
||||||
|
});
|
||||||
|
const chosen = candidates[0];
|
||||||
|
const chosenName = chosen?.metadata?.name;
|
||||||
|
if (chosen && chosenName) {
|
||||||
|
reattachTarget = {
|
||||||
|
jobName: chosenName,
|
||||||
|
namespace: chosen.metadata?.namespace ?? guardNamespace,
|
||||||
|
priorRunId: chosen.metadata?.labels?.["paperclip.io/run-id"] ?? "",
|
||||||
|
image: chosen.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const toDelete = orphaned.filter((j) => !reattachTarget || j.metadata?.name !== reattachTarget.jobName);
|
||||||
|
if (toDelete.length > 0) {
|
||||||
|
const orphanNames = toDelete.map((j) => j.metadata?.name).join(", ");
|
||||||
|
await onLog("stdout", `[paperclip] Cleaning up ${toDelete.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
|
||||||
|
for (const j of toDelete) {
|
||||||
const name = j.metadata?.name;
|
const name = j.metadata?.name;
|
||||||
if (name) {
|
if (name) {
|
||||||
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
|
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
|
||||||
@@ -317,81 +500,114 @@ export async function execute(ctx) {
|
|||||||
errorCode: "k8s_concurrency_guard_unreachable",
|
errorCode: "k8s_concurrency_guard_unreachable",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
// Build Job manifest
|
|
||||||
const { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret } = buildJobManifest({
|
|
||||||
ctx,
|
|
||||||
selfPod,
|
|
||||||
});
|
|
||||||
// Report invocation metadata
|
|
||||||
if (onMeta) {
|
|
||||||
await onMeta({
|
|
||||||
adapterType: "claude_k8s",
|
|
||||||
command: `kubectl job/${jobName}`,
|
|
||||||
cwd: namespace,
|
|
||||||
commandArgs: claudeArgs,
|
|
||||||
commandNotes: [
|
|
||||||
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
|
|
||||||
`Namespace: ${namespace}`,
|
|
||||||
`Timeout: ${timeoutSec}s`,
|
|
||||||
],
|
|
||||||
prompt,
|
|
||||||
...(promptMetrics ? { promptMetrics } : {}),
|
|
||||||
context: ctx.context,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
// If the prompt is large, create a Secret to hold it (avoids the ~1 MiB
|
|
||||||
// PodSpec limit). The Secret is cleaned up in the finally block.
|
|
||||||
const coreApi = getCoreApi(kubeconfigPath);
|
const coreApi = getCoreApi(kubeconfigPath);
|
||||||
if (promptSecret) {
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
try {
|
let jobName;
|
||||||
await coreApi.createNamespacedSecret({
|
let namespace;
|
||||||
namespace: promptSecret.namespace,
|
let promptSecret = null;
|
||||||
body: {
|
if (reattachTarget) {
|
||||||
apiVersion: "v1",
|
jobName = reattachTarget.jobName;
|
||||||
kind: "Secret",
|
namespace = reattachTarget.namespace;
|
||||||
metadata: {
|
// Announce reattach metadata. Prompt and args aren't known here — they
|
||||||
name: promptSecret.name,
|
// belong to the prior run that created this pod and are already present
|
||||||
namespace: promptSecret.namespace,
|
// on the running container.
|
||||||
labels: {
|
if (onMeta) {
|
||||||
"app.kubernetes.io/managed-by": "paperclip",
|
await onMeta({
|
||||||
"paperclip.io/adapter-type": "claude_k8s",
|
adapterType: "claude_k8s",
|
||||||
"paperclip.io/run-id": runId,
|
command: `kubectl job/${jobName}`,
|
||||||
},
|
cwd: namespace,
|
||||||
},
|
commandArgs: [],
|
||||||
stringData: promptSecret.data,
|
commandNotes: [
|
||||||
},
|
`Image: ${reattachTarget.image}`,
|
||||||
|
`Namespace: ${namespace}`,
|
||||||
|
`Reattached from prior run: ${reattachTarget.priorRunId || "unknown"}`,
|
||||||
|
`Timeout: ${timeoutSec}s`,
|
||||||
|
],
|
||||||
|
prompt: "",
|
||||||
|
context: ctx.context,
|
||||||
});
|
});
|
||||||
await onLog("stdout", `[paperclip] Created prompt Secret: ${promptSecret.name} (${Math.round(Buffer.byteLength(prompt, "utf-8") / 1024)} KiB)\n`);
|
}
|
||||||
|
await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Build Job manifest
|
||||||
|
const built = buildJobManifest({ ctx, selfPod });
|
||||||
|
const job = built.job;
|
||||||
|
jobName = built.jobName;
|
||||||
|
namespace = built.namespace;
|
||||||
|
const prompt = built.prompt;
|
||||||
|
const claudeArgs = built.claudeArgs;
|
||||||
|
const promptMetrics = built.promptMetrics;
|
||||||
|
promptSecret = built.promptSecret;
|
||||||
|
// Report invocation metadata
|
||||||
|
if (onMeta) {
|
||||||
|
await onMeta({
|
||||||
|
adapterType: "claude_k8s",
|
||||||
|
command: `kubectl job/${jobName}`,
|
||||||
|
cwd: namespace,
|
||||||
|
commandArgs: claudeArgs,
|
||||||
|
commandNotes: [
|
||||||
|
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
|
||||||
|
`Namespace: ${namespace}`,
|
||||||
|
`Timeout: ${timeoutSec}s`,
|
||||||
|
],
|
||||||
|
prompt,
|
||||||
|
...(promptMetrics ? { promptMetrics } : {}),
|
||||||
|
context: ctx.context,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// If the prompt is large, create a Secret to hold it (avoids the ~1 MiB
|
||||||
|
// PodSpec limit). The Secret is cleaned up in the finally block.
|
||||||
|
if (promptSecret) {
|
||||||
|
try {
|
||||||
|
await coreApi.createNamespacedSecret({
|
||||||
|
namespace: promptSecret.namespace,
|
||||||
|
body: {
|
||||||
|
apiVersion: "v1",
|
||||||
|
kind: "Secret",
|
||||||
|
metadata: {
|
||||||
|
name: promptSecret.name,
|
||||||
|
namespace: promptSecret.namespace,
|
||||||
|
labels: {
|
||||||
|
"app.kubernetes.io/managed-by": "paperclip",
|
||||||
|
"paperclip.io/adapter-type": "claude_k8s",
|
||||||
|
"paperclip.io/run-id": runId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
stringData: promptSecret.data,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
await onLog("stdout", `[paperclip] Created prompt Secret: ${promptSecret.name} (${Math.round(Buffer.byteLength(prompt, "utf-8") / 1024)} KiB)\n`);
|
||||||
|
}
|
||||||
|
catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
await onLog("stderr", `[paperclip] Failed to create prompt Secret: ${msg}\n`);
|
||||||
|
return {
|
||||||
|
exitCode: null,
|
||||||
|
signal: null,
|
||||||
|
timedOut: false,
|
||||||
|
errorMessage: `Failed to create prompt Secret: ${msg}`,
|
||||||
|
errorCode: "k8s_prompt_secret_create_failed",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create the Job
|
||||||
|
try {
|
||||||
|
await batchApi.createNamespacedJob({ namespace, body: job });
|
||||||
}
|
}
|
||||||
catch (err) {
|
catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
await onLog("stderr", `[paperclip] Failed to create prompt Secret: ${msg}\n`);
|
await onLog("stderr", `[paperclip] Failed to create K8s Job: ${msg}\n`);
|
||||||
return {
|
return {
|
||||||
exitCode: null,
|
exitCode: null,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: `Failed to create prompt Secret: ${msg}`,
|
errorMessage: `Failed to create Kubernetes Job: ${msg}`,
|
||||||
errorCode: "k8s_prompt_secret_create_failed",
|
errorCode: "k8s_job_create_failed",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
|
||||||
}
|
}
|
||||||
// Create the Job
|
|
||||||
const batchApi = getBatchApi(kubeconfigPath);
|
|
||||||
try {
|
|
||||||
await batchApi.createNamespacedJob({ namespace, body: job });
|
|
||||||
}
|
|
||||||
catch (err) {
|
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
|
||||||
await onLog("stderr", `[paperclip] Failed to create K8s Job: ${msg}\n`);
|
|
||||||
return {
|
|
||||||
exitCode: null,
|
|
||||||
signal: null,
|
|
||||||
timedOut: false,
|
|
||||||
errorMessage: `Failed to create Kubernetes Job: ${msg}`,
|
|
||||||
errorCode: "k8s_job_create_failed",
|
|
||||||
};
|
|
||||||
}
|
|
||||||
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
|
|
||||||
let stdout = "";
|
let stdout = "";
|
||||||
let exitCode = null;
|
let exitCode = null;
|
||||||
let jobTimedOut = false;
|
let jobTimedOut = false;
|
||||||
@@ -404,8 +620,24 @@ export async function execute(ctx) {
|
|||||||
const scheduleTimeoutMs = 120_000; // 2 minutes for scheduling
|
const scheduleTimeoutMs = 120_000; // 2 minutes for scheduling
|
||||||
let podName;
|
let podName;
|
||||||
try {
|
try {
|
||||||
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
|
if (reattachTarget) {
|
||||||
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
|
// Pod is already running from the prior run — look it up directly.
|
||||||
|
const podList = await coreApi.listNamespacedPod({
|
||||||
|
namespace,
|
||||||
|
labelSelector: `job-name=${jobName}`,
|
||||||
|
});
|
||||||
|
const pod = podList.items[0];
|
||||||
|
const name = pod?.metadata?.name;
|
||||||
|
if (!name) {
|
||||||
|
throw new Error(`Reattach target Job ${jobName} has no pod`);
|
||||||
|
}
|
||||||
|
podName = name;
|
||||||
|
await onLog("stdout", `[paperclip] Reattached to pod ${podName}\n`);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
|
||||||
|
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
|
||||||
|
}
|
||||||
// Notify the server that execution has started. This sets
|
// Notify the server that execution has started. This sets
|
||||||
// processStartedAt and refreshes updatedAt in the DB, which the
|
// processStartedAt and refreshes updatedAt in the DB, which the
|
||||||
// stale-run reaper (reapOrphanedRuns) uses to decide liveness.
|
// stale-run reaper (reapOrphanedRuns) uses to decide liveness.
|
||||||
@@ -419,13 +651,14 @@ export async function execute(ctx) {
|
|||||||
}
|
}
|
||||||
catch (err) {
|
catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
await onLog("stderr", `[paperclip] Pod scheduling failed: ${msg}\n`);
|
const phase = reattachTarget ? "reattach" : "scheduling";
|
||||||
|
await onLog("stderr", `[paperclip] Pod ${phase} failed: ${msg}\n`);
|
||||||
return {
|
return {
|
||||||
exitCode: null,
|
exitCode: null,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: `Pod scheduling failed: ${msg}`,
|
errorMessage: `Pod ${phase} failed: ${msg}`,
|
||||||
errorCode: "k8s_pod_schedule_failed",
|
errorCode: reattachTarget ? "k8s_pod_reattach_failed" : "k8s_pod_schedule_failed",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
// Stream logs and wait for completion concurrently.
|
// Stream logs and wait for completion concurrently.
|
||||||
@@ -457,18 +690,32 @@ export async function execute(ctx) {
|
|||||||
let lastLogAt = Date.now();
|
let lastLogAt = Date.now();
|
||||||
let keepaliveTick = 0;
|
let keepaliveTick = 0;
|
||||||
let keepaliveJobTerminal = false;
|
let keepaliveJobTerminal = false;
|
||||||
|
let keepaliveJobTerminalAt = null;
|
||||||
keepaliveTimer = setInterval(() => {
|
keepaliveTimer = setInterval(() => {
|
||||||
// Fire-and-forget the async work; setInterval callbacks must be
|
// Fire-and-forget the async work; setInterval callbacks must be
|
||||||
// synchronous or the timer will drift.
|
// synchronous or the timer will drift.
|
||||||
void (async () => {
|
void (async () => {
|
||||||
if (keepaliveJobTerminal)
|
if (keepaliveJobTerminal) {
|
||||||
|
// Post-terminal window: keep refreshing onSpawn during cleanup
|
||||||
|
// (job deletion, log parsing, K8s API calls) so the reaper doesn't
|
||||||
|
// fire a false process_lost while execute() is still running.
|
||||||
|
if (ctx.onSpawn &&
|
||||||
|
keepaliveJobTerminalAt !== null &&
|
||||||
|
Date.now() - keepaliveJobTerminalAt <= POST_TERMINAL_KEEPALIVE_MS) {
|
||||||
|
keepaliveTick++;
|
||||||
|
if (keepaliveTick % 6 === 0) {
|
||||||
|
void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => { });
|
||||||
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
// Verify the Job is still alive before announcing or refreshing.
|
// Verify the Job is still alive before announcing or refreshing.
|
||||||
try {
|
try {
|
||||||
const job = await batchApi.readNamespacedJob({ name: jobName, namespace });
|
const job = await batchApi.readNamespacedJob({ name: jobName, namespace });
|
||||||
const terminal = job.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True");
|
const terminal = job.status?.conditions?.some((c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True");
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
keepaliveJobTerminal = true;
|
keepaliveJobTerminal = true;
|
||||||
|
keepaliveJobTerminalAt = Date.now();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -477,10 +724,9 @@ export async function execute(ctx) {
|
|||||||
// connection resets should NOT permanently disable the keepalive —
|
// connection resets should NOT permanently disable the keepalive —
|
||||||
// the next tick will re-check and the reaper uses the staleness
|
// the next tick will re-check and the reaper uses the staleness
|
||||||
// window as a safety net.
|
// window as a safety net.
|
||||||
const statusCode = err?.response?.statusCode
|
if (isK8s404(err)) {
|
||||||
?? err?.statusCode;
|
|
||||||
if (statusCode === 404) {
|
|
||||||
keepaliveJobTerminal = true;
|
keepaliveJobTerminal = true;
|
||||||
|
keepaliveJobTerminalAt = Date.now();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Log transient errors but leave keepaliveJobTerminal false so
|
// Log transient errors but leave keepaliveJobTerminal false so
|
||||||
@@ -525,23 +771,44 @@ export async function execute(ctx) {
|
|||||||
if (logResult.status === "fulfilled") {
|
if (logResult.status === "fulfilled") {
|
||||||
stdout = logResult.value;
|
stdout = logResult.value;
|
||||||
}
|
}
|
||||||
// If the follow stream missed output (container exited quickly), do a
|
// One-shot log fallback: handles two failure modes with a single read.
|
||||||
// one-shot log read as fallback before the pod is cleaned up.
|
// Mode 1 — empty stream: the follow stream returned nothing (fast exit before connection).
|
||||||
if (!stdout.trim()) {
|
// Mode 2 — partial stream: we have some output but no result event (follow stream raced
|
||||||
await onLog("stdout", `[paperclip] Log stream returned empty — reading pod logs directly...\n`);
|
// with container exit and captured only the init line before the connection dropped).
|
||||||
stdout = await readPodLogs(namespace, podName, kubeconfigPath);
|
// A one-shot readPodLogs is more reliable for already-terminated containers and reads
|
||||||
if (stdout.trim()) {
|
// from the beginning of the log, giving us the full output.
|
||||||
|
// We use a cheap string scan for the result-event guard (avoids a full JSON parse here;
|
||||||
|
// the authoritative parse happens once below after all fallbacks complete).
|
||||||
|
const hasResultEvent = stdout.includes('"type":"result"');
|
||||||
|
const needsOneShot = !stdout.trim() || (stdout.trim() && !hasResultEvent);
|
||||||
|
if (needsOneShot) {
|
||||||
|
if (!stdout.trim()) {
|
||||||
|
await onLog("stdout", `[paperclip] Log stream returned empty — reading pod logs directly...\n`);
|
||||||
|
}
|
||||||
|
const oneShotLogs = await readPodLogs(namespace, podName, kubeconfigPath);
|
||||||
|
if (!stdout.trim() && oneShotLogs.trim()) {
|
||||||
|
stdout = oneShotLogs;
|
||||||
await onLog("stdout", stdout);
|
await onLog("stdout", stdout);
|
||||||
}
|
}
|
||||||
|
else if (oneShotLogs && oneShotLogs.length > stdout.length) {
|
||||||
|
await onLog("stdout", `[paperclip] Log stream captured partial output — supplemental one-shot read returned more content.\n`);
|
||||||
|
stdout = oneShotLogs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (completionResult.status === "fulfilled") {
|
if (completionResult.status === "fulfilled") {
|
||||||
jobTimedOut = completionResult.value.timedOut;
|
jobTimedOut = completionResult.value.timedOut;
|
||||||
|
if (completionResult.value.jobGone) {
|
||||||
|
// Job was deleted by TTL or externally before we observed the Complete/Failed
|
||||||
|
// condition. The container must have exited first (TTL only fires after
|
||||||
|
// completion), so log streaming has captured the full output — continue
|
||||||
|
// to stdout parsing rather than returning an error.
|
||||||
|
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// waitForJobCompletion threw — re-check job state to avoid returning
|
// waitForJobCompletion threw an unexpected error — re-check job state to
|
||||||
// while the job is still running (which would cause UI staleness and
|
// avoid returning while the job is still running. Use a bounded timeout
|
||||||
// concurrency errors on retry). Use a bounded timeout (60s) so we
|
// (60s) so we don't hang the heartbeat indefinitely if the K8s API is degraded.
|
||||||
// don't hang the heartbeat indefinitely if the K8s API is degraded.
|
|
||||||
jobTimedOut = false;
|
jobTimedOut = false;
|
||||||
const RECHECK_TIMEOUT_MS = 60_000;
|
const RECHECK_TIMEOUT_MS = 60_000;
|
||||||
const actualState = await waitForJobCompletion(namespace, jobName, RECHECK_TIMEOUT_MS, kubeconfigPath);
|
const actualState = await waitForJobCompletion(namespace, jobName, RECHECK_TIMEOUT_MS, kubeconfigPath);
|
||||||
@@ -550,6 +817,11 @@ export async function execute(ctx) {
|
|||||||
// Return an error so the UI knows the run is not done.
|
// Return an error so the UI knows the run is not done.
|
||||||
jobTimedOut = true;
|
jobTimedOut = true;
|
||||||
}
|
}
|
||||||
|
else if (actualState.jobGone) {
|
||||||
|
// Job was deleted before we could confirm terminal state — same as the
|
||||||
|
// fulfilled+jobGone case above: proceed with captured output.
|
||||||
|
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
||||||
|
}
|
||||||
else if (!actualState.succeeded) {
|
else if (!actualState.succeeded) {
|
||||||
// Job still not terminal — the completion error was likely transient.
|
// Job still not terminal — the completion error was likely transient.
|
||||||
// Return an error so the UI knows the run is not done, rather than
|
// Return an error so the UI knows the run is not done, rather than
|
||||||
@@ -615,16 +887,11 @@ export async function execute(ctx) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (!parsed) {
|
if (!parsed) {
|
||||||
const stderrLine = stdout.split(/\r?\n/).map((l) => l.trim()).find(Boolean) ?? "";
|
|
||||||
return {
|
return {
|
||||||
exitCode,
|
exitCode,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: exitCode === 0
|
errorMessage: buildPartialRunError(exitCode, parsedStream.model, stdout),
|
||||||
? "Failed to parse Claude JSON output"
|
|
||||||
: stderrLine
|
|
||||||
? `Claude exited with code ${exitCode ?? -1}: ${stderrLine}`
|
|
||||||
: `Claude exited with code ${exitCode ?? -1}`,
|
|
||||||
resultJson: { stdout },
|
resultJson: { stdout },
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -636,8 +903,7 @@ export async function execute(ctx) {
|
|||||||
outputTokens: asNumber(usageObj.output_tokens, 0),
|
outputTokens: asNumber(usageObj.output_tokens, 0),
|
||||||
};
|
};
|
||||||
})();
|
})();
|
||||||
const runtimeSessionParams = parseObject(runtime.sessionParams);
|
const fallbackSessionId = currentSessionIdRaw;
|
||||||
const fallbackSessionId = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
|
|
||||||
const resolvedSessionId = parsedStream.sessionId
|
const resolvedSessionId = parsedStream.sessionId
|
||||||
?? (asString(parsed.session_id, fallbackSessionId) || fallbackSessionId);
|
?? (asString(parsed.session_id, fallbackSessionId) || fallbackSessionId);
|
||||||
const model = asString(config.model, "");
|
const model = asString(config.model, "");
|
||||||
|
|||||||
Vendored
+1
-1
File diff suppressed because one or more lines are too long
Vendored
+20
@@ -1,5 +1,19 @@
|
|||||||
import type * as k8s from "@kubernetes/client-node";
|
import type * as k8s from "@kubernetes/client-node";
|
||||||
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
|
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
|
||||||
|
/**
|
||||||
|
* Build the shell command prefix that installs a native Node.js PostToolUse
|
||||||
|
* hook into Claude Code's settings. The hook truncates oversized tool outputs
|
||||||
|
* before they reach the model — replacing the RTK binary init-container
|
||||||
|
* approach with a self-contained Node.js implementation.
|
||||||
|
*
|
||||||
|
* Both scripts are base64-encoded so they can be embedded in a sh -c command
|
||||||
|
* string without any quoting or escaping issues.
|
||||||
|
*
|
||||||
|
* @param maxOutputBytes Byte threshold above which tool output is truncated.
|
||||||
|
* @returns A shell command string (suitable for "&&"-chaining
|
||||||
|
* before the claude invocation).
|
||||||
|
*/
|
||||||
|
export declare function buildRtkSetupCommands(maxOutputBytes: number): string;
|
||||||
import type { SelfPodInfo } from "./k8s-client.js";
|
import type { SelfPodInfo } from "./k8s-client.js";
|
||||||
export interface JobBuildInput {
|
export interface JobBuildInput {
|
||||||
ctx: AdapterExecutionContext;
|
ctx: AdapterExecutionContext;
|
||||||
@@ -24,5 +38,11 @@ export interface JobBuildResult {
|
|||||||
* staged as a K8s Secret before creating the Job. */
|
* staged as a K8s Secret before creating the Job. */
|
||||||
promptSecret: PromptSecret | null;
|
promptSecret: PromptSecret | null;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Sanitize a string for use as a Kubernetes label value (RFC 1123 subset:
|
||||||
|
* `[a-zA-Z0-9]([-_.a-zA-Z0-9]*[a-zA-Z0-9])?`, max 63 chars). Returns `null`
|
||||||
|
* when no usable characters remain — the caller should omit the label.
|
||||||
|
*/
|
||||||
|
export declare function sanitizeLabelValue(value: string, maxLen?: number): string | null;
|
||||||
export declare function buildJobManifest(input: JobBuildInput): JobBuildResult;
|
export declare function buildJobManifest(input: JobBuildInput): JobBuildResult;
|
||||||
//# sourceMappingURL=job-manifest.d.ts.map
|
//# sourceMappingURL=job-manifest.d.ts.map
|
||||||
Vendored
+1
-1
@@ -1 +1 @@
|
|||||||
{"version":3,"file":"job-manifest.d.ts","sourceRoot":"","sources":["../../src/server/job-manifest.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,GAAG,MAAM,yBAAyB,CAAC;AACpD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAC;AAgD1E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AA6CnD,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,uBAAuB,CAAC;IAC7B,OAAO,EAAE,WAAW,CAAC;CACtB;AAED;;+EAE+E;AAC/E,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC;0DACsD;IACtD,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;CACnC;AAuHD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,aAAa,GAAG,cAAc,CAkRrE"}
|
{"version":3,"file":"job-manifest.d.ts","sourceRoot":"","sources":["../../src/server/job-manifest.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,GAAG,MAAM,yBAAyB,CAAC;AACpD,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAC;AAY1E;;;;;;;;;;;;GAYG;AACH,wBAAgB,qBAAqB,CAAC,cAAc,EAAE,MAAM,GAAG,MAAM,CAiEpE;AAsCD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AA6CnD,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,uBAAuB,CAAC;IAC7B,OAAO,EAAE,WAAW,CAAC;CACtB;AAED;;+EAE+E;AAC/E,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC;0DACsD;IACtD,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;CACnC;AAMD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,SAAK,GAAG,MAAM,GAAG,IAAI,CAI5E;AAmHD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,aAAa,GAAG,cAAc,CAmSrE"}
|
||||||
Vendored
+106
-1
@@ -1,5 +1,81 @@
|
|||||||
import { asString, asNumber, asBoolean, asStringArray, parseObject, buildPaperclipEnv, renderTemplate, } from "@paperclipai/adapter-utils/server-utils";
|
import { asString, asNumber, asBoolean, asStringArray, parseObject, buildPaperclipEnv, renderTemplate, } from "@paperclipai/adapter-utils/server-utils";
|
||||||
import { createHash } from "node:crypto";
|
import { createHash } from "node:crypto";
|
||||||
|
/**
|
||||||
|
* Build the shell command prefix that installs a native Node.js PostToolUse
|
||||||
|
* hook into Claude Code's settings. The hook truncates oversized tool outputs
|
||||||
|
* before they reach the model — replacing the RTK binary init-container
|
||||||
|
* approach with a self-contained Node.js implementation.
|
||||||
|
*
|
||||||
|
* Both scripts are base64-encoded so they can be embedded in a sh -c command
|
||||||
|
* string without any quoting or escaping issues.
|
||||||
|
*
|
||||||
|
* @param maxOutputBytes Byte threshold above which tool output is truncated.
|
||||||
|
* @returns A shell command string (suitable for "&&"-chaining
|
||||||
|
* before the claude invocation).
|
||||||
|
*/
|
||||||
|
export function buildRtkSetupCommands(maxOutputBytes) {
|
||||||
|
// --- Filter script ----------------------------------------------------------
|
||||||
|
// This script runs as the PostToolUse hook inside every K8s Job pod.
|
||||||
|
// Claude Code writes the hook event as JSON to the script's stdin; the script
|
||||||
|
// truncates the tool_response/tool_result content when it exceeds the
|
||||||
|
// threshold and writes the (possibly modified) JSON to stdout.
|
||||||
|
//
|
||||||
|
// Field-name coverage:
|
||||||
|
// • tool_response — documented hook event format for PostToolUse
|
||||||
|
// • tool_result — alternative name seen in some Claude Code versions
|
||||||
|
// Content may be a plain string or an array of typed blocks (text/image/…).
|
||||||
|
const filterScript = [
|
||||||
|
`const c=[];`,
|
||||||
|
`process.stdin.on('data',d=>c.push(d));`,
|
||||||
|
`process.stdin.on('end',()=>{`,
|
||||||
|
`const raw=Buffer.concat(c).toString('utf-8');`,
|
||||||
|
`let o;try{o=JSON.parse(raw);}catch{process.stdout.write(raw);return;}`,
|
||||||
|
`const MAX=${maxOutputBytes};`,
|
||||||
|
`function trunc(s){`,
|
||||||
|
`if(typeof s!=='string')return s;`,
|
||||||
|
`const b=Buffer.from(s,'utf-8');`,
|
||||||
|
`if(b.length<=MAX)return s;`,
|
||||||
|
`return b.slice(0,MAX).toString('utf-8')+'\\n[...'+(b.length-MAX)+' bytes truncated by paperclip-rtk]';`,
|
||||||
|
`}`,
|
||||||
|
`const tr=o&&(o.tool_response||o.tool_result);`,
|
||||||
|
`if(tr){`,
|
||||||
|
`if(typeof tr.content==='string'){tr.content=trunc(tr.content);}`,
|
||||||
|
`else if(Array.isArray(tr.content)){`,
|
||||||
|
`tr.content=tr.content.map(function(b){`,
|
||||||
|
`if(b&&typeof b==='object'&&typeof b.text==='string'){`,
|
||||||
|
`return Object.assign({},b,{text:trunc(b.text)});`,
|
||||||
|
`}return b;`,
|
||||||
|
`});`,
|
||||||
|
`}`,
|
||||||
|
`}`,
|
||||||
|
`process.stdout.write(JSON.stringify(o));`,
|
||||||
|
`});`,
|
||||||
|
].join("");
|
||||||
|
// --- Settings script --------------------------------------------------------
|
||||||
|
// Reads the existing ~/.claude/settings.json (if any), merges in the RTK
|
||||||
|
// PostToolUse hook, and writes the file back. All other settings sections
|
||||||
|
// are preserved; only PostToolUse is replaced so we own the full hook list
|
||||||
|
// for this run.
|
||||||
|
const settingsScript = [
|
||||||
|
`const fs=require('fs'),pt=require('path');`,
|
||||||
|
`const p=pt.join(process.env.HOME,'.claude','settings.json');`,
|
||||||
|
`let s={};try{s=JSON.parse(fs.readFileSync(p,'utf-8'));}catch(e){}`,
|
||||||
|
`s.hooks=s.hooks||{};`,
|
||||||
|
`s.hooks.PostToolUse=[{matcher:'.*',hooks:[{type:'command',command:'node /tmp/.rtk-filter.js'}]}];`,
|
||||||
|
`fs.mkdirSync(pt.dirname(p),{recursive:true});`,
|
||||||
|
`fs.writeFileSync(p,JSON.stringify(s));`,
|
||||||
|
].join("");
|
||||||
|
// Encode as base64 so the strings can be embedded directly in a shell command
|
||||||
|
// without any quoting concerns (base64 alphabet: A-Za-z0-9+/=).
|
||||||
|
const filterB64 = Buffer.from(filterScript, "utf-8").toString("base64");
|
||||||
|
const settingsB64 = Buffer.from(settingsScript, "utf-8").toString("base64");
|
||||||
|
return [
|
||||||
|
// Write the filter script
|
||||||
|
`node -e "require('fs').writeFileSync('/tmp/.rtk-filter.js',Buffer.from('${filterB64}','base64').toString('utf-8'))"`,
|
||||||
|
// Install the Claude Code PostToolUse hook (merge into existing settings)
|
||||||
|
`node -e "eval(Buffer.from('${settingsB64}','base64').toString('utf-8'))"`,
|
||||||
|
].join(" && ");
|
||||||
|
}
|
||||||
/** Prompts above this size (bytes) are staged via a Secret instead of an
|
/** Prompts above this size (bytes) are staged via a Secret instead of an
|
||||||
* init container env var, protecting against the ~1 MiB PodSpec limit. */
|
* init container env var, protecting against the ~1 MiB PodSpec limit. */
|
||||||
const LARGE_PROMPT_THRESHOLD_BYTES = 256 * 1024;
|
const LARGE_PROMPT_THRESHOLD_BYTES = 256 * 1024;
|
||||||
@@ -91,6 +167,16 @@ function parseKeyValueConfig(raw) {
|
|||||||
function sanitizeForK8sName(value, maxLen = 16) {
|
function sanitizeForK8sName(value, maxLen = 16) {
|
||||||
return value.toLowerCase().replace(/[^a-z0-9-]/g, "").slice(0, maxLen);
|
return value.toLowerCase().replace(/[^a-z0-9-]/g, "").slice(0, maxLen);
|
||||||
}
|
}
|
||||||
|
/**
 * Sanitize a string for use as a Kubernetes label value: only
 * `[a-zA-Z0-9._-]` characters, beginning and ending with an alphanumeric,
 * at most `maxLen` (default 63) characters.
 *
 * Leading separators are stripped BEFORE the length cut so they do not use up
 * the length budget (e.g. `"---abc"` with `maxLen = 3` yields `"abc"`, not
 * `null`). Trailing separators are stripped AFTER the cut because the cut
 * itself can leave one at the end.
 *
 * @param value  Raw string to sanitize.
 * @param maxLen Maximum length of the returned value (default 63).
 * @returns The sanitized value, or `null` when no usable characters remain —
 *          the caller should omit the label.
 */
export function sanitizeLabelValue(value, maxLen = 63) {
    const sanitized = value
        .replace(/[^a-zA-Z0-9._-]/g, "")
        .replace(/^[^a-zA-Z0-9]+/, "")
        .slice(0, maxLen)
        .replace(/[^a-zA-Z0-9]+$/, "");
    return sanitized.length > 0 ? sanitized : null;
}
|
||||||
/**
|
/**
|
||||||
* Build a short deterministic hash suffix from the raw inputs to avoid
|
* Build a short deterministic hash suffix from the raw inputs to avoid
|
||||||
* collisions when sanitized slugs happen to be identical.
|
* collisions when sanitized slugs happen to be identical.
|
||||||
@@ -202,6 +288,8 @@ export function buildJobManifest(input) {
|
|||||||
const nodeSelector = parseKeyValueConfig(config.nodeSelector);
|
const nodeSelector = parseKeyValueConfig(config.nodeSelector);
|
||||||
const tolerations = Array.isArray(config.tolerations) ? config.tolerations : [];
|
const tolerations = Array.isArray(config.tolerations) ? config.tolerations : [];
|
||||||
const extraLabels = parseKeyValueConfig(config.labels);
|
const extraLabels = parseKeyValueConfig(config.labels);
|
||||||
|
const enableRtk = asBoolean(config.enableRtk, false);
|
||||||
|
const rtkMaxOutputBytes = asNumber(config.rtkMaxOutputBytes, 50000);
|
||||||
// Resolve working directory — use workspace cwd, fall back to /paperclip
|
// Resolve working directory — use workspace cwd, fall back to /paperclip
|
||||||
const workspaceContext = parseObject(context.paperclipWorkspace);
|
const workspaceContext = parseObject(context.paperclipWorkspace);
|
||||||
const workspaceCwd = asString(workspaceContext.cwd, "");
|
const workspaceCwd = asString(workspaceContext.cwd, "");
|
||||||
@@ -289,6 +377,17 @@ export function buildJobManifest(input) {
|
|||||||
"paperclip.io/company-id": agent.companyId,
|
"paperclip.io/company-id": agent.companyId,
|
||||||
"paperclip.io/adapter-type": "claude_k8s",
|
"paperclip.io/adapter-type": "claude_k8s",
|
||||||
};
|
};
|
||||||
|
// Reattach-target labels: let a future execute() identify this Job as the
|
||||||
|
// continuation of the same logical unit of work (same task + same resume
|
||||||
|
// session) so it can attach to the running pod across a Paperclip restart
|
||||||
|
// instead of deleting it and starting over (FAR-124).
|
||||||
|
const taskIdRaw = asString(context.taskId, "") || asString(context.issueId, "");
|
||||||
|
const taskLabel = taskIdRaw ? sanitizeLabelValue(taskIdRaw) : null;
|
||||||
|
if (taskLabel)
|
||||||
|
labels["paperclip.io/task-id"] = taskLabel;
|
||||||
|
const sessionLabel = runtimeSessionId ? sanitizeLabelValue(runtimeSessionId) : null;
|
||||||
|
if (sessionLabel)
|
||||||
|
labels["paperclip.io/session-id"] = sessionLabel;
|
||||||
for (const [key, value] of Object.entries(extraLabels)) {
|
for (const [key, value] of Object.entries(extraLabels)) {
|
||||||
labels[key] = value;
|
labels[key] = value;
|
||||||
}
|
}
|
||||||
@@ -345,7 +444,13 @@ export function buildJobManifest(input) {
|
|||||||
};
|
};
|
||||||
// Build the claude command string for the main container
|
// Build the claude command string for the main container
|
||||||
const claudeArgsEscaped = claudeArgs.map((a) => `'${a.replace(/'/g, "'\\''")}'`).join(" ");
|
const claudeArgsEscaped = claudeArgs.map((a) => `'${a.replace(/'/g, "'\\''")}'`).join(" ");
|
||||||
const mainCommand = `cat /tmp/prompt/prompt.txt | claude ${claudeArgsEscaped}`;
|
const claudeInvocation = `cat /tmp/prompt/prompt.txt | claude ${claudeArgsEscaped}`;
|
||||||
|
// When RTK output filtering is enabled, prepend the Node.js hook setup.
|
||||||
|
// This writes a filter script and a Claude Code settings file that installs
|
||||||
|
// it as a PostToolUse hook — no external binary or init container required.
|
||||||
|
const mainCommand = enableRtk
|
||||||
|
? `${buildRtkSetupCommands(rtkMaxOutputBytes)} && ${claudeInvocation}`
|
||||||
|
: claudeInvocation;
|
||||||
// Decide prompt delivery strategy: env var (small) or Secret volume (large).
|
// Decide prompt delivery strategy: env var (small) or Secret volume (large).
|
||||||
const promptBytes = Buffer.byteLength(prompt, "utf-8");
|
const promptBytes = Buffer.byteLength(prompt, "utf-8");
|
||||||
const useLargePromptPath = promptBytes > LARGE_PROMPT_THRESHOLD_BYTES;
|
const useLargePromptPath = promptBytes > LARGE_PROMPT_THRESHOLD_BYTES;
|
||||||
|
|||||||
Vendored
+1
-1
File diff suppressed because one or more lines are too long
Generated
+2
-2
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.28",
|
"version": "0.1.34",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.28",
|
"version": "0.1.34",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@kubernetes/client-node": "^1.0.0",
|
"@kubernetes/client-node": "^1.0.0",
|
||||||
|
|||||||
+4
-4
@@ -1,16 +1,16 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.28",
|
"version": "0.1.34",
|
||||||
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/farhoodliquor/paperclip-adapter-claude-k8s"
|
"url": "https://github.com/farhoodlabs/paperclip-adapter-claude-k8s"
|
||||||
},
|
},
|
||||||
"bugs": {
|
"bugs": {
|
||||||
"url": "https://github.com/farhoodliquor/paperclip-adapter-claude-k8s/issues"
|
"url": "https://github.com/farhoodlabs/paperclip-adapter-claude-k8s/issues"
|
||||||
},
|
},
|
||||||
"homepage": "https://github.com/farhoodliquor/paperclip-adapter-claude-k8s#readme",
|
"homepage": "https://github.com/farhoodlabs/paperclip-adapter-claude-k8s#readme",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"paperclip": {
|
"paperclip": {
|
||||||
"adapterUiParser": "1.0.0"
|
"adapterUiParser": "1.0.0"
|
||||||
|
|||||||
@@ -42,6 +42,14 @@ describe("getConfigSchema", () => {
|
|||||||
expect(field!.default).toBe(true);
|
expect(field!.default).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("reattachOrphanedJobs defaults to true", () => {
|
||||||
|
const schema = getConfigSchema();
|
||||||
|
const field = schema.fields.find((f: ConfigFieldSchema) => f.key === "reattachOrphanedJobs");
|
||||||
|
expect(field).toBeDefined();
|
||||||
|
expect(field!.type).toBe("toggle");
|
||||||
|
expect(field!.default).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
it("has imagePullPolicy as select with correct options", () => {
|
it("has imagePullPolicy as select with correct options", () => {
|
||||||
const schema = getConfigSchema();
|
const schema = getConfigSchema();
|
||||||
const field = schema.fields.find((f: ConfigFieldSchema) => f.key === "imagePullPolicy");
|
const field = schema.fields.find((f: ConfigFieldSchema) => f.key === "imagePullPolicy");
|
||||||
|
|||||||
@@ -89,6 +89,13 @@ export function getConfigSchema(): AdapterConfigSchema {
|
|||||||
label: "Retain Jobs",
|
label: "Retain Jobs",
|
||||||
hint: "Skip cleanup of completed Jobs for debugging purposes.",
|
hint: "Skip cleanup of completed Jobs for debugging purposes.",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
type: "toggle",
|
||||||
|
key: "reattachOrphanedJobs",
|
||||||
|
label: "Reattach to Orphaned Jobs",
|
||||||
|
hint: "If a prior K8s Job for the same agent/task/session is still running (e.g. Paperclip restarted mid-run), attach to it and stream its output instead of deleting it and starting a new pod. Default: on.",
|
||||||
|
default: true,
|
||||||
|
},
|
||||||
// Resource Limits
|
// Resource Limits
|
||||||
{
|
{
|
||||||
type: "text",
|
type: "text",
|
||||||
|
|||||||
+226
-2
@@ -1,5 +1,44 @@
|
|||||||
import { describe, it, expect } from "vitest";
|
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||||
import { isK8s404, buildPartialRunError } from "./execute.js";
|
import type * as k8s from "@kubernetes/client-node";
|
||||||
|
import type { Writable } from "node:stream";
|
||||||
|
|
||||||
|
// Replace the K8s client module before execute.js is imported so that
// streamPodLogsOnce receives a log API backed by `mockLogFn`. Each test
// configures `mockLogFn` itself (e.g. a promise that never settles to simulate
// the FAR-10 hang, where the K8s API drops the connection but the client keeps
// awaiting). The remaining API getters return empty stubs.
const mockLogFn = vi.fn();
vi.mock("./k8s-client.js", () => {
  const emptyApi = () => ({});
  return {
    getLogApi: () => ({ log: mockLogFn }),
    getBatchApi: emptyApi,
    getCoreApi: emptyApi,
    getAuthzApi: emptyApi,
    getSelfPodInfo: vi.fn(),
    resetCache: vi.fn(),
  };
});
|
||||||
|
|
||||||
|
const { isK8s404, buildPartialRunError, isReattachableOrphan, describePodTerminatedError, streamPodLogsOnce } = await import("./execute.js");
|
||||||
|
|
||||||
|
/**
 * Test fixture: builds a minimal Job object named "ac-job" in the "paperclip"
 * namespace.
 *
 * The adapter-type label is always present (defaulting to "claude_k8s"); the
 * agent/run/task/session labels are only added when a non-empty value is
 * supplied. When `terminal` is set the Job carries a `Complete=True`
 * condition, otherwise its condition list is empty.
 */
function makeJob(opts: {
  runId?: string;
  agentId?: string;
  taskId?: string;
  sessionId?: string;
  adapterType?: string;
  terminal?: boolean;
}): k8s.V1Job {
  const optionalLabels: Array<[string, string | undefined]> = [
    ["paperclip.io/agent-id", opts.agentId],
    ["paperclip.io/run-id", opts.runId],
    ["paperclip.io/task-id", opts.taskId],
    ["paperclip.io/session-id", opts.sessionId],
  ];

  const labels: Record<string, string> = {
    "paperclip.io/adapter-type": opts.adapterType ?? "claude_k8s",
  };
  for (const [key, value] of optionalLabels) {
    if (value) labels[key] = value;
  }

  const conditions = opts.terminal ? [{ type: "Complete", status: "True" }] : [];

  return {
    metadata: { name: "ac-job", namespace: "paperclip", labels },
    status: { conditions },
  } as k8s.V1Job;
}
|
||||||
|
|
||||||
describe("isK8s404", () => {
|
describe("isK8s404", () => {
|
||||||
it("returns false for non-Error values", () => {
|
it("returns false for non-Error values", () => {
|
||||||
@@ -106,3 +145,188 @@ describe("buildPartialRunError", () => {
|
|||||||
expect(msg).toBe("Claude exited with code 1: real error line");
|
expect(msg).toBe("Claude exited with code 1: real error line");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Covers the reattach predicate: one accepting case, then every reason a Job
// must be rejected (terminal, missing expectation, mismatched or absent label).
describe("isReattachableOrphan", () => {
  const agentId = "agent-abc";
  const taskId = "task-xyz";
  const sessionId = "sess-123";

  type Expected = { agentId: string; taskId: string | null; sessionId: string | null };
  const matching: Expected = { agentId, taskId, sessionId };

  it("returns true when agent/task/session all match and Job is not terminal", () => {
    const orphan = makeJob({ agentId, taskId, sessionId, runId: "old-run" });
    expect(isReattachableOrphan(orphan, matching)).toBe(true);
  });

  const rejections: Array<{
    title: string;
    job: Parameters<typeof makeJob>[0];
    expected: Expected;
  }> = [
    {
      title: "returns false when the Job is already Complete",
      job: { agentId, taskId, sessionId, runId: "old-run", terminal: true },
      expected: matching,
    },
    {
      title: "returns false when expected taskId is null (caller couldn't derive one)",
      job: { agentId, taskId, sessionId },
      expected: { agentId, taskId: null, sessionId },
    },
    {
      title: "returns false when expected sessionId is null",
      job: { agentId, taskId, sessionId },
      expected: { agentId, taskId, sessionId: null },
    },
    {
      title: "returns false when agent id doesn't match",
      job: { agentId: "agent-other", taskId, sessionId },
      expected: matching,
    },
    {
      title: "returns false when task id doesn't match",
      job: { agentId, taskId: "task-other", sessionId },
      expected: matching,
    },
    {
      title: "returns false when session id doesn't match",
      job: { agentId, taskId, sessionId: "sess-other" },
      expected: matching,
    },
    {
      title: "returns false when the Job is from a different adapter type",
      job: { agentId, taskId, sessionId, adapterType: "claude_local" },
      expected: matching,
    },
    {
      title: "returns false when Job has no task-id label (labels were introduced in FAR-124)",
      job: { agentId, sessionId },
      expected: matching,
    },
    {
      title: "returns false when Job has no session-id label",
      job: { agentId, taskId },
      expected: matching,
    },
  ];

  for (const { title, job, expected } of rejections) {
    it(title, () => {
      expect(isReattachableOrphan(makeJob(job), expected)).toBe(false);
    });
  }
});
|
||||||
|
|
||||||
|
// Regression (FAR-10): waitForPod has to throw when a pod reaches phase=Failed
// rather than hand back the pod name. describePodTerminatedError is the helper
// that composes the thrown message, so these cases pin its output: a Failed pod
// must yield a structured, actionable error instead of silently falling into
// the log-stream path.
describe("describePodTerminatedError", () => {
  // Single-container status list whose container is in the `terminated` state.
  const terminatedStatuses = (name: string, terminated: Record<string, unknown>) =>
    [{ name, state: { terminated } }] as unknown as k8s.V1ContainerStatus[];

  it("includes exit code and reason when claude container status is available", () => {
    const statuses = terminatedStatuses("claude", { exitCode: 137, reason: "OOMKilled" });
    const text = describePodTerminatedError("mypod", "Failed", statuses);
    expect(text).toContain("137");
    expect(text).toContain("OOMKilled");
    expect(text).toContain("phase=Failed");
  });

  it("falls back to message field when reason is absent", () => {
    const statuses = terminatedStatuses("claude", { exitCode: 1, message: "signal: killed" });
    const text = describePodTerminatedError("mypod", "Failed", statuses);
    expect(text).toContain("signal: killed");
    expect(text).toContain("1");
  });

  it("returns generic message when no claude container status is present", () => {
    const text = describePodTerminatedError("mypod", "Failed", []);
    expect(text).toBe("Pod mypod reached phase=Failed");
  });

  it("ignores non-claude containers", () => {
    const statuses = terminatedStatuses("sidecar", { exitCode: 0, reason: "Completed" });
    const text = describePodTerminatedError("mypod", "Failed", statuses);
    expect(text).toBe("Pod mypod reached phase=Failed");
  });

  it("handles null exitCode gracefully", () => {
    const statuses = terminatedStatuses("claude", { exitCode: null, reason: "Error" });
    const text = describePodTerminatedError("mypod", "Failed", statuses);
    expect(text).toContain("unknown");
    expect(text).toContain("Error");
  });
});
|
||||||
|
|
||||||
|
// Regression (FAR-10 hardening): streamPodLogsOnce must not block forever when
// the K8s client's logApi.log promise never settles. Once stopSignal fires, the
// bail timer has to force a return within LOG_STREAM_BAIL_TIMEOUT_MS (3s in the
// implementation) so execute() is not left waiting on a dead stream.
describe("streamPodLogsOnce bail timer", () => {
  beforeEach(() => {
    mockLogFn.mockReset();
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it("returns within the bail window when stopSignal fires during a hung log call", async () => {
    // A promise that never settles stands in for the FAR-10 hang: the K8s
    // response stream stalls but the connection is never closed.
    mockLogFn.mockImplementation(
      (_ns, _pod, _ctr, _writable: Writable) => new Promise(() => { /* never resolves */ }),
    );

    const onLog = vi.fn().mockResolvedValue(undefined);
    const stopSignal = { stopped: false };

    const pending = streamPodLogsOnce(
      "default",
      "mypod",
      onLog,
      undefined,
      undefined,
      undefined,
      stopSignal,
    );

    // Raise the stop flag, then give the 200ms poller a tick to arm the bail timer.
    stopSignal.stopped = true;
    await vi.advanceTimersByTimeAsync(300);

    // Step past the 3s bail timeout; the call must now settle with an empty
    // string (no chunks were captured) instead of hanging.
    await vi.advanceTimersByTimeAsync(3_100);

    await expect(pending).resolves.toBe("");
    expect(mockLogFn).toHaveBeenCalledOnce();
  });

  it("returns promptly if logApi.log resolves before stopSignal fires (happy path, no bail involved)", async () => {
    // Immediate resolution models a normal end of the log stream.
    mockLogFn.mockImplementation(async (_ns, _pod, _ctr, _writable: Writable) => undefined);

    const onLog = vi.fn().mockResolvedValue(undefined);

    // Without a stopSignal none of the bail machinery is engaged.
    const output = await streamPodLogsOnce(
      "default",
      "mypod",
      onLog,
      undefined,
      undefined,
      undefined,
      undefined,
    );

    expect(output).toBe("");
    expect(mockLogFn).toHaveBeenCalledOnce();
  });
});
|
||||||
|
|||||||
+408
-94
@@ -1,5 +1,15 @@
|
|||||||
import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
|
import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
|
||||||
import { asString, asNumber, asBoolean, parseObject } from "@paperclipai/adapter-utils/server-utils";
|
import {
|
||||||
|
asString,
|
||||||
|
asNumber,
|
||||||
|
asBoolean,
|
||||||
|
parseObject,
|
||||||
|
readPaperclipRuntimeSkillEntries,
|
||||||
|
resolvePaperclipDesiredSkillNames,
|
||||||
|
} from "@paperclipai/adapter-utils/server-utils";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
|
import { prepareClaudePromptBundle } from "./prompt-cache.js";
|
||||||
import {
|
import {
|
||||||
parseClaudeStreamJson,
|
parseClaudeStreamJson,
|
||||||
describeClaudeFailure,
|
describeClaudeFailure,
|
||||||
@@ -7,7 +17,7 @@ import {
|
|||||||
isClaudeUnknownSessionError,
|
isClaudeUnknownSessionError,
|
||||||
} from "./parse.js";
|
} from "./parse.js";
|
||||||
import { getSelfPodInfo, getBatchApi, getCoreApi, getLogApi } from "./k8s-client.js";
|
import { getSelfPodInfo, getBatchApi, getCoreApi, getLogApi } from "./k8s-client.js";
|
||||||
import { buildJobManifest } from "./job-manifest.js";
|
import { buildJobManifest, sanitizeLabelValue } from "./job-manifest.js";
|
||||||
import { LogLineDedupFilter } from "./log-dedup.js";
|
import { LogLineDedupFilter } from "./log-dedup.js";
|
||||||
import type * as k8s from "@kubernetes/client-node";
|
import type * as k8s from "@kubernetes/client-node";
|
||||||
import { Writable } from "node:stream";
|
import { Writable } from "node:stream";
|
||||||
@@ -16,6 +26,15 @@ const POLL_INTERVAL_MS = 2000;
|
|||||||
const KEEPALIVE_INTERVAL_MS = 15_000;
|
const KEEPALIVE_INTERVAL_MS = 15_000;
|
||||||
const LOG_STREAM_RECONNECT_DELAY_MS = 3_000;
|
const LOG_STREAM_RECONNECT_DELAY_MS = 3_000;
|
||||||
const MAX_LOG_RECONNECT_ATTEMPTS = 50;
|
const MAX_LOG_RECONNECT_ATTEMPTS = 50;
|
||||||
|
// How long to keep refreshing onSpawn after the Job reaches a terminal state.
|
||||||
|
// Covers the cleanup path (delete job, parse stdout) so a slow K8s API call
|
||||||
|
// doesn't trip the 5-minute reaper staleness window.
|
||||||
|
const POST_TERMINAL_KEEPALIVE_MS = 90_000;
|
||||||
|
// Upper bound on how long streamPodLogsOnce will wait after stopSignal fires
|
||||||
|
// before force-returning, even if logApi.log has not yet resolved. Defensive
|
||||||
|
// against the K8s client library not propagating writable.destroy() into an
|
||||||
|
// abort of the underlying HTTP request.
|
||||||
|
const LOG_STREAM_BAIL_TIMEOUT_MS = 3_000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
|
* Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
|
||||||
@@ -70,6 +89,53 @@ export function buildPartialRunError(
|
|||||||
: `Claude exited with code ${exitCode ?? -1}`;
|
: `Claude exited with code ${exitCode ?? -1}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Evaluate an orphaned K8s Job (one whose `paperclip.io/run-id` label does
 * not match the current runId) as a potential reattach target. A Job is
 * reattachable when it belongs to the same agent, same task, and same resume
 * session as the current run — meaning the previous Paperclip instance was
 * mid-stream on the exact piece of work this new run was dispatched to do.
 *
 * A Job is rejected when any of the following holds:
 *  - the caller could not derive a task id or session id (`null`/empty);
 *  - the Job is already being deleted (`metadata.deletionTimestamp` is set);
 *  - its adapter-type label is not `claude_k8s`;
 *  - its agent-id, task-id or session-id label differs from `expected`
 *    (a missing label counts as a mismatch);
 *  - it has a `Complete` or `Failed` condition with status `"True"`.
 *
 * Exported for unit tests.
 *
 * @param job - Candidate Job as returned by the K8s API.
 * @param expected - Identifiers of the current run, compared verbatim against
 *   the Job's label values.
 * @returns `true` when the Job can be attached to instead of being cleaned up.
 */
export function isReattachableOrphan(
  job: k8s.V1Job,
  expected: { agentId: string; taskId: string | null; sessionId: string | null },
): boolean {
  // Without both a task and a session there is nothing safe to match on.
  if (!expected.taskId || !expected.sessionId) return false;

  // A Job with a deletionTimestamp is being torn down by the API server (its
  // pods are going away) even though no terminal condition has been recorded
  // yet. Attaching to it would stream from a pod that is about to be killed.
  if (job.metadata?.deletionTimestamp) return false;

  const labels = job.metadata?.labels ?? {};
  if (labels["paperclip.io/adapter-type"] !== "claude_k8s") return false;
  if (labels["paperclip.io/agent-id"] !== expected.agentId) return false;
  if (labels["paperclip.io/task-id"] !== expected.taskId) return false;
  if (labels["paperclip.io/session-id"] !== expected.sessionId) return false;

  // Finished Jobs (successfully or not) have no live stream to attach to.
  const conditions = job.status?.conditions ?? [];
  const terminal = conditions.some(
    (c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True",
  );
  return !terminal;
}
|
||||||
|
|
||||||
|
/**
 * Compose the error text for a pod that ended up in a terminal phase (in
 * practice phase=Failed) before, or instead of, producing a log stream.
 *
 * When the `claude` container reports a `terminated` state, its exit code and
 * its reason (falling back to the message, then to "no reason") are appended
 * so operators can diagnose a crash without reaching for kubectl. Other
 * containers are ignored. Exported for unit tests.
 *
 * @param podName - Name of the pod, echoed in the message.
 * @param phase - Pod phase, echoed as `phase=<phase>`.
 * @param containerStatuses - Container statuses taken from the pod status.
 * @returns A single-line, human-readable description.
 */
export function describePodTerminatedError(
  podName: string,
  phase: string,
  containerStatuses: k8s.V1ContainerStatus[],
): string {
  const summary = `Pod ${podName} reached phase=${phase}`;

  const exit = containerStatuses.find(({ name }) => name === "claude")?.state?.terminated;
  if (!exit) return summary;

  const exitCode = exit.exitCode ?? "unknown";
  const why = exit.reason ?? exit.message ?? "no reason";
  return `${summary}: claude exited ${exitCode} (${why})`;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wait for the Job's pod to reach a terminal or running state.
|
* Wait for the Job's pod to reach a terminal or running state.
|
||||||
* Returns the pod name once logs can be streamed, or throws on failure.
|
* Returns the pod name once logs can be streamed, or throws on failure.
|
||||||
@@ -110,7 +176,7 @@ async function waitForPod(
|
|||||||
const containerStatuses = pod.status?.containerStatuses ?? [];
|
const containerStatuses = pod.status?.containerStatuses ?? [];
|
||||||
|
|
||||||
// Log phase transitions
|
// Log phase transitions
|
||||||
const statusKey = `${phase}:${initStatuses.map((s) => s.state?.waiting?.reason ?? s.state?.terminated?.reason ?? "ok").join(",")}:${containerStatuses.map((s) => s.state?.waiting?.reason ?? s.state?.running ? "running" : "waiting").join(",")}`;
|
const statusKey = `${phase}:${initStatuses.map((s) => s.state?.waiting?.reason ?? s.state?.terminated?.reason ?? "ok").join(",")}:${containerStatuses.map((s) => s.state?.waiting?.reason ?? (s.state?.running ? "running" : "waiting")).join(",")}`;
|
||||||
if (statusKey !== lastStatus) {
|
if (statusKey !== lastStatus) {
|
||||||
const details: string[] = [`phase=${phase}`];
|
const details: string[] = [`phase=${phase}`];
|
||||||
for (const init of initStatuses) {
|
for (const init of initStatuses) {
|
||||||
@@ -121,15 +187,22 @@ async function waitForPod(
|
|||||||
for (const cs of containerStatuses) {
|
for (const cs of containerStatuses) {
|
||||||
if (cs.state?.waiting) details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`);
|
if (cs.state?.waiting) details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`);
|
||||||
else if (cs.state?.running) details.push(`${cs.name}: running`);
|
else if (cs.state?.running) details.push(`${cs.name}: running`);
|
||||||
|
else if (cs.state?.terminated) details.push(`${cs.name}: terminated (exit ${cs.state.terminated.exitCode ?? "?"}, ${cs.state.terminated.reason ?? "no reason"})`);
|
||||||
}
|
}
|
||||||
await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`);
|
await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`);
|
||||||
lastStatus = statusKey;
|
lastStatus = statusKey;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ready to stream logs
|
// Ready to stream logs
|
||||||
if (phase === "Running" || phase === "Succeeded" || phase === "Failed") {
|
if (phase === "Running" || phase === "Succeeded") {
|
||||||
return podName;
|
return podName;
|
||||||
}
|
}
|
||||||
|
// phase=Failed means the pod crashed before we could stream logs.
|
||||||
|
// Throwing here routes the caller into the error path with a structured
|
||||||
|
// message instead of entering the log-streaming path with a dead pod.
|
||||||
|
if (phase === "Failed") {
|
||||||
|
throw new Error(describePodTerminatedError(podName, phase, containerStatuses));
|
||||||
|
}
|
||||||
|
|
||||||
// Init containers done + main running (phase may still say Pending briefly)
|
// Init containers done + main running (phase may still say Pending briefly)
|
||||||
const allInitsDone = initStatuses.length > 0 && initStatuses.every(
|
const allInitsDone = initStatuses.length > 0 && initStatuses.every(
|
||||||
@@ -185,13 +258,14 @@ async function waitForPod(
|
|||||||
* Stream pod logs once via follow. Returns accumulated stdout when the
|
* Stream pod logs once via follow. Returns accumulated stdout when the
|
||||||
* stream ends (container exit, API disconnect, or abort signal).
|
* stream ends (container exit, API disconnect, or abort signal).
|
||||||
*/
|
*/
|
||||||
async function streamPodLogsOnce(
|
export async function streamPodLogsOnce(
|
||||||
namespace: string,
|
namespace: string,
|
||||||
podName: string,
|
podName: string,
|
||||||
onLog: AdapterExecutionContext["onLog"],
|
onLog: AdapterExecutionContext["onLog"],
|
||||||
kubeconfigPath?: string,
|
kubeconfigPath?: string,
|
||||||
sinceSeconds?: number,
|
sinceSeconds?: number,
|
||||||
dedup?: LogLineDedupFilter,
|
dedup?: LogLineDedupFilter,
|
||||||
|
stopSignal?: { stopped: boolean },
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const logApi = getLogApi(kubeconfigPath);
|
const logApi = getLogApi(kubeconfigPath);
|
||||||
const chunks: string[] = [];
|
const chunks: string[] = [];
|
||||||
@@ -209,15 +283,51 @@ async function streamPodLogsOnce(
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// When the job completion signal fires, destroy the writable to abort the
|
||||||
|
// in-flight follow stream. Without this, logApi.log can hang indefinitely
|
||||||
|
// when the pod terminates without closing the HTTP connection cleanly.
|
||||||
|
let stopPoller: ReturnType<typeof setInterval> | null = null;
|
||||||
|
let bailTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
|
let bailResolve: (() => void) | null = null;
|
||||||
|
// Bail promise resolves LOG_STREAM_BAIL_TIMEOUT_MS after stopSignal fires,
|
||||||
|
// even if logApi.log has not resolved by then. This is a safety net for the
|
||||||
|
// case where writable.destroy() fails to propagate to an abort of the HTTP
|
||||||
|
// request (e.g. the K8s client is awaiting a response that never comes).
|
||||||
|
const bailPromise = new Promise<void>((resolve) => {
|
||||||
|
bailResolve = resolve;
|
||||||
|
});
|
||||||
|
if (stopSignal) {
|
||||||
|
stopPoller = setInterval(() => {
|
||||||
|
if (stopSignal.stopped) {
|
||||||
|
if (!writable.destroyed) writable.destroy();
|
||||||
|
if (!bailTimer && bailResolve) {
|
||||||
|
bailTimer = setTimeout(() => {
|
||||||
|
onLog("stderr", "[paperclip] Log stream bail timer fired — forcing return\n").catch(() => {});
|
||||||
|
bailResolve!();
|
||||||
|
}, LOG_STREAM_BAIL_TIMEOUT_MS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 200);
|
||||||
|
}
|
||||||
|
|
||||||
|
const logPromise = logApi.log(namespace, podName, "claude", writable, {
|
||||||
|
follow: true,
|
||||||
|
pretty: false,
|
||||||
|
...(sinceSeconds ? { sinceSeconds } : {}),
|
||||||
|
}).catch(() => {
|
||||||
|
// follow may fail if the container already exited, the API connection
|
||||||
|
// dropped, or we aborted via writable.destroy() — not fatal.
|
||||||
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await logApi.log(namespace, podName, "claude", writable, {
|
if (stopSignal) {
|
||||||
follow: true,
|
await Promise.race([logPromise, bailPromise]);
|
||||||
pretty: false,
|
} else {
|
||||||
...(sinceSeconds ? { sinceSeconds } : {}),
|
await logPromise;
|
||||||
});
|
}
|
||||||
} catch {
|
} finally {
|
||||||
// follow may fail if the container already exited or the API
|
if (stopPoller) clearInterval(stopPoller);
|
||||||
// connection dropped — not fatal, caller decides whether to retry.
|
if (bailTimer) clearTimeout(bailTimer);
|
||||||
}
|
}
|
||||||
|
|
||||||
return chunks.join("");
|
return chunks.join("");
|
||||||
@@ -238,6 +348,7 @@ async function streamPodLogs(
|
|||||||
onLog: AdapterExecutionContext["onLog"],
|
onLog: AdapterExecutionContext["onLog"],
|
||||||
kubeconfigPath?: string,
|
kubeconfigPath?: string,
|
||||||
stopSignal?: { stopped: boolean },
|
stopSignal?: { stopped: boolean },
|
||||||
|
dedup?: LogLineDedupFilter,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const allChunks: string[] = [];
|
const allChunks: string[] = [];
|
||||||
let attempt = 0;
|
let attempt = 0;
|
||||||
@@ -247,7 +358,7 @@ async function streamPodLogs(
|
|||||||
let lastLogReceivedAt = Math.floor(Date.now() / 1000);
|
let lastLogReceivedAt = Math.floor(Date.now() / 1000);
|
||||||
// Shared across reconnects so replayed lines inside the `sinceSeconds`
|
// Shared across reconnects so replayed lines inside the `sinceSeconds`
|
||||||
// overlap window are dropped before they reach the streaming UI (FAR-123).
|
// overlap window are dropped before they reach the streaming UI (FAR-123).
|
||||||
const dedup = new LogLineDedupFilter();
|
if (!dedup) dedup = new LogLineDedupFilter();
|
||||||
|
|
||||||
while (!stopSignal?.stopped) {
|
while (!stopSignal?.stopped) {
|
||||||
if (attempt >= MAX_LOG_RECONNECT_ATTEMPTS) {
|
if (attempt >= MAX_LOG_RECONNECT_ATTEMPTS) {
|
||||||
@@ -267,7 +378,7 @@ async function streamPodLogs(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const preStreamTs = Math.floor(Date.now() / 1000);
|
const preStreamTs = Math.floor(Date.now() / 1000);
|
||||||
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup);
|
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal);
|
||||||
if (result) {
|
if (result) {
|
||||||
allChunks.push(result);
|
allChunks.push(result);
|
||||||
// Update last-received timestamp to now (the stream just ended,
|
// Update last-received timestamp to now (the stream just ended,
|
||||||
@@ -411,10 +522,18 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// Guard: claude_k8s must not run concurrently for the same agent (shared PVC/session).
|
// Guard: claude_k8s must not run concurrently for the same agent (shared PVC/session).
|
||||||
// After a server restart, orphaned K8s Jobs from previous (now-failed) runs may
|
// After a server restart, orphaned K8s Jobs from previous (now-failed) runs may
|
||||||
// still be running. We detect those by comparing the Job's run-id label against
|
// still be running. We detect those by comparing the Job's run-id label against
|
||||||
// the current runId and clean them up so this execution can proceed.
|
// the current runId. When reattachOrphanedJobs is enabled and the orphan matches
|
||||||
|
// the current agent+task+session, we attach to it instead of deleting it (FAR-124).
|
||||||
const agentId = ctx.agent.id;
|
const agentId = ctx.agent.id;
|
||||||
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
||||||
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
||||||
|
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
|
||||||
|
const runtimeSessionParams = parseObject(runtime.sessionParams);
|
||||||
|
const currentSessionIdRaw = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
|
||||||
|
const currentSessionLabel = currentSessionIdRaw ? sanitizeLabelValue(currentSessionIdRaw) : null;
|
||||||
|
const currentTaskIdRaw = asString(ctx.context.taskId, "") || asString(ctx.context.issueId, "");
|
||||||
|
const currentTaskLabel = currentTaskIdRaw ? sanitizeLabelValue(currentTaskIdRaw) : null;
|
||||||
|
let reattachTarget: { jobName: string; namespace: string; priorRunId: string; image: string } | null = null;
|
||||||
try {
|
try {
|
||||||
const batchApi = getBatchApi(kubeconfigPath);
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
const existing = await batchApi.listNamespacedJob({
|
const existing = await batchApi.listNamespacedJob({
|
||||||
@@ -434,10 +553,42 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
(j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId,
|
(j) => (j.metadata?.labels?.["paperclip.io/run-id"] ?? "") === runId,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (orphaned.length > 0) {
|
// Pick the most recent reattachable orphan — same agent + task + session,
|
||||||
const orphanNames = orphaned.map((j) => j.metadata?.name).join(", ");
|
// not terminal. Only one target is chosen; any other orphans get
|
||||||
await onLog("stdout", `[paperclip] Cleaning up ${orphaned.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
|
// cleaned up as before.
|
||||||
for (const j of orphaned) {
|
if (reattachOrphanedJobs && orphaned.length > 0) {
|
||||||
|
const candidates = orphaned
|
||||||
|
.filter((j) =>
|
||||||
|
isReattachableOrphan(j, {
|
||||||
|
agentId,
|
||||||
|
taskId: currentTaskLabel,
|
||||||
|
sessionId: currentSessionLabel,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.sort((a, b) => {
|
||||||
|
const at = new Date(a.metadata?.creationTimestamp ?? 0).getTime();
|
||||||
|
const bt = new Date(b.metadata?.creationTimestamp ?? 0).getTime();
|
||||||
|
return bt - at;
|
||||||
|
});
|
||||||
|
const chosen = candidates[0];
|
||||||
|
const chosenName = chosen?.metadata?.name;
|
||||||
|
if (chosen && chosenName) {
|
||||||
|
reattachTarget = {
|
||||||
|
jobName: chosenName,
|
||||||
|
namespace: chosen.metadata?.namespace ?? guardNamespace,
|
||||||
|
priorRunId: chosen.metadata?.labels?.["paperclip.io/run-id"] ?? "",
|
||||||
|
image: chosen.spec?.template?.spec?.containers?.[0]?.image ?? "unknown",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const toDelete = orphaned.filter(
|
||||||
|
(j) => !reattachTarget || j.metadata?.name !== reattachTarget.jobName,
|
||||||
|
);
|
||||||
|
if (toDelete.length > 0) {
|
||||||
|
const orphanNames = toDelete.map((j) => j.metadata?.name).join(", ");
|
||||||
|
await onLog("stdout", `[paperclip] Cleaning up ${toDelete.length} orphaned K8s Job(s) from previous run(s): ${orphanNames}\n`);
|
||||||
|
for (const j of toDelete) {
|
||||||
const name = j.metadata?.name;
|
const name = j.metadata?.name;
|
||||||
if (name) {
|
if (name) {
|
||||||
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
|
await cleanupJob(guardNamespace, name, onLog, kubeconfigPath);
|
||||||
@@ -475,83 +626,187 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build Job manifest
|
const coreApi = getCoreApi(kubeconfigPath);
|
||||||
const { job, jobName, namespace, prompt, claudeArgs, promptMetrics, promptSecret } = buildJobManifest({
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
ctx,
|
|
||||||
selfPod,
|
let jobName: string;
|
||||||
|
let namespace: string;
|
||||||
|
let promptSecret: { name: string; namespace: string; data: Record<string, string> } | null = null;
|
||||||
|
|
||||||
|
// Prepare the prompt bundle (skills + instructions) on the server filesystem.
|
||||||
|
// The K8s Job pod mounts the same PVC at /paperclip, so bundle paths written
|
||||||
|
// here are accessible inside the pod at the identical absolute path.
|
||||||
|
const skillEntries = await readPaperclipRuntimeSkillEntries(config, import.meta.dirname ?? __dirname);
|
||||||
|
const desiredSkillNames = new Set(resolvePaperclipDesiredSkillNames(config, skillEntries));
|
||||||
|
const desiredSkills = skillEntries.filter((e) => desiredSkillNames.has(e.key));
|
||||||
|
const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
|
||||||
|
const instructionsFileDir = instructionsFilePath ? `${path.dirname(instructionsFilePath)}/` : "";
|
||||||
|
let instructionsContents: string | null = null;
|
||||||
|
if (instructionsFilePath) {
|
||||||
|
try {
|
||||||
|
const raw = await fs.readFile(instructionsFilePath, "utf-8");
|
||||||
|
const pathDirective =
|
||||||
|
`\nThe above agent instructions were loaded from ${instructionsFilePath}. ` +
|
||||||
|
`Resolve any relative file references from ${instructionsFileDir}. ` +
|
||||||
|
`This base directory is authoritative for sibling instruction files such as ` +
|
||||||
|
`./HEARTBEAT.md, ./SOUL.md, and ./TOOLS.md; do not resolve those from the parent agent directory.`;
|
||||||
|
instructionsContents = raw + pathDirective;
|
||||||
|
} catch (err) {
|
||||||
|
await onLog(
|
||||||
|
"stderr",
|
||||||
|
`[paperclip] Warning: could not read agent instructions file "${instructionsFilePath}": ${err instanceof Error ? err.message : String(err)}\n`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const promptBundle = await prepareClaudePromptBundle({
|
||||||
|
companyId: ctx.agent.companyId,
|
||||||
|
skills: desiredSkills,
|
||||||
|
instructionsContents,
|
||||||
|
onLog,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Report invocation metadata
|
if (reattachTarget) {
|
||||||
if (onMeta) {
|
jobName = reattachTarget.jobName;
|
||||||
await onMeta({
|
namespace = reattachTarget.namespace;
|
||||||
adapterType: "claude_k8s",
|
|
||||||
command: `kubectl job/${jobName}`,
|
|
||||||
cwd: namespace,
|
|
||||||
commandArgs: claudeArgs,
|
|
||||||
commandNotes: [
|
|
||||||
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
|
|
||||||
`Namespace: ${namespace}`,
|
|
||||||
`Timeout: ${timeoutSec}s`,
|
|
||||||
],
|
|
||||||
prompt,
|
|
||||||
...(promptMetrics ? { promptMetrics } : {}),
|
|
||||||
context: ctx.context,
|
|
||||||
} as Parameters<typeof onMeta>[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the prompt is large, create a Secret to hold it (avoids the ~1 MiB
|
// Announce reattach metadata. Prompt and args aren't known here — they
|
||||||
// PodSpec limit). The Secret is cleaned up in the finally block.
|
// belong to the prior run that created this pod and are already present
|
||||||
const coreApi = getCoreApi(kubeconfigPath);
|
// on the running container.
|
||||||
if (promptSecret) {
|
if (onMeta) {
|
||||||
try {
|
await onMeta({
|
||||||
await coreApi.createNamespacedSecret({
|
adapterType: "claude_k8s",
|
||||||
namespace: promptSecret.namespace,
|
command: `kubectl job/${jobName}`,
|
||||||
body: {
|
cwd: namespace,
|
||||||
apiVersion: "v1",
|
commandArgs: [],
|
||||||
kind: "Secret",
|
commandNotes: [
|
||||||
metadata: {
|
`Image: ${reattachTarget.image}`,
|
||||||
name: promptSecret.name,
|
`Namespace: ${namespace}`,
|
||||||
namespace: promptSecret.namespace,
|
`Reattached from prior run: ${reattachTarget.priorRunId || "unknown"}`,
|
||||||
labels: {
|
`Timeout: ${timeoutSec}s`,
|
||||||
"app.kubernetes.io/managed-by": "paperclip",
|
],
|
||||||
"paperclip.io/adapter-type": "claude_k8s",
|
prompt: "",
|
||||||
"paperclip.io/run-id": runId,
|
context: ctx.context,
|
||||||
|
} as Parameters<typeof onMeta>[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
|
||||||
|
} else {
|
||||||
|
// Build Job manifest
|
||||||
|
const built = buildJobManifest({ ctx, selfPod, promptBundle });
|
||||||
|
const job = built.job;
|
||||||
|
jobName = built.jobName;
|
||||||
|
namespace = built.namespace;
|
||||||
|
const prompt = built.prompt;
|
||||||
|
const claudeArgs = built.claudeArgs;
|
||||||
|
const promptMetrics = built.promptMetrics;
|
||||||
|
promptSecret = built.promptSecret;
|
||||||
|
|
||||||
|
// Report invocation metadata
|
||||||
|
if (onMeta) {
|
||||||
|
await onMeta({
|
||||||
|
adapterType: "claude_k8s",
|
||||||
|
command: `kubectl job/${jobName}`,
|
||||||
|
cwd: namespace,
|
||||||
|
commandArgs: claudeArgs,
|
||||||
|
commandNotes: [
|
||||||
|
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
|
||||||
|
`Namespace: ${namespace}`,
|
||||||
|
`Timeout: ${timeoutSec}s`,
|
||||||
|
],
|
||||||
|
prompt,
|
||||||
|
...(promptMetrics ? { promptMetrics } : {}),
|
||||||
|
context: ctx.context,
|
||||||
|
} as Parameters<typeof onMeta>[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the prompt is large, create a Secret to hold it (avoids the ~1 MiB
|
||||||
|
// PodSpec limit). The Secret is cleaned up in the finally block.
|
||||||
|
if (promptSecret) {
|
||||||
|
try {
|
||||||
|
await coreApi.createNamespacedSecret({
|
||||||
|
namespace: promptSecret.namespace,
|
||||||
|
body: {
|
||||||
|
apiVersion: "v1",
|
||||||
|
kind: "Secret",
|
||||||
|
metadata: {
|
||||||
|
name: promptSecret.name,
|
||||||
|
namespace: promptSecret.namespace,
|
||||||
|
labels: {
|
||||||
|
"app.kubernetes.io/managed-by": "paperclip",
|
||||||
|
"paperclip.io/adapter-type": "claude_k8s",
|
||||||
|
"paperclip.io/run-id": runId,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
stringData: promptSecret.data,
|
||||||
},
|
},
|
||||||
stringData: promptSecret.data,
|
});
|
||||||
},
|
await onLog("stdout", `[paperclip] Created prompt Secret: ${promptSecret.name} (${Math.round(Buffer.byteLength(prompt, "utf-8") / 1024)} KiB)\n`);
|
||||||
});
|
} catch (err) {
|
||||||
await onLog("stdout", `[paperclip] Created prompt Secret: ${promptSecret.name} (${Math.round(Buffer.byteLength(prompt, "utf-8") / 1024)} KiB)\n`);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
await onLog("stderr", `[paperclip] Failed to create prompt Secret: ${msg}\n`);
|
||||||
|
return {
|
||||||
|
exitCode: null,
|
||||||
|
signal: null,
|
||||||
|
timedOut: false,
|
||||||
|
errorMessage: `Failed to create prompt Secret: ${msg}`,
|
||||||
|
errorCode: "k8s_prompt_secret_create_failed",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the Job
|
||||||
|
let createdJobUid: string | undefined;
|
||||||
|
try {
|
||||||
|
const created = await batchApi.createNamespacedJob({ namespace, body: job });
|
||||||
|
createdJobUid = created.metadata?.uid;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
await onLog("stderr", `[paperclip] Failed to create prompt Secret: ${msg}\n`);
|
await onLog("stderr", `[paperclip] Failed to create K8s Job: ${msg}\n`);
|
||||||
|
if (promptSecret) {
|
||||||
|
try {
|
||||||
|
await coreApi.deleteNamespacedSecret({ name: promptSecret.name, namespace: promptSecret.namespace });
|
||||||
|
} catch { /* best-effort */ }
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
exitCode: null,
|
exitCode: null,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: `Failed to create prompt Secret: ${msg}`,
|
errorMessage: `Failed to create Kubernetes Job: ${msg}`,
|
||||||
errorCode: "k8s_prompt_secret_create_failed",
|
errorCode: "k8s_job_create_failed",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Create the Job
|
// Attach ownerReference so K8s GC cleans up the Secret if the process
|
||||||
const batchApi = getBatchApi(kubeconfigPath);
|
// crashes before the finally block runs.
|
||||||
try {
|
if (promptSecret && createdJobUid) {
|
||||||
await batchApi.createNamespacedJob({ namespace, body: job });
|
try {
|
||||||
} catch (err) {
|
await coreApi.patchNamespacedSecret({
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
name: promptSecret.name,
|
||||||
await onLog("stderr", `[paperclip] Failed to create K8s Job: ${msg}\n`);
|
namespace: promptSecret.namespace,
|
||||||
return {
|
body: [
|
||||||
exitCode: null,
|
{
|
||||||
signal: null,
|
op: "add",
|
||||||
timedOut: false,
|
path: "/metadata/ownerReferences",
|
||||||
errorMessage: `Failed to create Kubernetes Job: ${msg}`,
|
value: [
|
||||||
errorCode: "k8s_job_create_failed",
|
{
|
||||||
};
|
apiVersion: "batch/v1",
|
||||||
}
|
kind: "Job",
|
||||||
|
name: jobName,
|
||||||
|
uid: createdJobUid,
|
||||||
|
blockOwnerDeletion: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
|
await onLog("stderr", `[paperclip] Warning: failed to set ownerReference on prompt Secret: ${msg}\n`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
|
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
|
||||||
|
}
|
||||||
|
|
||||||
let stdout = "";
|
let stdout = "";
|
||||||
let exitCode: number | null = null;
|
let exitCode: number | null = null;
|
||||||
@@ -566,8 +821,23 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
const scheduleTimeoutMs = 120_000; // 2 minutes for scheduling
|
const scheduleTimeoutMs = 120_000; // 2 minutes for scheduling
|
||||||
let podName: string;
|
let podName: string;
|
||||||
try {
|
try {
|
||||||
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
|
if (reattachTarget) {
|
||||||
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
|
// Pod is already running from the prior run — look it up directly.
|
||||||
|
const podList = await coreApi.listNamespacedPod({
|
||||||
|
namespace,
|
||||||
|
labelSelector: `job-name=${jobName}`,
|
||||||
|
});
|
||||||
|
const pod = podList.items[0];
|
||||||
|
const name = pod?.metadata?.name;
|
||||||
|
if (!name) {
|
||||||
|
throw new Error(`Reattach target Job ${jobName} has no pod`);
|
||||||
|
}
|
||||||
|
podName = name;
|
||||||
|
await onLog("stdout", `[paperclip] Reattached to pod ${podName}\n`);
|
||||||
|
} else {
|
||||||
|
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
|
||||||
|
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
|
||||||
|
}
|
||||||
|
|
||||||
// Notify the server that execution has started. This sets
|
// Notify the server that execution has started. This sets
|
||||||
// processStartedAt and refreshes updatedAt in the DB, which the
|
// processStartedAt and refreshes updatedAt in the DB, which the
|
||||||
@@ -581,13 +851,14 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
await onLog("stderr", `[paperclip] Pod scheduling failed: ${msg}\n`);
|
const phase = reattachTarget ? "reattach" : "scheduling";
|
||||||
|
await onLog("stderr", `[paperclip] Pod ${phase} failed: ${msg}\n`);
|
||||||
return {
|
return {
|
||||||
exitCode: null,
|
exitCode: null,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: `Pod scheduling failed: ${msg}`,
|
errorMessage: `Pod ${phase} failed: ${msg}`,
|
||||||
errorCode: "k8s_pod_schedule_failed",
|
errorCode: reattachTarget ? "k8s_pod_reattach_failed" : "k8s_pod_schedule_failed",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -621,22 +892,56 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
let lastLogAt = Date.now();
|
let lastLogAt = Date.now();
|
||||||
let keepaliveTick = 0;
|
let keepaliveTick = 0;
|
||||||
let keepaliveJobTerminal = false;
|
let keepaliveJobTerminal = false;
|
||||||
|
let keepaliveJobTerminalAt: number | null = null;
|
||||||
|
let consecutiveTerminalReadings = 0;
|
||||||
keepaliveTimer = setInterval(() => {
|
keepaliveTimer = setInterval(() => {
|
||||||
// Fire-and-forget the async work; setInterval callbacks must be
|
// Fire-and-forget the async work; setInterval callbacks must be
|
||||||
// synchronous or the timer will drift.
|
// synchronous or the timer will drift.
|
||||||
void (async () => {
|
void (async () => {
|
||||||
if (keepaliveJobTerminal) return;
|
if (keepaliveJobTerminal) {
|
||||||
|
// Post-terminal window: keep refreshing onSpawn during cleanup
|
||||||
|
// (job deletion, log parsing, K8s API calls) so the reaper doesn't
|
||||||
|
// fire a false process_lost while execute() is still running.
|
||||||
|
if (
|
||||||
|
ctx.onSpawn &&
|
||||||
|
keepaliveJobTerminalAt !== null &&
|
||||||
|
Date.now() - keepaliveJobTerminalAt <= POST_TERMINAL_KEEPALIVE_MS
|
||||||
|
) {
|
||||||
|
keepaliveTick++;
|
||||||
|
if (keepaliveTick % 6 === 0) {
|
||||||
|
void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Verify the Job is still alive before announcing or refreshing.
|
// Verify the Job is still alive before announcing or refreshing.
|
||||||
|
// Require two consecutive terminal readings before latching to
|
||||||
|
// guard against a stale K8s API cache returning a false terminal
|
||||||
|
// status on a single read (finding #5, FAR-15).
|
||||||
try {
|
try {
|
||||||
const job = await batchApi.readNamespacedJob({ name: jobName, namespace });
|
const job = await batchApi.readNamespacedJob({ name: jobName, namespace });
|
||||||
const terminal = job.status?.conditions?.some(
|
const terminal = job.status?.conditions?.some(
|
||||||
(c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True",
|
(c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True",
|
||||||
);
|
);
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
keepaliveJobTerminal = true;
|
consecutiveTerminalReadings++;
|
||||||
|
if (consecutiveTerminalReadings >= 2) {
|
||||||
|
keepaliveJobTerminal = true;
|
||||||
|
keepaliveJobTerminalAt = Date.now();
|
||||||
|
if (ctx.onSpawn) {
|
||||||
|
void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {});
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// First terminal reading — do not latch yet; next tick confirms.
|
||||||
|
keepaliveTick++;
|
||||||
|
if (ctx.onSpawn && (keepaliveTick === 1 || keepaliveTick % 12 === 0)) {
|
||||||
|
void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {});
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
consecutiveTerminalReadings = 0;
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
// Only treat 404 (Job deleted) as terminal. Transient 5xx or
|
// Only treat 404 (Job deleted) as terminal. Transient 5xx or
|
||||||
// connection resets should NOT permanently disable the keepalive —
|
// connection resets should NOT permanently disable the keepalive —
|
||||||
@@ -644,6 +949,10 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// window as a safety net.
|
// window as a safety net.
|
||||||
if (isK8s404(err)) {
|
if (isK8s404(err)) {
|
||||||
keepaliveJobTerminal = true;
|
keepaliveJobTerminal = true;
|
||||||
|
keepaliveJobTerminalAt = Date.now();
|
||||||
|
if (ctx.onSpawn) {
|
||||||
|
void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {});
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Log transient errors but leave keepaliveJobTerminal false so
|
// Log transient errors but leave keepaliveJobTerminal false so
|
||||||
@@ -673,9 +982,12 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// Shared signal: when job completion resolves, tell the log
|
// Shared signal: when job completion resolves, tell the log
|
||||||
// streamer to stop reconnecting.
|
// streamer to stop reconnecting.
|
||||||
const logStopSignal = { stopped: false };
|
const logStopSignal = { stopped: false };
|
||||||
|
// Shared dedup filter: created here so the one-shot fallback can
|
||||||
|
// reuse it and avoid pushing already-sent lines to the UI (finding #6, FAR-15).
|
||||||
|
const logDedup = new LogLineDedupFilter();
|
||||||
|
|
||||||
const [logResult, completionResult] = await Promise.allSettled([
|
const [logResult, completionResult] = await Promise.allSettled([
|
||||||
streamPodLogs(namespace, podName, wrappedOnLog, kubeconfigPath, logStopSignal),
|
streamPodLogs(namespace, podName, wrappedOnLog, kubeconfigPath, logStopSignal, logDedup),
|
||||||
waitForJobCompletion(namespace, jobName, completionTimeoutMs, kubeconfigPath).then((r) => {
|
waitForJobCompletion(namespace, jobName, completionTimeoutMs, kubeconfigPath).then((r) => {
|
||||||
logStopSignal.stopped = true;
|
logStopSignal.stopped = true;
|
||||||
return r;
|
return r;
|
||||||
@@ -703,7 +1015,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// from the beginning of the log, giving us the full output.
|
// from the beginning of the log, giving us the full output.
|
||||||
// We use a cheap string scan for the result-event guard (avoids a full JSON parse here;
|
// We use a cheap string scan for the result-event guard (avoids a full JSON parse here;
|
||||||
// the authoritative parse happens once below after all fallbacks complete).
|
// the authoritative parse happens once below after all fallbacks complete).
|
||||||
const hasResultEvent = stdout.includes('"type":"result"');
|
const hasResultEvent = stdout.split("\n").some((l) => { try { return JSON.parse(l).type === "result"; } catch { return false; } });
|
||||||
const needsOneShot = !stdout.trim() || (stdout.trim() && !hasResultEvent);
|
const needsOneShot = !stdout.trim() || (stdout.trim() && !hasResultEvent);
|
||||||
if (needsOneShot) {
|
if (needsOneShot) {
|
||||||
if (!stdout.trim()) {
|
if (!stdout.trim()) {
|
||||||
@@ -712,9 +1024,12 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
const oneShotLogs = await readPodLogs(namespace, podName, kubeconfigPath);
|
const oneShotLogs = await readPodLogs(namespace, podName, kubeconfigPath);
|
||||||
if (!stdout.trim() && oneShotLogs.trim()) {
|
if (!stdout.trim() && oneShotLogs.trim()) {
|
||||||
stdout = oneShotLogs;
|
stdout = oneShotLogs;
|
||||||
await onLog("stdout", stdout);
|
const deduped = logDedup.filter(stdout) + logDedup.flush();
|
||||||
|
if (deduped) await onLog("stdout", deduped);
|
||||||
} else if (oneShotLogs && oneShotLogs.length > stdout.length) {
|
} else if (oneShotLogs && oneShotLogs.length > stdout.length) {
|
||||||
await onLog("stdout", `[paperclip] Log stream captured partial output — supplemental one-shot read returned more content.\n`);
|
await onLog("stdout", `[paperclip] Log stream captured partial output — supplemental one-shot read returned more content.\n`);
|
||||||
|
const deduped = logDedup.filter(oneShotLogs) + logDedup.flush();
|
||||||
|
if (deduped) await onLog("stdout", deduped);
|
||||||
stdout = oneShotLogs;
|
stdout = oneShotLogs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -826,8 +1141,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
};
|
};
|
||||||
})();
|
})();
|
||||||
|
|
||||||
const runtimeSessionParams = parseObject(runtime.sessionParams);
|
const fallbackSessionId = currentSessionIdRaw;
|
||||||
const fallbackSessionId = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
|
|
||||||
const resolvedSessionId = parsedStream.sessionId
|
const resolvedSessionId = parsedStream.sessionId
|
||||||
?? (asString(parsed.session_id as string, fallbackSessionId) || fallbackSessionId);
|
?? (asString(parsed.session_id as string, fallbackSessionId) || fallbackSessionId);
|
||||||
const model = asString(config.model, "");
|
const model = asString(config.model, "");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { describe, it, expect, beforeEach } from "vitest";
|
import { describe, it, expect, beforeEach } from "vitest";
|
||||||
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
|
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
|
||||||
import { buildJobManifest, buildRtkSetupCommands } from "./job-manifest.js";
|
import { buildJobManifest, buildRtkSetupCommands, sanitizeLabelValue } from "./job-manifest.js";
|
||||||
import type { SelfPodInfo } from "./k8s-client.js";
|
import type { SelfPodInfo } from "./k8s-client.js";
|
||||||
|
|
||||||
function makeCtx(overrides: Partial<AdapterExecutionContext> = {}): AdapterExecutionContext {
|
function makeCtx(overrides: Partial<AdapterExecutionContext> = {}): AdapterExecutionContext {
|
||||||
@@ -136,6 +136,36 @@ describe("buildJobManifest", () => {
|
|||||||
expect(job.metadata?.labels?.env).toBe("prod");
|
expect(job.metadata?.labels?.env).toBe("prod");
|
||||||
expect(job.metadata?.labels?.["paperclip.io/adapter-type"]).toBe("claude_k8s");
|
expect(job.metadata?.labels?.["paperclip.io/adapter-type"]).toBe("claude_k8s");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("adds task-id label when context provides taskId", () => {
|
||||||
|
ctx.context = { taskId: "task-xyz-789" };
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/task-id"]).toBe("task-xyz-789");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("falls back to issueId when taskId absent", () => {
|
||||||
|
ctx.context = { issueId: "issue-42" };
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/task-id"]).toBe("issue-42");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("adds session-id label when runtime provides sessionId", () => {
|
||||||
|
ctx.runtime = { ...ctx.runtime, sessionId: "sess-abc-1234" };
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/session-id"]).toBe("sess-abc-1234");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("reads sessionId from runtime.sessionParams when sessionId prop missing", () => {
|
||||||
|
ctx.runtime = { ...ctx.runtime, sessionParams: { sessionId: "sess-from-params" } };
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/session-id"]).toBe("sess-from-params");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("omits task-id and session-id labels when neither is provided", () => {
|
||||||
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/task-id"]).toBeUndefined();
|
||||||
|
expect(job.metadata?.labels?.["paperclip.io/session-id"]).toBeUndefined();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("annotations", () => {
|
describe("annotations", () => {
|
||||||
@@ -408,10 +438,10 @@ describe("buildJobManifest", () => {
|
|||||||
|
|
||||||
it("uses configured resource overrides", () => {
|
it("uses configured resource overrides", () => {
|
||||||
ctx.config = {
|
ctx.config = {
|
||||||
resources: {
|
"resources.requests.cpu": "500m",
|
||||||
requests: { cpu: "500m", memory: "1Gi" },
|
"resources.requests.memory": "1Gi",
|
||||||
limits: { cpu: "2000m", memory: "4Gi" },
|
"resources.limits.cpu": "2000m",
|
||||||
},
|
"resources.limits.memory": "4Gi",
|
||||||
};
|
};
|
||||||
const { job } = buildJobManifest({ ctx, selfPod });
|
const { job } = buildJobManifest({ ctx, selfPod });
|
||||||
const resources = job.spec?.template?.spec?.containers[0]?.resources;
|
const resources = job.spec?.template?.spec?.containers[0]?.resources;
|
||||||
@@ -487,13 +517,66 @@ describe("buildJobManifest", () => {
|
|||||||
expect(claudeArgs).toContain("--dangerously-skip-permissions");
|
expect(claudeArgs).toContain("--dangerously-skip-permissions");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("adds --append-system-prompt-file when instructionsFilePath set", () => {
|
it("adds --append-system-prompt-file (config fallback) when instructionsFilePath set and no session", () => {
|
||||||
ctx.config = { instructionsFilePath: "/paperclip/instructions.md" };
|
ctx.config = { instructionsFilePath: "/paperclip/instructions.md" };
|
||||||
const { claudeArgs } = buildJobManifest({ ctx, selfPod });
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod });
|
||||||
expect(claudeArgs).toContain("--append-system-prompt-file");
|
expect(claudeArgs).toContain("--append-system-prompt-file");
|
||||||
expect(claudeArgs).toContain("/paperclip/instructions.md");
|
expect(claudeArgs).toContain("/paperclip/instructions.md");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("omits --append-system-prompt-file on session resume (avoids token waste)", () => {
|
||||||
|
ctx.config = { instructionsFilePath: "/paperclip/instructions.md" };
|
||||||
|
ctx.runtime.sessionId = "sess_existing";
|
||||||
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(claudeArgs).not.toContain("--append-system-prompt-file");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("adds --add-dir when promptBundle is provided", () => {
|
||||||
|
const promptBundle = {
|
||||||
|
bundleKey: "abc123",
|
||||||
|
rootDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
|
||||||
|
addDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
|
||||||
|
instructionsFilePath: null,
|
||||||
|
};
|
||||||
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod, promptBundle });
|
||||||
|
expect(claudeArgs).toContain("--add-dir");
|
||||||
|
expect(claudeArgs).toContain(promptBundle.addDir);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("uses bundle instructionsFilePath for --append-system-prompt-file when promptBundle provided", () => {
|
||||||
|
const promptBundle = {
|
||||||
|
bundleKey: "abc123",
|
||||||
|
rootDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
|
||||||
|
addDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
|
||||||
|
instructionsFilePath: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123/agent-instructions.md",
|
||||||
|
};
|
||||||
|
ctx.config = { instructionsFilePath: "/raw/path/AGENTS.md" };
|
||||||
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod, promptBundle });
|
||||||
|
expect(claudeArgs).toContain("--append-system-prompt-file");
|
||||||
|
const idx = claudeArgs.indexOf("--append-system-prompt-file");
|
||||||
|
expect(claudeArgs[idx + 1]).toBe(promptBundle.instructionsFilePath);
|
||||||
|
expect(claudeArgs).not.toContain("/raw/path/AGENTS.md");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("omits --append-system-prompt-file from bundle on session resume", () => {
|
||||||
|
const promptBundle = {
|
||||||
|
bundleKey: "abc123",
|
||||||
|
rootDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
|
||||||
|
addDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
|
||||||
|
instructionsFilePath: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123/agent-instructions.md",
|
||||||
|
};
|
||||||
|
ctx.runtime.sessionId = "sess_existing";
|
||||||
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod, promptBundle });
|
||||||
|
expect(claudeArgs).not.toContain("--append-system-prompt-file");
|
||||||
|
// --add-dir must still be present even on resume
|
||||||
|
expect(claudeArgs).toContain("--add-dir");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("omits --add-dir when no promptBundle", () => {
|
||||||
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod });
|
||||||
|
expect(claudeArgs).not.toContain("--add-dir");
|
||||||
|
});
|
||||||
|
|
||||||
it("appends extraArgs when configured", () => {
|
it("appends extraArgs when configured", () => {
|
||||||
ctx.config = { extraArgs: ["--no-input", "--verbose"] };
|
ctx.config = { extraArgs: ["--no-input", "--verbose"] };
|
||||||
const { claudeArgs } = buildJobManifest({ ctx, selfPod });
|
const { claudeArgs } = buildJobManifest({ ctx, selfPod });
|
||||||
@@ -719,6 +802,28 @@ describe("buildJobManifest", () => {
|
|||||||
expect(filterScript).toContain("tool_result");
|
expect(filterScript).toContain("tool_result");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("filter script truncates without corrupting multi-byte UTF-8", () => {
|
||||||
|
// "中" is U+4E2D, 3 bytes in UTF-8: E4 B8 AD
|
||||||
|
// With MAX=5, two "中" (6 bytes) should truncate to one (3 bytes), not
|
||||||
|
// produce a replacement character from slicing mid-codepoint.
|
||||||
|
const setup = buildRtkSetupCommands(5);
|
||||||
|
const b64Matches = [...setup.matchAll(/Buffer\.from\('([A-Za-z0-9+/=]+)','base64'\)/g)];
|
||||||
|
const filterScript = Buffer.from(b64Matches[0]![1], "base64").toString("utf-8");
|
||||||
|
|
||||||
|
// Extract the trunc function from the filter script and evaluate it
|
||||||
|
const fnMatch = filterScript.match(/(function trunc\(s\)\{.*\})(?=const tr=)/);
|
||||||
|
expect(fnMatch).toBeTruthy();
|
||||||
|
// eslint-disable-next-line no-eval
|
||||||
|
const trunc = eval(`(()=>{const MAX=5;${fnMatch![1]};return trunc;})()`);
|
||||||
|
|
||||||
|
const result = trunc("中中");
|
||||||
|
expect(result).not.toContain("�");
|
||||||
|
expect(result).toContain("中");
|
||||||
|
expect(result).toContain("truncated by paperclip-rtk");
|
||||||
|
// Should report bytes from the actual truncation point, not MAX
|
||||||
|
expect(result).toContain("3 bytes truncated");
|
||||||
|
});
|
||||||
|
|
||||||
it("filter script handles array content (block format)", () => {
|
it("filter script handles array content (block format)", () => {
|
||||||
const setup = buildRtkSetupCommands(50000);
|
const setup = buildRtkSetupCommands(50000);
|
||||||
const b64Matches = [...setup.matchAll(/Buffer\.from\('([A-Za-z0-9+/=]+)','base64'\)/g)];
|
const b64Matches = [...setup.matchAll(/Buffer\.from\('([A-Za-z0-9+/=]+)','base64'\)/g)];
|
||||||
@@ -729,3 +834,32 @@ describe("buildJobManifest", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("sanitizeLabelValue", () => {
|
||||||
|
it("passes through already-valid UUIDs and slugs", () => {
|
||||||
|
expect(sanitizeLabelValue("abc-123-def")).toBe("abc-123-def");
|
||||||
|
expect(sanitizeLabelValue("0d8b4472-c42c-4052-aab1-e32897909afa")).toBe("0d8b4472-c42c-4052-aab1-e32897909afa");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("strips characters outside [a-zA-Z0-9._-]", () => {
|
||||||
|
expect(sanitizeLabelValue("task:xyz/123")).toBe("taskxyz123");
|
||||||
|
expect(sanitizeLabelValue("abc 123")).toBe("abc123");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("trims leading/trailing non-alphanumeric characters", () => {
|
||||||
|
expect(sanitizeLabelValue("--abc--")).toBe("abc");
|
||||||
|
expect(sanitizeLabelValue("...123...")).toBe("123");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("truncates to the configured maxLen", () => {
|
||||||
|
const long = "a".repeat(200);
|
||||||
|
const out = sanitizeLabelValue(long, 63);
|
||||||
|
expect(out?.length).toBe(63);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns null when no alphanumeric characters remain", () => {
|
||||||
|
expect(sanitizeLabelValue("---")).toBeNull();
|
||||||
|
expect(sanitizeLabelValue("")).toBeNull();
|
||||||
|
expect(sanitizeLabelValue(" ")).toBeNull();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
+45
-13
@@ -10,6 +10,7 @@ import {
|
|||||||
renderTemplate,
|
renderTemplate,
|
||||||
} from "@paperclipai/adapter-utils/server-utils";
|
} from "@paperclipai/adapter-utils/server-utils";
|
||||||
import { createHash } from "node:crypto";
|
import { createHash } from "node:crypto";
|
||||||
|
import type { ClaudePromptBundle } from "./prompt-cache.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the shell command prefix that installs a native Node.js PostToolUse
|
* Build the shell command prefix that installs a native Node.js PostToolUse
|
||||||
@@ -46,7 +47,8 @@ export function buildRtkSetupCommands(maxOutputBytes: number): string {
|
|||||||
`if(typeof s!=='string')return s;`,
|
`if(typeof s!=='string')return s;`,
|
||||||
`const b=Buffer.from(s,'utf-8');`,
|
`const b=Buffer.from(s,'utf-8');`,
|
||||||
`if(b.length<=MAX)return s;`,
|
`if(b.length<=MAX)return s;`,
|
||||||
`return b.slice(0,MAX).toString('utf-8')+'\\n[...'+(b.length-MAX)+' bytes truncated by paperclip-rtk]';`,
|
`let e=MAX;if(e>0){let p=e-1;while(p>0&&(b[p]&0xC0)===0x80)p--;const l=b[p];let n=1;if((l&0xE0)===0xC0)n=2;else if((l&0xF0)===0xE0)n=3;else if((l&0xF8)===0xF0)n=4;if(p+n>e)e=p;}`,
|
||||||
|
`return b.slice(0,e).toString('utf-8')+'\\n[...'+(b.length-e)+' bytes truncated by paperclip-rtk]';`,
|
||||||
`}`,
|
`}`,
|
||||||
`const tr=o&&(o.tool_response||o.tool_result);`,
|
`const tr=o&&(o.tool_response||o.tool_result);`,
|
||||||
`if(tr){`,
|
`if(tr){`,
|
||||||
@@ -175,6 +177,8 @@ function parseKeyValueConfig(raw: unknown): Record<string, string> {
|
|||||||
/** Arguments accepted by `buildJobManifest`. */
export interface JobBuildInput {
  /** Adapter execution context; the run id, agent, runtime, config and context are read from it. */
  ctx: AdapterExecutionContext;
  /** Pod details passed on to env-var construction (presumably the pod returned by `getSelfPodInfo` — confirm at the call site). */
  selfPod: SelfPodInfo;
  /**
   * Prepared prompt bundle (skills + instructions). When provided,
   * `--add-dir` and `--append-system-prompt-file` use the bundle's paths.
   */
  promptBundle?: ClaudePromptBundle | null;
}
|
||||||
|
|
||||||
/** When the prompt exceeds the env-var size limit, the manifest uses a
|
/** When the prompt exceeds the env-var size limit, the manifest uses a
|
||||||
@@ -199,7 +203,20 @@ export interface JobBuildResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function sanitizeForK8sName(value: string, maxLen = 16): string {
|
function sanitizeForK8sName(value: string, maxLen = 16): string {
|
||||||
return value.toLowerCase().replace(/[^a-z0-9-]/g, "").slice(0, maxLen);
|
// Trim trailing hyphens after slicing so names don't end with `-` when
|
||||||
|
// truncation lands on a hyphen boundary (finding #16, FAR-15).
|
||||||
|
return value.toLowerCase().replace(/[^a-z0-9-]/g, "").slice(0, maxLen).replace(/-+$/, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Sanitize a string for use as a Kubernetes label value.
 *
 * A valid label value contains only `[a-zA-Z0-9._-]`, begins and ends with an
 * alphanumeric character, and is at most 63 characters long.
 *
 * @param value  Raw string to sanitize.
 * @param maxLen Maximum length of the returned value (defaults to 63).
 * @returns The sanitized value, or `null` when no usable characters remain —
 *          the caller should omit the label in that case.
 */
export function sanitizeLabelValue(value: string, maxLen = 63): string | null {
  const sanitized = value
    // Drop every character that is not allowed anywhere in a label value.
    .replace(/[^a-zA-Z0-9._-]/g, "")
    // Strip leading separators BEFORE truncating so they do not consume the
    // length budget (otherwise "-----…abc" could truncate to separators only
    // and be reported as unusable).
    .replace(/^[^a-zA-Z0-9]+/, "")
    .slice(0, maxLen)
    // Truncation may land on a separator; the value must end alphanumeric.
    .replace(/[^a-zA-Z0-9]+$/, "");
  return sanitized.length > 0 ? sanitized : null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -316,7 +333,7 @@ function buildEnvVars(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
||||||
const { ctx, selfPod } = input;
|
const { ctx, selfPod, promptBundle } = input;
|
||||||
const { runId, agent, runtime, config: rawConfig, context } = ctx;
|
const { runId, agent, runtime, config: rawConfig, context } = ctx;
|
||||||
const config = parseObject(rawConfig);
|
const config = parseObject(rawConfig);
|
||||||
|
|
||||||
@@ -331,7 +348,6 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
|||||||
const extraArgs = asStringArray(config.extraArgs);
|
const extraArgs = asStringArray(config.extraArgs);
|
||||||
const timeoutSec = asNumber(config.timeoutSec, 0);
|
const timeoutSec = asNumber(config.timeoutSec, 0);
|
||||||
const ttlSeconds = asNumber(config.ttlSecondsAfterFinished, 300);
|
const ttlSeconds = asNumber(config.ttlSecondsAfterFinished, 300);
|
||||||
const resources = parseObject(config.resources);
|
|
||||||
const nodeSelector = parseKeyValueConfig(config.nodeSelector);
|
const nodeSelector = parseKeyValueConfig(config.nodeSelector);
|
||||||
const tolerations = Array.isArray(config.tolerations) ? config.tolerations : [];
|
const tolerations = Array.isArray(config.tolerations) ? config.tolerations : [];
|
||||||
const extraLabels = parseKeyValueConfig(config.labels);
|
const extraLabels = parseKeyValueConfig(config.labels);
|
||||||
@@ -392,30 +408,37 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Build Claude CLI args
|
// Build Claude CLI args
|
||||||
const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
|
// Prefer the bundle's materialized instructions file over the raw config path.
|
||||||
|
// Never inject --append-system-prompt-file on session resumes — the instructions
|
||||||
|
// are already in the session cache and re-injecting wastes tokens.
|
||||||
|
const rawInstructionsFilePath = asString(config.instructionsFilePath, "").trim();
|
||||||
|
const effectiveInstructionsFilePath =
|
||||||
|
promptBundle?.instructionsFilePath ?? (rawInstructionsFilePath || null);
|
||||||
const claudeArgs = ["--print", "-", "--output-format", "stream-json", "--verbose"];
|
const claudeArgs = ["--print", "-", "--output-format", "stream-json", "--verbose"];
|
||||||
if (runtimeSessionId) claudeArgs.push("--resume", runtimeSessionId);
|
if (runtimeSessionId) claudeArgs.push("--resume", runtimeSessionId);
|
||||||
if (dangerouslySkipPermissions) claudeArgs.push("--dangerously-skip-permissions");
|
if (dangerouslySkipPermissions) claudeArgs.push("--dangerously-skip-permissions");
|
||||||
if (model) claudeArgs.push("--model", model);
|
if (model) claudeArgs.push("--model", model);
|
||||||
if (effort) claudeArgs.push("--effort", effort);
|
if (effort) claudeArgs.push("--effort", effort);
|
||||||
if (maxTurns > 0) claudeArgs.push("--max-turns", String(maxTurns));
|
if (maxTurns > 0) claudeArgs.push("--max-turns", String(maxTurns));
|
||||||
if (instructionsFilePath) claudeArgs.push("--append-system-prompt-file", instructionsFilePath);
|
if (effectiveInstructionsFilePath && !runtimeSessionId) {
|
||||||
|
claudeArgs.push("--append-system-prompt-file", effectiveInstructionsFilePath);
|
||||||
|
}
|
||||||
|
if (promptBundle) claudeArgs.push("--add-dir", promptBundle.addDir);
|
||||||
if (extraArgs.length > 0) claudeArgs.push(...extraArgs);
|
if (extraArgs.length > 0) claudeArgs.push(...extraArgs);
|
||||||
|
|
||||||
// Build env vars
|
// Build env vars
|
||||||
const envVars = buildEnvVars(ctx, selfPod, config);
|
const envVars = buildEnvVars(ctx, selfPod, config);
|
||||||
|
|
||||||
// Resource defaults
|
// Resource defaults — UI stores dotted keys (e.g. "resources.requests.cpu")
|
||||||
const resourceRequests = parseObject(resources.requests);
|
// as flat config entries, so read them directly from config with the dotted key.
|
||||||
const resourceLimits = parseObject(resources.limits);
|
|
||||||
const containerResources: k8s.V1ResourceRequirements = {
|
const containerResources: k8s.V1ResourceRequirements = {
|
||||||
requests: {
|
requests: {
|
||||||
cpu: asString(resourceRequests.cpu, "1000m"),
|
cpu: asString(config["resources.requests.cpu"], "1000m"),
|
||||||
memory: asString(resourceRequests.memory, "2Gi"),
|
memory: asString(config["resources.requests.memory"], "2Gi"),
|
||||||
},
|
},
|
||||||
limits: {
|
limits: {
|
||||||
cpu: asString(resourceLimits.cpu, "4000m"),
|
cpu: asString(config["resources.limits.cpu"], "4000m"),
|
||||||
memory: asString(resourceLimits.memory, "8Gi"),
|
memory: asString(config["resources.limits.memory"], "8Gi"),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -428,6 +451,15 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
|
|||||||
"paperclip.io/company-id": agent.companyId,
|
"paperclip.io/company-id": agent.companyId,
|
||||||
"paperclip.io/adapter-type": "claude_k8s",
|
"paperclip.io/adapter-type": "claude_k8s",
|
||||||
};
|
};
|
||||||
|
// Reattach-target labels: let a future execute() identify this Job as the
|
||||||
|
// continuation of the same logical unit of work (same task + same resume
|
||||||
|
// session) so it can attach to the running pod across a Paperclip restart
|
||||||
|
// instead of deleting it and starting over (FAR-124).
|
||||||
|
const taskIdRaw = asString(context.taskId, "") || asString(context.issueId, "");
|
||||||
|
const taskLabel = taskIdRaw ? sanitizeLabelValue(taskIdRaw) : null;
|
||||||
|
if (taskLabel) labels["paperclip.io/task-id"] = taskLabel;
|
||||||
|
const sessionLabel = runtimeSessionId ? sanitizeLabelValue(runtimeSessionId) : null;
|
||||||
|
if (sessionLabel) labels["paperclip.io/session-id"] = sessionLabel;
|
||||||
for (const [key, value] of Object.entries(extraLabels)) {
|
for (const [key, value] of Object.entries(extraLabels)) {
|
||||||
labels[key] = value;
|
labels[key] = value;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -106,7 +106,12 @@ export async function getSelfPodInfo(kubeconfigPath?: string): Promise<SelfPodIn
|
|||||||
throw new Error(`claude_k8s: pod ${hostname} has no spec`);
|
throw new Error(`claude_k8s: pod ${hostname} has no spec`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const mainContainer = spec.containers[0];
|
// Match the Paperclip container by name ("paperclip") to avoid service-mesh
|
||||||
|
// sidecars or other injected containers being picked up as the source of
|
||||||
|
// truth for the Job spec (finding #9, FAR-15). Fall back to the first
|
||||||
|
// container if no name match is found (matches prior behavior).
|
||||||
|
const mainContainer =
|
||||||
|
spec.containers.find((c) => c.name === "paperclip") ?? spec.containers[0];
|
||||||
if (!mainContainer?.image) {
|
if (!mainContainer?.image) {
|
||||||
throw new Error(`claude_k8s: pod ${hostname} has no container image`);
|
throw new Error(`claude_k8s: pod ${hostname} has no container image`);
|
||||||
}
|
}
|
||||||
|
|||||||
+15
-6
@@ -9,9 +9,12 @@ export function parseClaudeStreamJson(stdout: string) {
|
|||||||
let model = "";
|
let model = "";
|
||||||
let finalResult: Record<string, unknown> | null = null;
|
let finalResult: Record<string, unknown> | null = null;
|
||||||
const assistantTexts: string[] = [];
|
const assistantTexts: string[] = [];
|
||||||
// Belt-and-braces dedup: track seen text blocks to filter duplicates
|
// Belt-and-braces dedup: key by (message.id, textIndex) so a session that
|
||||||
// caused by log stream reconnects replaying overlapping windows.
|
// legitimately emits the same text twice in different turns isn't collapsed
|
||||||
const seenTexts = new Set<string>();
|
// (finding #11, FAR-15). The log-dedup filter handles reconnect overlaps
|
||||||
|
// at the line level; this guard only needs to protect against the same
|
||||||
|
// message block being parsed twice.
|
||||||
|
const seenBlocks = new Set<string>();
|
||||||
|
|
||||||
for (const rawLine of stdout.split(/\r?\n/)) {
|
for (const rawLine of stdout.split(/\r?\n/)) {
|
||||||
const line = rawLine.trim();
|
const line = rawLine.trim();
|
||||||
@@ -29,14 +32,20 @@ export function parseClaudeStreamJson(stdout: string) {
|
|||||||
if (type === "assistant") {
|
if (type === "assistant") {
|
||||||
sessionId = asString(event.session_id, sessionId ?? "") || sessionId;
|
sessionId = asString(event.session_id, sessionId ?? "") || sessionId;
|
||||||
const message = parseObject(event.message);
|
const message = parseObject(event.message);
|
||||||
|
const messageId = asString(message.id, "");
|
||||||
const content = Array.isArray(message.content) ? message.content : [];
|
const content = Array.isArray(message.content) ? message.content : [];
|
||||||
for (const entry of content) {
|
for (let i = 0; i < content.length; i++) {
|
||||||
|
const entry = content[i];
|
||||||
if (typeof entry !== "object" || entry === null || Array.isArray(entry)) continue;
|
if (typeof entry !== "object" || entry === null || Array.isArray(entry)) continue;
|
||||||
const block = entry as Record<string, unknown>;
|
const block = entry as Record<string, unknown>;
|
||||||
if (asString(block.type, "") === "text") {
|
if (asString(block.type, "") === "text") {
|
||||||
const text = asString(block.text, "");
|
const text = asString(block.text, "");
|
||||||
if (text && !seenTexts.has(text)) {
|
if (!text) continue;
|
||||||
seenTexts.add(text);
|
// Prefer (messageId, index) when the message has an id; fall back
|
||||||
|
// to text content when it doesn't (legacy/partial events).
|
||||||
|
const key = messageId ? `${messageId}:${i}` : `text:${text}`;
|
||||||
|
if (!seenBlocks.has(key)) {
|
||||||
|
seenBlocks.add(key);
|
||||||
assistantTexts.push(text);
|
assistantTexts.push(text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,150 @@
|
|||||||
|
import { constants as fsConstants } from "node:fs";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
|
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
|
||||||
|
import {
|
||||||
|
type PaperclipSkillEntry,
|
||||||
|
ensurePaperclipSkillSymlink,
|
||||||
|
} from "@paperclipai/adapter-utils/server-utils";
|
||||||
|
|
||||||
|
/** Description of a prompt bundle materialized on disk by `prepareClaudePromptBundle`. */
export interface ClaudePromptBundle {
  /** Hex digest identifying the bundle contents; also the name of the bundle directory. */
  bundleKey: string;
  /** Absolute path of the bundle root (holds `.claude/skills/` and `agent-instructions.md`). */
  rootDir: string;
  /** Directory to hand to the Claude CLI via `--add-dir`. */
  addDir: string;
  /** Location of the written instructions file; `null` when no instructions were supplied. */
  instructionsFilePath: string | null;
}
|
||||||
|
|
||||||
|
/** Instance id used when `PAPERCLIP_INSTANCE_ID` is unset or blank. */
const DEFAULT_PAPERCLIP_INSTANCE_ID = "default";

/**
 * Compute the directory under which prompt bundles for a company are cached.
 *
 * The Paperclip home comes from `PAPERCLIP_HOME` (trimmed) or falls back to
 * `~/.paperclip`; the instance id comes from `PAPERCLIP_INSTANCE_ID` (trimmed)
 * or falls back to the default instance id. Blank values count as unset.
 *
 * @param companyId Company whose cache root is requested.
 * @returns Absolute path `<home>/instances/<instance>/companies/<companyId>/claude-prompt-cache`.
 */
function resolveManagedClaudePromptCacheRoot(companyId: string): string {
  const homeOverride = process.env.PAPERCLIP_HOME?.trim();
  const instanceOverride = process.env.PAPERCLIP_INSTANCE_ID?.trim();

  const paperclipHome = homeOverride ? homeOverride : path.resolve(os.homedir(), ".paperclip");
  const instanceId = instanceOverride ? instanceOverride : DEFAULT_PAPERCLIP_INSTANCE_ID;

  const segments = ["instances", instanceId, "companies", companyId, "claude-prompt-cache"];
  return path.resolve(paperclipHome, ...segments);
}
|
||||||
|
|
||||||
|
/**
 * Recursively fold everything reachable from `candidate` into `hash`.
 *
 * Each visited node contributes a tagged record keyed by its path relative to
 * the hashing root: symlinks are tagged and then followed to their real target
 * (or recorded as `missing` when the target cannot be resolved), directories
 * are tagged and their children visited in sorted order, regular files
 * contribute their bytes, and anything else contributes its mode bits.
 *
 * @param candidate       Filesystem path to hash.
 * @param hash            Hash object that receives the updates.
 * @param relativePath    Path of `candidate` relative to the hashing root ("" for the root itself).
 * @param seenDirectories Real paths of directories already visited; a repeat visit is recorded as `loop` instead of being descended into.
 * @throws Propagates errors from `lstat`, `readdir` and `readFile`.
 */
async function hashPathContents(
  candidate: string,
  hash: ReturnType<typeof createHash>,
  relativePath: string,
  seenDirectories: Set<string>,
): Promise<void> {
  const stat = await fs.lstat(candidate);

  if (stat.isSymbolicLink()) {
    hash.update(`symlink:${relativePath}\n`);
    const resolved = await fs.realpath(candidate).catch(() => null);
    if (!resolved) {
      // Dangling link: record that fact rather than failing the whole hash.
      hash.update("missing\n");
      return;
    }
    await hashPathContents(resolved, hash, relativePath, seenDirectories);
    return;
  }

  if (stat.isDirectory()) {
    const realDir = await fs.realpath(candidate).catch(() => candidate);
    hash.update(`dir:${relativePath}\n`);
    if (seenDirectories.has(realDir)) {
      hash.update("loop\n");
      return;
    }
    seenDirectories.add(realDir);
    const entries = await fs.readdir(candidate, { withFileTypes: true });
    // Order by UTF-16 code units rather than localeCompare: collation depends
    // on the process locale, and the digest must be identical on every host.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
    for (const entry of entries) {
      const childRelativePath = relativePath.length > 0 ? `${relativePath}/${entry.name}` : entry.name;
      await hashPathContents(path.join(candidate, entry.name), hash, childRelativePath, seenDirectories);
    }
    return;
  }

  if (stat.isFile()) {
    hash.update(`file:${relativePath}\n`);
    hash.update(await fs.readFile(candidate));
    hash.update("\n");
    return;
  }

  // Neither link, directory nor regular file: fold in the mode bits only.
  hash.update(`other:${relativePath}:${stat.mode}\n`);
}
|
||||||
|
|
||||||
|
/**
 * Derive the content-addressed key of a prompt bundle.
 *
 * The SHA-256 digest covers a version tag, the instructions text (or a
 * `none` marker when there is none), and for every skill its key, runtime
 * name and the contents found at its source path.
 *
 * @param input.skills               Skills to include; their order in the array does not affect the key.
 * @param input.instructionsContents Instructions text, or `null`/empty when absent.
 * @returns Hex-encoded digest.
 * @throws Propagates filesystem errors raised while hashing a skill source.
 */
async function buildClaudePromptBundleKey(input: {
  skills: PaperclipSkillEntry[];
  instructionsContents: string | null;
}): Promise<string> {
  const hash = createHash("sha256");
  hash.update("paperclip-claude-prompt-bundle:v1\n");

  if (input.instructionsContents) {
    hash.update("instructions\n");
    hash.update(input.instructionsContents);
    hash.update("\n");
  } else {
    hash.update("instructions:none\n");
  }

  // Sort a copy by UTF-16 code units rather than localeCompare: collation
  // depends on the process locale, and the key must be identical on every host.
  const sortedSkills = [...input.skills].sort((a, b) =>
    a.runtimeName < b.runtimeName ? -1 : a.runtimeName > b.runtimeName ? 1 : 0,
  );
  for (const entry of sortedSkills) {
    hash.update(`skill:${entry.key}:${entry.runtimeName}\n`);
    // Fresh visited-set per skill so one skill's directories never mark another's as a loop.
    await hashPathContents(entry.source, hash, entry.runtimeName, new Set());
  }

  return hash.digest("hex");
}
|
||||||
|
|
||||||
|
/**
 * Make sure a readable file exists at `targetPath`, creating it from
 * `contents` when it does not.
 *
 * An already-readable file is left untouched (its contents are not compared).
 * Otherwise the parent directory is created, the contents are written to a
 * process/time-stamped temp file next to the target, and the temp file is
 * renamed into place. A failure is tolerated if the target turns out to be
 * readable afterwards; the temp file is always removed.
 *
 * @param targetPath Destination file path.
 * @param contents   Text written (UTF-8) when the file has to be created.
 * @throws The write/rename error when the target is still unreadable afterwards.
 */
async function ensureReadableFile(targetPath: string, contents: string): Promise<void> {
  const targetIsReadable = (): Promise<boolean> =>
    fs.access(targetPath, fsConstants.R_OK).then(
      () => true,
      () => false,
    );

  if (await targetIsReadable()) return;

  await fs.mkdir(path.dirname(targetPath), { recursive: true });
  const stagingPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
  try {
    await fs.writeFile(stagingPath, contents, "utf8");
    await fs.rename(stagingPath, targetPath);
  } catch (cause) {
    // Only surface the failure if the file still is not there to read.
    if (!(await targetIsReadable())) throw cause;
  } finally {
    await fs.rm(stagingPath, { force: true }).catch(() => {});
  }
}
|
||||||
|
|
||||||
|
/**
 * Materialize a prompt bundle on disk and report where it lives.
 *
 * The bundle directory is `<company cache root>/<bundleKey>`. Each skill is
 * linked into `<bundle>/.claude/skills/<runtimeName>` through
 * `ensurePaperclipSkillSymlink`; a skill that fails to link is reported on the
 * `stderr` log channel and does not abort the remaining work. When
 * instructions are supplied they are written to `<bundle>/agent-instructions.md`.
 *
 * @param input.companyId            Company that owns the cache directory.
 * @param input.skills               Skills to link into the bundle.
 * @param input.instructionsContents Instructions text, or `null`/empty for none.
 * @param input.onLog                Log sink used to report skill failures.
 * @returns The bundle key, root directory, `--add-dir` value and instructions path (or `null`).
 */
export async function prepareClaudePromptBundle(input: {
  companyId: string;
  skills: PaperclipSkillEntry[];
  instructionsContents: string | null;
  onLog: AdapterExecutionContext["onLog"];
}): Promise<ClaudePromptBundle> {
  const { companyId, skills, instructionsContents, onLog } = input;

  const bundleKey = await buildClaudePromptBundleKey({ skills, instructionsContents });
  const cacheRoot = resolveManagedClaudePromptCacheRoot(companyId);
  const rootDir = path.join(cacheRoot, bundleKey);
  const skillsHome = path.join(rootDir, ".claude", "skills");
  await fs.mkdir(skillsHome, { recursive: true });

  for (const skill of skills) {
    const linkPath = path.join(skillsHome, skill.runtimeName);
    try {
      await ensurePaperclipSkillSymlink(skill.source, linkPath);
    } catch (err) {
      // Best effort: report the skill that failed and carry on with the rest.
      const reason = err instanceof Error ? err.message : String(err);
      await onLog(
        "stderr",
        `[paperclip] Failed to materialize Claude skill "${skill.key}" into ${skillsHome}: ${reason}\n`,
      );
    }
  }

  let instructionsFilePath: string | null = null;
  if (instructionsContents) {
    instructionsFilePath = path.join(rootDir, "agent-instructions.md");
    await ensureReadableFile(instructionsFilePath, instructionsContents);
  }

  return { bundleKey, rootDir, addDir: rootDir, instructionsFilePath };
}
|
||||||
@@ -33,7 +33,7 @@ async function buildK8sSkillSnapshot(
|
|||||||
sourcePath: entry.source,
|
sourcePath: entry.source,
|
||||||
targetPath: null,
|
targetPath: null,
|
||||||
detail: desiredSet.has(entry.key)
|
detail: desiredSet.has(entry.key)
|
||||||
? "Injected via prompt bundle into ephemeral K8s Job pods."
|
? "Materialized into the PVC-backed Claude prompt bundle before each K8s Job run."
|
||||||
: null,
|
: null,
|
||||||
required: Boolean(entry.required),
|
required: Boolean(entry.required),
|
||||||
requiredReason: entry.requiredReason ?? null,
|
requiredReason: entry.requiredReason ?? null,
|
||||||
|
|||||||
+16
-9
@@ -85,8 +85,13 @@ async function checkRbac(
|
|||||||
{ resource: "jobs", group: "batch", verb: "create", code: "k8s_rbac_job_create", label: "create Jobs" },
|
{ resource: "jobs", group: "batch", verb: "create", code: "k8s_rbac_job_create", label: "create Jobs" },
|
||||||
{ resource: "jobs", group: "batch", verb: "delete", code: "k8s_rbac_job_delete", label: "delete Jobs" },
|
{ resource: "jobs", group: "batch", verb: "delete", code: "k8s_rbac_job_delete", label: "delete Jobs" },
|
||||||
{ resource: "jobs", group: "batch", verb: "get", code: "k8s_rbac_job_get", label: "get Jobs" },
|
{ resource: "jobs", group: "batch", verb: "get", code: "k8s_rbac_job_get", label: "get Jobs" },
|
||||||
|
{ resource: "jobs", group: "batch", verb: "list", code: "k8s_rbac_job_list", label: "list Jobs" },
|
||||||
{ resource: "pods", group: "", verb: "list", code: "k8s_rbac_pod_list", label: "list Pods" },
|
{ resource: "pods", group: "", verb: "list", code: "k8s_rbac_pod_list", label: "list Pods" },
|
||||||
{ resource: "pods/log", group: "", verb: "get", code: "k8s_rbac_pod_log", label: "get Pod logs" },
|
{ resource: "pods/log", group: "", verb: "get", code: "k8s_rbac_pod_log", label: "get Pod logs" },
|
||||||
|
{ resource: "secrets", group: "", verb: "create", code: "k8s_rbac_secret_create", label: "create Secrets" },
|
||||||
|
{ resource: "secrets", group: "", verb: "delete", code: "k8s_rbac_secret_delete", label: "delete Secrets" },
|
||||||
|
{ resource: "secrets", group: "", verb: "get", code: "k8s_rbac_secret_get", label: "get Secrets" },
|
||||||
|
{ resource: "persistentvolumeclaims", group: "", verb: "get", code: "k8s_rbac_pvc_get", label: "get PersistentVolumeClaims" },
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const check of rbacChecks) {
|
for (const check of rbacChecks) {
|
||||||
@@ -221,16 +226,18 @@ export async function testEnvironment(
|
|||||||
|
|
||||||
// 2. Target namespace exists
|
// 2. Target namespace exists
|
||||||
const nsOk = await checkNamespace(namespace, selfPod.namespace, checks, kubeconfigPath);
|
const nsOk = await checkNamespace(namespace, selfPod.namespace, checks, kubeconfigPath);
|
||||||
if (!nsOk) {
|
|
||||||
return { adapterType: ctx.adapterType, status: summarizeStatus(checks), checks, testedAt: new Date().toISOString() };
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3-5. Run remaining checks in parallel
|
// 3-5. Run remaining checks even if namespace check failed so operators see
|
||||||
await Promise.all([
|
// all issues at once instead of fixing them one at a time.
|
||||||
checkRbac(namespace, checks, kubeconfigPath),
|
if (nsOk) {
|
||||||
checkSecret(namespace, secretRef, checks, kubeconfigPath),
|
await Promise.all([
|
||||||
checkPvc(selfPod, checks, kubeconfigPath),
|
checkRbac(namespace, checks, kubeconfigPath),
|
||||||
]);
|
checkSecret(namespace, secretRef, checks, kubeconfigPath),
|
||||||
|
checkPvc(selfPod, checks, kubeconfigPath),
|
||||||
|
]);
|
||||||
|
} else {
|
||||||
|
await checkRbac(namespace, checks, kubeconfigPath);
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
adapterType: ctx.adapterType,
|
adapterType: ctx.adapterType,
|
||||||
|
|||||||
Reference in New Issue
Block a user