0.1.32: port prepareClaudePromptBundle to claude_k8s (FAR-12)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
fix: port prepareClaudePromptBundle flow to claude_k8s adapter (FAR-11)
2026-04-23 19:47:26 +00:00 · 2026-04-23 19:34:35 +00:00 · 2026-04-23 16:43:32 +00:00 · 2026-04-23 16:36:51 +00:00 · 2026-04-23 16:20:48 +00:00 · 2026-04-23 16:10:39 +00:00
9 changed files with 520 additions and 34 deletions
@@ -29,24 +29,21 @@ jobs:
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/master' && github.event_name == 'push'
+    permissions:
+      id-token: write
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: "22"
+          registry-url: "https://registry.npmjs.org"
          cache: "npm"

      - run: npm ci

      - run: npm run build

-      - uses: actions/setup-node@v4
-        with:
-          node-version: "22"
-          registry-url: "https://registry.npmjs.org"
-          cache: "npm"
-
      - name: Publish (skip if version already exists)
        run: |
          PKG_NAME=$(node -p "require('./package.json').name")
@@ -54,7 +51,7 @@ jobs:
          if npm view "${PKG_NAME}@${PKG_VERSION}" version 2>/dev/null; then
            echo "Version ${PKG_VERSION} already published — skipping."
          else
-            npm publish --access public
+            npm publish --provenance --access public
          fi
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@@ -1,12 +1,12 @@
 {
  "name": "paperclip-adapter-claude-k8s",
-  "version": "0.1.29",
+  "version": "0.1.32",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "paperclip-adapter-claude-k8s",
-      "version": "0.1.29",
+      "version": "0.1.32",
      "license": "MIT",
      "dependencies": {
        "@kubernetes/client-node": "^1.0.0",
@@ -1,16 +1,16 @@
 {
  "name": "paperclip-adapter-claude-k8s",
-  "version": "0.1.29",
+  "version": "0.1.32",
  "description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
  "license": "MIT",
  "repository": {
    "type": "git",
-    "url": "https://github.com/farhoodliquor/paperclip-adapter-claude-k8s"
+    "url": "https://github.com/farhoodlabs/paperclip-adapter-claude-k8s"
  },
  "bugs": {
-    "url": "https://github.com/farhoodliquor/paperclip-adapter-claude-k8s/issues"
+    "url": "https://github.com/farhoodlabs/paperclip-adapter-claude-k8s/issues"
  },
-  "homepage": "https://github.com/farhoodliquor/paperclip-adapter-claude-k8s#readme",
+  "homepage": "https://github.com/farhoodlabs/paperclip-adapter-claude-k8s#readme",
  "type": "module",
  "paperclip": {
    "adapterUiParser": "1.0.0"
@@ -1,6 +1,21 @@
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import type * as k8s from "@kubernetes/client-node";
-import { isK8s404, buildPartialRunError, isReattachableOrphan } from "./execute.js";
+import type { Writable } from "node:stream";
+
+// Mock the K8s client before importing execute so streamPodLogsOnce picks up
+// the mocked getLogApi.  The mock's logApi.log never resolves, simulating the
+// FAR-10 hang: K8s API drops the connection but the client awaits forever.
+const mockLogFn = vi.fn();
+vi.mock("./k8s-client.js", () => ({
+  getLogApi: () => ({ log: mockLogFn }),
+  getBatchApi: () => ({}),
+  getCoreApi: () => ({}),
+  getAuthzApi: () => ({}),
+  getSelfPodInfo: vi.fn(),
+  resetCache: vi.fn(),
+}));
+
+const { isK8s404, buildPartialRunError, isReattachableOrphan, describePodTerminatedError, streamPodLogsOnce } = await import("./execute.js");

 function makeJob(opts: {
  runId?: string;
@@ -186,3 +201,132 @@ describe("isReattachableOrphan", () => {
    expect(isReattachableOrphan(job, { agentId, taskId, sessionId })).toBe(false);
  });
 });
+
+// Regression: FAR-10 — waitForPod must throw on phase=Failed, not return the pod name.
+// These tests cover describePodTerminatedError, the helper that waitForPod uses to build
+// the error message before throwing.  Verifies that phase=Failed with no claude logs
+// produces a structured, actionable error instead of silently entering the log-stream path.
+describe("describePodTerminatedError", () => {
+  // Every case passes a hand-built containerStatuses array and inspects the
+  // returned message text; no K8s API is touched.
+  it("includes exit code and reason when claude container status is available", () => {
+    const cs = [
+      {
+        name: "claude",
+        state: { terminated: { exitCode: 137, reason: "OOMKilled" } },
+      },
+    ] as k8s.V1ContainerStatus[];
+    const msg = describePodTerminatedError("mypod", "Failed", cs);
+    expect(msg).toContain("137");
+    expect(msg).toContain("OOMKilled");
+    expect(msg).toContain("phase=Failed");
+  });
+
+  it("falls back to message field when reason is absent", () => {
+    const cs = [
+      {
+        name: "claude",
+        state: { terminated: { exitCode: 1, message: "signal: killed" } },
+      },
+    ] as k8s.V1ContainerStatus[];
+    const msg = describePodTerminatedError("mypod", "Failed", cs);
+    expect(msg).toContain("signal: killed");
+    expect(msg).toContain("1");
+  });
+
+  it("returns generic message when no claude container status is present", () => {
+    // Exact-match assertion: with no statuses the message has no exit detail.
+    const msg = describePodTerminatedError("mypod", "Failed", []);
+    expect(msg).toBe("Pod mypod reached phase=Failed");
+  });
+
+  it("ignores non-claude containers", () => {
+    // Only the container named "claude" contributes exit details.
+    const cs = [
+      {
+        name: "sidecar",
+        state: { terminated: { exitCode: 0, reason: "Completed" } },
+      },
+    ] as k8s.V1ContainerStatus[];
+    const msg = describePodTerminatedError("mypod", "Failed", cs);
+    expect(msg).toBe("Pod mypod reached phase=Failed");
+  });
+
+  it("handles null exitCode gracefully", () => {
+    // NOTE(review): the double cast through `unknown` is presumably needed
+    // because the typed model does not allow a null exitCode — confirm.
+    const cs = [
+      {
+        name: "claude",
+        state: { terminated: { exitCode: null, reason: "Error" } },
+      },
+    ] as unknown as k8s.V1ContainerStatus[];
+    const msg = describePodTerminatedError("mypod", "Failed", cs);
+    expect(msg).toContain("unknown");
+    expect(msg).toContain("Error");
+  });
+});
+
+// Regression: FAR-10 hardening — streamPodLogsOnce must not hang forever when
+// the K8s client's logApi.log call never resolves.  When stopSignal fires, the
+// bail timer must force-return within LOG_STREAM_BAIL_TIMEOUT_MS (3s in the
+// implementation) so execute() does not get stuck waiting for a dead stream.
+describe("streamPodLogsOnce bail timer", () => {
+  beforeEach(() => {
+    // Start each case with a clean mock, and install fake timers so the
+    // tests can advance time explicitly instead of really waiting.
+    mockLogFn.mockReset();
+    vi.useFakeTimers();
+  });
+  afterEach(() => {
+    // Restore real timers so later suites are unaffected.
+    vi.useRealTimers();
+  });
+
+  it("returns within the bail window when stopSignal fires during a hung log call", async () => {
+    // logApi.log never resolves — simulates the FAR-10 hang where the K8s
+    // response stream stalls without closing the connection.
+    mockLogFn.mockImplementation((_ns, _pod, _ctr, _writable: Writable) => {
+      return new Promise(() => { /* never resolves */ });
+    });
+
+    const stopSignal = { stopped: false };
+    const onLog = vi.fn().mockResolvedValue(undefined);
+
+    // Deliberately not awaited yet: the promise can only settle once the
+    // fake clock has been advanced below.
+    const resultPromise = streamPodLogsOnce(
+      "default",
+      "mypod",
+      onLog,
+      undefined,
+      undefined,
+      undefined,
+      stopSignal,
+    );
+
+    // Fire stopSignal; let the 200ms poller tick and start the bail timer.
+    stopSignal.stopped = true;
+    await vi.advanceTimersByTimeAsync(300);
+
+    // Advance past the 3s bail timeout.  streamPodLogsOnce must now resolve
+    // with an empty string (no chunks were captured) rather than hanging.
+    await vi.advanceTimersByTimeAsync(3_100);
+
+    const result = await resultPromise;
+    expect(result).toBe("");
+    expect(mockLogFn).toHaveBeenCalledOnce();
+  });
+
+  it("returns promptly if logApi.log resolves before stopSignal fires (happy path, no bail involved)", async () => {
+    mockLogFn.mockImplementation(async (_ns, _pod, _ctr, _writable: Writable) => {
+      // Resolve immediately — normal log-stream completion.
+      return undefined;
+    });
+
+    const onLog = vi.fn().mockResolvedValue(undefined);
+
+    // No stopSignal → no bail machinery engaged.
+    // The fake clock is never advanced here, so this case passes only if the
+    // call completes without waiting on any timer.
+    const result = await streamPodLogsOnce(
+      "default",
+      "mypod",
+      onLog,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+    );
+
+    expect(result).toBe("");
+    expect(mockLogFn).toHaveBeenCalledOnce();
+  });
+});
@@ -1,5 +1,15 @@
 import type { AdapterExecutionContext, AdapterExecutionResult } from "@paperclipai/adapter-utils";
-import { asString, asNumber, asBoolean, parseObject } from "@paperclipai/adapter-utils/server-utils";
+import {
+  asString,
+  asNumber,
+  asBoolean,
+  parseObject,
+  readPaperclipRuntimeSkillEntries,
+  resolvePaperclipDesiredSkillNames,
+} from "@paperclipai/adapter-utils/server-utils";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { prepareClaudePromptBundle } from "./prompt-cache.js";
 import {
  parseClaudeStreamJson,
  describeClaudeFailure,
@@ -16,6 +26,15 @@ const POLL_INTERVAL_MS = 2000;
 const KEEPALIVE_INTERVAL_MS = 15_000;
 const LOG_STREAM_RECONNECT_DELAY_MS = 3_000;
 const MAX_LOG_RECONNECT_ATTEMPTS = 50;
+// How long to keep refreshing onSpawn after the Job reaches a terminal state.
+// Covers the cleanup path (delete job, parse stdout) so a slow K8s API call
+// doesn't trip the 5-minute reaper staleness window.
+const POST_TERMINAL_KEEPALIVE_MS = 90_000;
+// Upper bound on how long streamPodLogsOnce will wait after stopSignal fires
+// before force-returning, even if logApi.log has not yet resolved.  Defensive
+// against the K8s client library not propagating writable.destroy() into an
+// abort of the underlying HTTP request.
+const LOG_STREAM_BAIL_TIMEOUT_MS = 3_000;

 /**
 * Detect a Kubernetes 404 (Not Found) error from @kubernetes/client-node.
@@ -96,6 +115,27 @@ export function isReattachableOrphan(
  return true;
 }

+/**
+ * Compose the error text for a pod that ended up in a terminal phase without
+ * producing streamable logs.
+ *
+ * The message always starts with `Pod <podName> reached phase=<phase>`. When
+ * the container named "claude" carries a `terminated` state, its exit code
+ * ("unknown" when absent) and its reason (falling back to the message, then
+ * to "no reason") are appended so the crash can be diagnosed from the error
+ * alone.  Exported for unit tests.
+ *
+ * @param podName Name of the pod, echoed into the message.
+ * @param phase Pod phase, echoed into the message.
+ * @param containerStatuses Container statuses of the pod; only the "claude" entry is inspected.
+ * @returns The formatted error message.
+ */
+export function describePodTerminatedError(
+  podName: string,
+  phase: string,
+  containerStatuses: k8s.V1ContainerStatus[],
+): string {
+  const summary = `Pod ${podName} reached phase=${phase}`;
+  const exitInfo = containerStatuses.find(({ name }) => name === "claude")?.state?.terminated;
+  // No terminated state for the main container: nothing more to report.
+  if (!exitInfo) return summary;
+  const { exitCode, reason, message } = exitInfo;
+  return `${summary}: claude exited ${exitCode ?? "unknown"} (${reason ?? message ?? "no reason"})`;
+}
+
 /**
 * Wait for the Job's pod to reach a terminal or running state.
 * Returns the pod name once logs can be streamed, or throws on failure.
@@ -147,15 +187,22 @@ async function waitForPod(
      for (const cs of containerStatuses) {
        if (cs.state?.waiting) details.push(`${cs.name}: waiting (${cs.state.waiting.reason ?? "unknown"})`);
        else if (cs.state?.running) details.push(`${cs.name}: running`);
+        else if (cs.state?.terminated) details.push(`${cs.name}: terminated (exit ${cs.state.terminated.exitCode ?? "?"}, ${cs.state.terminated.reason ?? "no reason"})`);
      }
      await onLog("stdout", `[paperclip] Pod ${podName}: ${details.join(", ")}\n`);
      lastStatus = statusKey;
    }

    // Ready to stream logs
-    if (phase === "Running" || phase === "Succeeded" || phase === "Failed") {
+    if (phase === "Running" || phase === "Succeeded") {
      return podName;
    }
+    // phase=Failed means the pod crashed before we could stream logs.
+    // Throwing here routes the caller into the error path with a structured
+    // message instead of entering the log-streaming path with a dead pod.
+    if (phase === "Failed") {
+      throw new Error(describePodTerminatedError(podName, phase, containerStatuses));
+    }

    // Init containers done + main running (phase may still say Pending briefly)
    const allInitsDone = initStatuses.length > 0 && initStatuses.every(
@@ -211,13 +258,14 @@ async function waitForPod(
 * Stream pod logs once via follow. Returns accumulated stdout when the
 * stream ends (container exit, API disconnect, or abort signal).
 */
-async function streamPodLogsOnce(
+export async function streamPodLogsOnce(
  namespace: string,
  podName: string,
  onLog: AdapterExecutionContext["onLog"],
  kubeconfigPath?: string,
  sinceSeconds?: number,
  dedup?: LogLineDedupFilter,
+  stopSignal?: { stopped: boolean },
 ): Promise<string> {
  const logApi = getLogApi(kubeconfigPath);
  const chunks: string[] = [];
@@ -235,15 +283,48 @@ async function streamPodLogsOnce(
    },
  });

+  // When the job completion signal fires, destroy the writable to abort the
+  // in-flight follow stream.  Without this, logApi.log can hang indefinitely
+  // when the pod terminates without closing the HTTP connection cleanly.
+  let stopPoller: ReturnType<typeof setInterval> | null = null;
+  let bailTimer: ReturnType<typeof setTimeout> | null = null;
+  let bailResolve: (() => void) | null = null;
+  // Bail promise resolves LOG_STREAM_BAIL_TIMEOUT_MS after stopSignal fires,
+  // even if logApi.log has not resolved by then.  This is a safety net for the
+  // case where writable.destroy() fails to propagate to an abort of the HTTP
+  // request (e.g. the K8s client is awaiting a response that never comes).
+  const bailPromise = new Promise<void>((resolve) => {
+    bailResolve = resolve;
+  });
+  if (stopSignal) {
+    stopPoller = setInterval(() => {
+      if (stopSignal.stopped) {
+        if (!writable.destroyed) writable.destroy();
+        if (!bailTimer && bailResolve) {
+          bailTimer = setTimeout(bailResolve, LOG_STREAM_BAIL_TIMEOUT_MS);
+        }
+      }
+    }, 200);
+  }
+
+  const logPromise = logApi.log(namespace, podName, "claude", writable, {
+    follow: true,
+    pretty: false,
+    ...(sinceSeconds ? { sinceSeconds } : {}),
+  }).catch(() => {
+    // follow may fail if the container already exited, the API connection
+    // dropped, or we aborted via writable.destroy() — not fatal.
+  });
+
  try {
-    await logApi.log(namespace, podName, "claude", writable, {
-      follow: true,
-      pretty: false,
-      ...(sinceSeconds ? { sinceSeconds } : {}),
-    });
-  } catch {
-    // follow may fail if the container already exited or the API
-    // connection dropped — not fatal, caller decides whether to retry.
+    if (stopSignal) {
+      await Promise.race([logPromise, bailPromise]);
+    } else {
+      await logPromise;
+    }
+  } finally {
+    if (stopPoller) clearInterval(stopPoller);
+    if (bailTimer) clearTimeout(bailTimer);
  }

  return chunks.join("");
@@ -293,7 +374,7 @@ async function streamPodLogs(
    }

    const preStreamTs = Math.floor(Date.now() / 1000);
-    const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup);
+    const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal);
    if (result) {
      allChunks.push(result);
      // Update last-received timestamp to now (the stream just ended,
@@ -548,6 +629,38 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
  let namespace: string;
  let promptSecret: { name: string; namespace: string; data: Record<string, string> } | null = null;

+  // Prepare the prompt bundle (skills + instructions) on the server filesystem.
+  // The K8s Job pod mounts the same PVC at /paperclip, so bundle paths written
+  // here are accessible inside the pod at the identical absolute path.
+  const skillEntries = await readPaperclipRuntimeSkillEntries(config, import.meta.dirname ?? __dirname);
+  const desiredSkillNames = new Set(resolvePaperclipDesiredSkillNames(config, skillEntries));
+  const desiredSkills = skillEntries.filter((e) => desiredSkillNames.has(e.key));
+  const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
+  const instructionsFileDir = instructionsFilePath ? `${path.dirname(instructionsFilePath)}/` : "";
+  let instructionsContents: string | null = null;
+  if (instructionsFilePath) {
+    try {
+      const raw = await fs.readFile(instructionsFilePath, "utf-8");
+      const pathDirective =
+        `\nThe above agent instructions were loaded from ${instructionsFilePath}. ` +
+        `Resolve any relative file references from ${instructionsFileDir}. ` +
+        `This base directory is authoritative for sibling instruction files such as ` +
+        `./HEARTBEAT.md, ./SOUL.md, and ./TOOLS.md; do not resolve those from the parent agent directory.`;
+      instructionsContents = raw + pathDirective;
+    } catch (err) {
+      await onLog(
+        "stderr",
+        `[paperclip] Warning: could not read agent instructions file "${instructionsFilePath}": ${err instanceof Error ? err.message : String(err)}\n`,
+      );
+    }
+  }
+  const promptBundle = await prepareClaudePromptBundle({
+    companyId: ctx.agent.companyId,
+    skills: desiredSkills,
+    instructionsContents,
+    onLog,
+  });
+
  if (reattachTarget) {
    jobName = reattachTarget.jobName;
    namespace = reattachTarget.namespace;
@@ -575,7 +688,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    await onLog("stdout", `[paperclip] Reattaching to in-flight K8s Job ${jobName} in namespace ${namespace} (prior run ${reattachTarget.priorRunId || "unknown"})\n`);
  } else {
    // Build Job manifest
-    const built = buildJobManifest({ ctx, selfPod });
+    const built = buildJobManifest({ ctx, selfPod, promptBundle });
    const job = built.job;
    jobName = built.jobName;
    namespace = built.namespace;
@@ -739,11 +852,27 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
    let lastLogAt = Date.now();
    let keepaliveTick = 0;
    let keepaliveJobTerminal = false;
+    let keepaliveJobTerminalAt: number | null = null;
    keepaliveTimer = setInterval(() => {
      // Fire-and-forget the async work; setInterval callbacks must be
      // synchronous or the timer will drift.
      void (async () => {
-        if (keepaliveJobTerminal) return;
+        if (keepaliveJobTerminal) {
+          // Post-terminal window: keep refreshing onSpawn during cleanup
+          // (job deletion, log parsing, K8s API calls) so the reaper doesn't
+          // fire a false process_lost while execute() is still running.
+          if (
+            ctx.onSpawn &&
+            keepaliveJobTerminalAt !== null &&
+            Date.now() - keepaliveJobTerminalAt <= POST_TERMINAL_KEEPALIVE_MS
+          ) {
+            keepaliveTick++;
+            if (keepaliveTick % 6 === 0) {
+              void ctx.onSpawn({ pid: process.pid, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {});
+            }
+          }
+          return;
+        }

        // Verify the Job is still alive before announcing or refreshing.
        try {
@@ -753,6 +882,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
          );
          if (terminal) {
            keepaliveJobTerminal = true;
+            keepaliveJobTerminalAt = Date.now();
            return;
          }
        } catch (err: unknown) {
@@ -762,6 +892,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
          // window as a safety net.
          if (isK8s404(err)) {
            keepaliveJobTerminal = true;
+            keepaliveJobTerminalAt = Date.now();
            return;
          }
          // Log transient errors but leave keepaliveJobTerminal false so
@@ -517,13 +517,66 @@ describe("buildJobManifest", () => {
      expect(claudeArgs).toContain("--dangerously-skip-permissions");
    });

-    it("adds --append-system-prompt-file when instructionsFilePath set", () => {
+    it("adds --append-system-prompt-file (config fallback) when instructionsFilePath set and no session", () => {
      ctx.config = { instructionsFilePath: "/paperclip/instructions.md" };
      const { claudeArgs } = buildJobManifest({ ctx, selfPod });
      expect(claudeArgs).toContain("--append-system-prompt-file");
      expect(claudeArgs).toContain("/paperclip/instructions.md");
    });

+    it("omits --append-system-prompt-file on session resume (avoids token waste)", () => {
+      ctx.config = { instructionsFilePath: "/paperclip/instructions.md" };
+      ctx.runtime.sessionId = "sess_existing";
+      const { claudeArgs } = buildJobManifest({ ctx, selfPod });
+      expect(claudeArgs).not.toContain("--append-system-prompt-file");
+    });
+
+    it("adds --add-dir when promptBundle is provided", () => {
+      const promptBundle = {
+        bundleKey: "abc123",
+        rootDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
+        addDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
+        instructionsFilePath: null,
+      };
+      const { claudeArgs } = buildJobManifest({ ctx, selfPod, promptBundle });
+      expect(claudeArgs).toContain("--add-dir");
+      expect(claudeArgs).toContain(promptBundle.addDir);
+    });
+
+    it("uses bundle instructionsFilePath for --append-system-prompt-file when promptBundle provided", () => {
+      const promptBundle = {
+        bundleKey: "abc123",
+        rootDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
+        addDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
+        instructionsFilePath: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123/agent-instructions.md",
+      };
+      ctx.config = { instructionsFilePath: "/raw/path/AGENTS.md" };
+      const { claudeArgs } = buildJobManifest({ ctx, selfPod, promptBundle });
+      expect(claudeArgs).toContain("--append-system-prompt-file");
+      const idx = claudeArgs.indexOf("--append-system-prompt-file");
+      expect(claudeArgs[idx + 1]).toBe(promptBundle.instructionsFilePath);
+      expect(claudeArgs).not.toContain("/raw/path/AGENTS.md");
+    });
+
+    it("omits --append-system-prompt-file from bundle on session resume", () => {
+      const promptBundle = {
+        bundleKey: "abc123",
+        rootDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
+        addDir: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123",
+        instructionsFilePath: "/paperclip/instances/default/companies/co1/claude-prompt-cache/abc123/agent-instructions.md",
+      };
+      ctx.runtime.sessionId = "sess_existing";
+      const { claudeArgs } = buildJobManifest({ ctx, selfPod, promptBundle });
+      expect(claudeArgs).not.toContain("--append-system-prompt-file");
+      // --add-dir must still be present even on resume
+      expect(claudeArgs).toContain("--add-dir");
+    });
+
+    it("omits --add-dir when no promptBundle", () => {
+      const { claudeArgs } = buildJobManifest({ ctx, selfPod });
+      expect(claudeArgs).not.toContain("--add-dir");
+    });
+
    it("appends extraArgs when configured", () => {
      ctx.config = { extraArgs: ["--no-input", "--verbose"] };
      const { claudeArgs } = buildJobManifest({ ctx, selfPod });
@@ -10,6 +10,7 @@ import {
  renderTemplate,
 } from "@paperclipai/adapter-utils/server-utils";
 import { createHash } from "node:crypto";
+import type { ClaudePromptBundle } from "./prompt-cache.js";

 /**
 * Build the shell command prefix that installs a native Node.js PostToolUse
@@ -175,6 +176,8 @@ function parseKeyValueConfig(raw: unknown): Record<string, string> {
 export interface JobBuildInput {
  ctx: AdapterExecutionContext;
  selfPod: SelfPodInfo;
+  /** Prepared prompt bundle (skills + instructions). When provided, --add-dir and --append-system-prompt-file use bundle paths. */
+  promptBundle?: ClaudePromptBundle | null;
 }

 /** When the prompt exceeds the env-var size limit, the manifest uses a
@@ -327,7 +330,7 @@ function buildEnvVars(
 }

 export function buildJobManifest(input: JobBuildInput): JobBuildResult {
-  const { ctx, selfPod } = input;
+  const { ctx, selfPod, promptBundle } = input;
  const { runId, agent, runtime, config: rawConfig, context } = ctx;
  const config = parseObject(rawConfig);

@@ -403,14 +406,22 @@ export function buildJobManifest(input: JobBuildInput): JobBuildResult {
  };

  // Build Claude CLI args
-  const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
+  // Prefer the bundle's materialized instructions file over the raw config path.
+  // Never inject --append-system-prompt-file on session resumes — the instructions
+  // are already in the session cache and re-injecting wastes tokens.
+  const rawInstructionsFilePath = asString(config.instructionsFilePath, "").trim();
+  const effectiveInstructionsFilePath =
+    promptBundle?.instructionsFilePath ?? (rawInstructionsFilePath || null);
  const claudeArgs = ["--print", "-", "--output-format", "stream-json", "--verbose"];
  if (runtimeSessionId) claudeArgs.push("--resume", runtimeSessionId);
  if (dangerouslySkipPermissions) claudeArgs.push("--dangerously-skip-permissions");
  if (model) claudeArgs.push("--model", model);
  if (effort) claudeArgs.push("--effort", effort);
  if (maxTurns > 0) claudeArgs.push("--max-turns", String(maxTurns));
-  if (instructionsFilePath) claudeArgs.push("--append-system-prompt-file", instructionsFilePath);
+  if (effectiveInstructionsFilePath && !runtimeSessionId) {
+    claudeArgs.push("--append-system-prompt-file", effectiveInstructionsFilePath);
+  }
+  if (promptBundle) claudeArgs.push("--add-dir", promptBundle.addDir);
  if (extraArgs.length > 0) claudeArgs.push(...extraArgs);

  // Build env vars
@@ -0,0 +1,150 @@
+import { constants as fsConstants } from "node:fs";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { createHash } from "node:crypto";
+import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
+import {
+  type PaperclipSkillEntry,
+  ensurePaperclipSkillSymlink,
+} from "@paperclipai/adapter-utils/server-utils";
+
+/**
+ * Description of a prepared prompt bundle directory, as returned by
+ * prepareClaudePromptBundle.
+ */
+export interface ClaudePromptBundle {
+  /** Hex SHA-256 digest derived from the instructions text and the skill contents; also the bundle directory name. */
+  bundleKey: string;
+  /** Absolute path to the bundle root directory (contains .claude/skills/ and, when instructions were provided, agent-instructions.md). */
+  rootDir: string;
+  /** Value to pass as --add-dir to the Claude CLI (currently the same path as rootDir). */
+  addDir: string;
+  /** Path to the materialized instructions file, or null if no instructions were provided. */
+  instructionsFilePath: string | null;
+}
+
+/** Instance id used when PAPERCLIP_INSTANCE_ID is unset or blank. */
+const DEFAULT_PAPERCLIP_INSTANCE_ID = "default";
+
+/**
+ * Resolve the directory under which a company's Claude prompt bundles live:
+ * `<home>/instances/<instanceId>/companies/<companyId>/claude-prompt-cache`.
+ *
+ * `<home>` is the trimmed PAPERCLIP_HOME, or `~/.paperclip` when that variable
+ * is unset or blank. `<instanceId>` is the trimmed PAPERCLIP_INSTANCE_ID, or
+ * "default" when that variable is unset or blank.
+ *
+ * @param companyId Company identifier used as a path segment.
+ * @returns The resolved absolute cache-root path.
+ */
+function resolveManagedClaudePromptCacheRoot(companyId: string): string {
+  // Read an environment variable, treating unset and whitespace-only as absent.
+  const readEnv = (name: "PAPERCLIP_HOME" | "PAPERCLIP_INSTANCE_ID"): string | null => {
+    const raw = process.env[name];
+    if (typeof raw !== "string") return null;
+    const trimmed = raw.trim();
+    return trimmed.length > 0 ? trimmed : null;
+  };
+
+  const homeDir = readEnv("PAPERCLIP_HOME") ?? path.resolve(os.homedir(), ".paperclip");
+  const instance = readEnv("PAPERCLIP_INSTANCE_ID") ?? DEFAULT_PAPERCLIP_INSTANCE_ID;
+  return path.resolve(homeDir, "instances", instance, "companies", companyId, "claude-prompt-cache");
+}
+
+/**
+ * Feed a filesystem entry, recursively, into a running hash.
+ *
+ * What gets hashed per entry kind:
+ * - symlink: a `symlink:` marker, then either `missing` (target cannot be
+ *   resolved) or the resolved target hashed under the same relative path;
+ * - directory: a `dir:` marker, then its children in name order, or `loop`
+ *   when the directory's real path was already visited;
+ * - regular file: a `file:` marker followed by the file bytes;
+ * - anything else: an `other:` marker including the entry's mode.
+ *
+ * @param candidate Filesystem path to hash.
+ * @param hash Hash object that receives the updates.
+ * @param relativePath Label written into the markers instead of the real path.
+ * @param seenDirectories Real paths of directories already visited; mutated.
+ */
+async function hashPathContents(
+  candidate: string,
+  hash: ReturnType<typeof createHash>,
+  relativePath: string,
+  seenDirectories: Set<string>,
+): Promise<void> {
+  // lstat (not stat) so that symlinks are observed rather than followed.
+  const info = await fs.lstat(candidate);
+
+  if (info.isSymbolicLink()) {
+    hash.update(`symlink:${relativePath}\n`);
+    let linkTarget: string | null;
+    try {
+      linkTarget = await fs.realpath(candidate);
+    } catch {
+      linkTarget = null;
+    }
+    if (linkTarget) {
+      await hashPathContents(linkTarget, hash, relativePath, seenDirectories);
+    } else {
+      hash.update("missing\n");
+    }
+    return;
+  }
+
+  if (info.isDirectory()) {
+    // Fall back to the given path when the real path cannot be resolved.
+    let canonicalDir = candidate;
+    try {
+      canonicalDir = await fs.realpath(candidate);
+    } catch {
+      // keep the fallback
+    }
+    hash.update(`dir:${relativePath}\n`);
+    if (seenDirectories.has(canonicalDir)) {
+      // Already walked this directory: record the cycle instead of recursing.
+      hash.update("loop\n");
+      return;
+    }
+    seenDirectories.add(canonicalDir);
+
+    const dirents = await fs.readdir(candidate, { withFileTypes: true });
+    const childNames = dirents.map((dirent) => dirent.name);
+    childNames.sort((left, right) => left.localeCompare(right));
+    const prefix = relativePath.length > 0 ? `${relativePath}/` : "";
+    for (const childName of childNames) {
+      await hashPathContents(path.join(candidate, childName), hash, `${prefix}${childName}`, seenDirectories);
+    }
+    return;
+  }
+
+  if (info.isFile()) {
+    hash.update(`file:${relativePath}\n`);
+    const bytes = await fs.readFile(candidate);
+    hash.update(bytes);
+    hash.update("\n");
+    return;
+  }
+
+  hash.update(`other:${relativePath}:${info.mode}\n`);
+}
+
+/**
+ * Compute the content-derived key of a prompt bundle.
+ *
+ * The SHA-256 digest covers a version tag, the instructions text (or a
+ * "none" marker when there is no text), and for every skill — ordered by
+ * runtimeName — its key, runtime name, and the hashed contents of its source
+ * path.
+ *
+ * @param input.skills Skills that will be part of the bundle; not mutated.
+ * @param input.instructionsContents Instructions text, or null when absent.
+ * @returns The digest as a hex string.
+ */
+async function buildClaudePromptBundleKey(input: {
+  skills: PaperclipSkillEntry[];
+  instructionsContents: string | null;
+}): Promise<string> {
+  const { skills, instructionsContents } = input;
+  const digest = createHash("sha256");
+  digest.update("paperclip-claude-prompt-bundle:v1\n");
+
+  if (!instructionsContents) {
+    digest.update("instructions:none\n");
+  } else {
+    digest.update("instructions\n");
+    digest.update(instructionsContents);
+    digest.update("\n");
+  }
+
+  // Sort a copy so the key does not depend on the caller's skill order.
+  const ordered = skills.slice().sort((left, right) => left.runtimeName.localeCompare(right.runtimeName));
+  for (const skill of ordered) {
+    digest.update(`skill:${skill.key}:${skill.runtimeName}\n`);
+    // Fresh visited-set per skill: loop detection is scoped to one skill tree.
+    await hashPathContents(skill.source, digest, skill.runtimeName, new Set());
+  }
+  return digest.digest("hex");
+}
+
+/**
+ * Make sure a readable file exists at `targetPath`.
+ *
+ * An already-readable file is left untouched (its contents are not compared).
+ * Otherwise the contents are written to a temporary sibling file and renamed
+ * into place. A write/rename failure is ignored when the target turns out to
+ * be readable afterwards, and rethrown otherwise. The temporary file is
+ * removed on every path.
+ *
+ * @param targetPath Destination file path; parent directories are created.
+ * @param contents Text written (UTF-8) when the file has to be created.
+ */
+async function ensureReadableFile(targetPath: string, contents: string): Promise<void> {
+  const targetIsReadable = (): Promise<boolean> =>
+    fs.access(targetPath, fsConstants.R_OK).then(
+      () => true,
+      () => false,
+    );
+
+  if (await targetIsReadable()) return;
+
+  await fs.mkdir(path.dirname(targetPath), { recursive: true });
+  const scratchPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
+  try {
+    await fs.writeFile(scratchPath, contents, "utf8");
+    await fs.rename(scratchPath, targetPath);
+  } catch (writeError) {
+    // Tolerate the failure only if the target is readable after all.
+    if (!(await targetIsReadable())) throw writeError;
+  } finally {
+    await fs.rm(scratchPath, { force: true }).catch(() => {});
+  }
+}
+
+/**
+ * Materialize a prompt bundle directory for a company and describe it.
+ *
+ * The bundle lives at `<cache root>/<bundleKey>` and receives:
+ * - `.claude/skills/<runtimeName>` for every skill, created through
+ *   ensurePaperclipSkillSymlink; a failure for one skill is reported on
+ *   stderr via `onLog` and does not abort the others;
+ * - `agent-instructions.md`, only when instructions text was provided.
+ *
+ * @param input.companyId Company whose cache root is used.
+ * @param input.skills Skills to link into the bundle.
+ * @param input.instructionsContents Instructions text, or null when absent.
+ * @param input.onLog Log sink used for per-skill failure messages.
+ * @returns The bundle key, root directory, --add-dir value, and instructions file path (null without instructions).
+ */
+export async function prepareClaudePromptBundle(input: {
+  companyId: string;
+  skills: PaperclipSkillEntry[];
+  instructionsContents: string | null;
+  onLog: AdapterExecutionContext["onLog"];
+}): Promise<ClaudePromptBundle> {
+  const { companyId, skills, instructionsContents, onLog } = input;
+
+  const bundleKey = await buildClaudePromptBundleKey({ skills, instructionsContents });
+  const cacheRoot = resolveManagedClaudePromptCacheRoot(companyId);
+  const rootDir = path.join(cacheRoot, bundleKey);
+  const skillsHome = path.join(rootDir, ".claude", "skills");
+  await fs.mkdir(skillsHome, { recursive: true });
+
+  for (const skill of skills) {
+    const linkPath = path.join(skillsHome, skill.runtimeName);
+    try {
+      await ensurePaperclipSkillSymlink(skill.source, linkPath);
+    } catch (linkError) {
+      const reason = linkError instanceof Error ? linkError.message : String(linkError);
+      await onLog(
+        "stderr",
+        `[paperclip] Failed to materialize Claude skill "${skill.key}" into ${skillsHome}: ${reason}\n`,
+      );
+    }
+  }
+
+  // The instructions file exists only when there is non-empty text to write.
+  let instructionsFilePath: string | null = null;
+  if (instructionsContents) {
+    instructionsFilePath = path.join(rootDir, "agent-instructions.md");
+    await ensureReadableFile(instructionsFilePath, instructionsContents);
+  }
+
+  return { bundleKey, rootDir, addDir: rootDir, instructionsFilePath };
+}
@@ -33,7 +33,7 @@ async function buildK8sSkillSnapshot(
    sourcePath: entry.source,
    targetPath: null,
    detail: desiredSet.has(entry.key)
-      ? "Injected via prompt bundle into ephemeral K8s Job pods."
+      ? "Materialized into the PVC-backed Claude prompt bundle before each K8s Job run."
      : null,
    required: Boolean(entry.required),
    requiredReason: entry.requiredReason ?? null,
Author	SHA1	Message	Date
Hugh Commit	baf7e2d44d	0.1.32: port prepareClaudePromptBundle to claude_k8s (FAR-12) Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 19:47:26 +00:00
Gandalf the Greybeard	77ed2004f8	fix: port prepareClaudePromptBundle flow to claude_k8s adapter (FAR-11) K8s Job pods were starting without the Paperclip skill loaded, so agents could not find their heartbeat procedure and reported "no issue content in my workspace" on every wake. Root cause: claude_local materialises skills into a PVC-backed prompt-bundle directory and passes --add-dir to Claude, but claude_k8s did neither. Changes: - Add src/server/prompt-cache.ts with prepareClaudePromptBundle (ported from adapter-claude-local). Writes skill symlinks and the agent's instructions file into a content-addressed bundle directory under the shared PVC (/paperclip/instances/.../claude-prompt-cache/<hash>/). - execute.ts: read desired skills and instructions file before building the Job manifest, then call prepareClaudePromptBundle and pass the resulting bundle to buildJobManifest. - job-manifest.ts: accept optional promptBundle in JobBuildInput; when present, pass --add-dir <bundle.addDir> and use bundle.instructionsFilePath for --append-system-prompt-file. Also fix: skip --append-system-prompt-file on session resumes to avoid wasting tokens on re-injection. - skills.ts: correct the detail string to reflect actual materialisation. - job-manifest.test.ts: add 5 new tests covering --add-dir injection, bundle path preference, session-resume skipping, and fallback behaviour. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 19:34:35 +00:00
Gandalf the Greybeard	69d0f4972f	test: regression for streamPodLogsOnce bail timer (FAR-10) Uses vi.mock on k8s-client and vi.useFakeTimers to prove that when logApi.log() never resolves (the FAR-10 hang shape) and stopSignal fires, streamPodLogsOnce still returns within the bail window (LOG_STREAM_BAIL_TIMEOUT_MS). Exports streamPodLogsOnce so the test can call it directly. Also covers the no-stopSignal happy path. 269/269 passing (+2 new). Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 16:43:32 +00:00
Gandalf the Greybeard	c7706d742f	0.1.31: harden streamPodLogsOnce with Promise.race bail (FAR-10) Defensive follow-up to the FAR-10 fix. The original patch aborts the in-flight follow stream by destroying the Writable once stopSignal fires, and relies on the @kubernetes/client-node library propagating that destroy into an abort of the underlying HTTP request. If that propagation ever fails (e.g. the client is awaiting a response that never arrives), logApi.log() can still hang forever. Adds a Promise.race with a 3s bail timer that starts when stopSignal fires. In the happy path (destroy-propagation works), logApi.log() resolves first and the bail timer is cleared. In the failure path, the bail timer fires and streamPodLogsOnce returns with whatever chunks were captured — preventing the hang from reaching execute(). No test change: existing 267 tests pass and the race path needs a k8s mock to exercise end-to-end; validated by monitoring real runs. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 16:36:51 +00:00
Gandalf the Greybeard	8937fb2804	chore: fix repo org farhoodliquor→farhoodlabs; wire NPM_TOKEN for publish - Update repository, bugs, and homepage URLs in package.json to use the correct farhoodlabs GitHub org - Add NODE_AUTH_TOKEN: NPM_TOKEN to the CI publish step so the newly added NPM_TOKEN secret is picked up for authentication Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 16:20:48 +00:00
Gandalf the Greybeard	77e9aa9b37	ci: switch npm publish to OIDC trusted publishing Replaces NPM_TOKEN secret with id-token: write + --provenance so publishing uses GitHub's OIDC token directly. No repository secret required; provenance attestation is generated automatically. Also collapses the redundant second setup-node step (registry-url is now set on the first one). Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 16:10:39 +00:00
Gandalf the Greybeard	683ea2d8b1	0.1.30 Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 16:08:22 +00:00
Chris Farhood	dd859c74a8	Merge pull request #9 from farhoodlabs/fix/far-10-process-lost-after-job-complete fix: prevent process_lost when K8s Job completes (FAR-10)	2026-04-23 12:07:33 -04:00
Gandalf the Greybeard	b3c1519cf5	fix: prevent process_lost when K8s Job completes (FAR-10) Four stacked bugs caused the adapter to hang after K8s Job completion, allowing the 5-minute reaper to mark runs process_lost even when the Job actually succeeded. - streamPodLogsOnce: add stopSignal polling loop that destroys the writable every 200ms once the job-completion branch fires, aborting any in-flight follow stream that would otherwise hang indefinitely - waitForPod: treat phase=Failed as a terminal error (throw via describePodTerminatedError) instead of entering the log-stream path with a dead pod (new helper is exported for unit tests) - waitForPod: surface cs.state?.terminated in the per-tick detail line so operators see exit code / reason without needing kubectl - keepalive: add POST_TERMINAL_KEEPALIVE_MS (90s) window after Job goes terminal so onSpawn keeps refreshing updatedAt during cleanup; if execute() genuinely stalls past 90s the reaper will still catch it Regression tests added for describePodTerminatedError (phase=Failed with and without claude container status). Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-04-23 15:59:51 +00:00