forked from farhoodlabs/paperclip
feat(plugin): add kubernetes sandbox provider
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
export interface AdapterDefaults {
|
||||
runtimeImage: string;
|
||||
envKeys: string[];
|
||||
allowFqdns: string[];
|
||||
probeCommand: string[];
|
||||
}
|
||||
|
||||
const REGISTRY: Record<string, AdapterDefaults> = {
|
||||
claude_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-claude:v1",
|
||||
envKeys: ["ANTHROPIC_API_KEY"],
|
||||
allowFqdns: ["api.anthropic.com"],
|
||||
probeCommand: ["claude", "--version"],
|
||||
},
|
||||
codex_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-codex:v1",
|
||||
envKeys: ["OPENAI_API_KEY"],
|
||||
allowFqdns: ["api.openai.com"],
|
||||
probeCommand: ["codex", "--version"],
|
||||
},
|
||||
gemini_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-gemini:v1",
|
||||
envKeys: ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
|
||||
allowFqdns: ["generativelanguage.googleapis.com"],
|
||||
probeCommand: ["gemini", "--version"],
|
||||
},
|
||||
cursor_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-cursor:v1",
|
||||
envKeys: ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"],
|
||||
allowFqdns: ["api.anthropic.com", "api.openai.com"],
|
||||
probeCommand: ["cursor-agent", "--version"],
|
||||
},
|
||||
opencode_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-opencode:v1",
|
||||
envKeys: ["ANTHROPIC_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY"],
|
||||
allowFqdns: ["api.anthropic.com", "api.openai.com", "openrouter.ai"],
|
||||
probeCommand: ["opencode", "--version"],
|
||||
},
|
||||
acpx_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-acpx:v1",
|
||||
envKeys: ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"],
|
||||
allowFqdns: ["api.anthropic.com", "api.openai.com"],
|
||||
probeCommand: ["acpx", "--version"],
|
||||
},
|
||||
pi_local: {
|
||||
runtimeImage: "ghcr.io/paperclipai/agent-runtime-pi:v1",
|
||||
envKeys: ["ANTHROPIC_API_KEY"],
|
||||
allowFqdns: ["api.anthropic.com"],
|
||||
probeCommand: ["pi", "--version"],
|
||||
},
|
||||
};
|
||||
|
||||
export const KNOWN_ADAPTER_TYPES: ReadonlySet<string> = new Set(Object.keys(REGISTRY));
|
||||
|
||||
export function getAdapterDefaults(adapterType: string): AdapterDefaults {
|
||||
const defaults = REGISTRY[adapterType];
|
||||
if (!defaults) {
|
||||
throw new Error(`Unknown adapter type: ${adapterType}`);
|
||||
}
|
||||
return defaults;
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
export interface BuildCiliumNetworkPolicyInput {
|
||||
namespace: string;
|
||||
paperclipServerNamespace: string;
|
||||
egressAllowFqdns: string[];
|
||||
egressAllowCidrs: string[];
|
||||
}
|
||||
|
||||
// Design note: no ingress rules are defined here. Paperclip-server does NOT
|
||||
// push to agent pods — agents make outbound (egress) callbacks to
|
||||
// paperclip-server on port 3100. If server→agent push is ever needed, add a
|
||||
// targeted ingress rule scoped to the paperclip-server endpoint selector.
|
||||
export function buildCiliumNetworkPolicyManifest(input: BuildCiliumNetworkPolicyInput): Record<string, unknown> {
|
||||
const egress: Record<string, unknown>[] = [];
|
||||
|
||||
egress.push({
|
||||
toEndpoints: [
|
||||
{ matchLabels: { "k8s:io.kubernetes.pod.namespace": "kube-system", "k8s-app": "kube-dns" } },
|
||||
],
|
||||
toPorts: [
|
||||
{
|
||||
ports: [
|
||||
{ port: "53", protocol: "UDP" },
|
||||
{ port: "53", protocol: "TCP" },
|
||||
],
|
||||
rules: { dns: [{ matchPattern: "*" }] },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
if (input.egressAllowFqdns.length > 0) {
|
||||
egress.push({
|
||||
toFQDNs: input.egressAllowFqdns.map((fqdn) => ({ matchName: fqdn })),
|
||||
toPorts: [{ ports: [{ port: "443", protocol: "TCP" }] }],
|
||||
});
|
||||
}
|
||||
|
||||
egress.push({
|
||||
toEndpoints: [
|
||||
{
|
||||
matchLabels: {
|
||||
"k8s:io.kubernetes.pod.namespace": input.paperclipServerNamespace,
|
||||
app: "paperclip-server",
|
||||
},
|
||||
},
|
||||
],
|
||||
toPorts: [{ ports: [{ port: "3100", protocol: "TCP" }] }],
|
||||
});
|
||||
|
||||
if (input.egressAllowCidrs.length > 0) {
|
||||
egress.push({
|
||||
toCIDRSet: input.egressAllowCidrs.map((cidr) => ({ cidr })),
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
apiVersion: "cilium.io/v2",
|
||||
kind: "CiliumNetworkPolicy",
|
||||
metadata: {
|
||||
name: "paperclip-egress-fqdn",
|
||||
namespace: input.namespace,
|
||||
labels: { "paperclip.io/managed-by": "paperclip-k8s-plugin" },
|
||||
},
|
||||
spec: {
|
||||
endpointSelector: { matchLabels: { "paperclip.io/role": "agent" } },
|
||||
egress,
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Glob matching for image references.
|
||||
* - `*` matches any sequence of characters EXCEPT `/` (so a wildcard doesn't span path segments)
|
||||
* - `?` matches exactly one character (excluding `/`)
|
||||
*/
|
||||
export function globMatch(pattern: string, value: string): boolean {
|
||||
const re = new RegExp(
|
||||
"^" +
|
||||
pattern
|
||||
.replace(/[.+^${}()|[\]\\]/g, "\\$&")
|
||||
.replace(/\*/g, "[^/]*")
|
||||
.replace(/\?/g, "[^/]") +
|
||||
"$",
|
||||
);
|
||||
return re.test(value);
|
||||
}
|
||||
|
||||
export interface ResolveImageInput {
|
||||
imageOverride?: string | null;
|
||||
}
|
||||
|
||||
export interface ResolveImageDefaults {
|
||||
runtimeImage: string;
|
||||
}
|
||||
|
||||
export interface ResolveImageConfig {
|
||||
imageAllowList: string[];
|
||||
imageRegistry?: string;
|
||||
}
|
||||
|
||||
export function resolveImage(
|
||||
target: ResolveImageInput,
|
||||
defaults: ResolveImageDefaults,
|
||||
config: ResolveImageConfig,
|
||||
): string {
|
||||
if (target.imageOverride) {
|
||||
if (!config.imageAllowList.some((p) => globMatch(p, target.imageOverride!))) {
|
||||
throw new Error(`Image override "${target.imageOverride}" is not in allowlist`);
|
||||
}
|
||||
return target.imageOverride;
|
||||
}
|
||||
if (config.imageRegistry) {
|
||||
return rewriteRegistry(defaults.runtimeImage, config.imageRegistry);
|
||||
}
|
||||
return defaults.runtimeImage;
|
||||
}
|
||||
|
||||
function rewriteRegistry(image: string, registry: string): string {
|
||||
// image is like "ghcr.io/paperclipai/agent-runtime-claude:v1"
|
||||
// we want to replace the first two path segments (host + org) with `registry`
|
||||
const cleanRegistry = registry.replace(/\/+$/, "");
|
||||
const colonIdx = image.lastIndexOf(":");
|
||||
const tag = colonIdx >= 0 ? image.slice(colonIdx) : "";
|
||||
const path = colonIdx >= 0 ? image.slice(0, colonIdx) : image;
|
||||
const segments = path.split("/");
|
||||
// Strip the host+org (first two segments), keep the image name
|
||||
const imageName = segments.slice(2).join("/") || segments[segments.length - 1];
|
||||
return `${cleanRegistry}/${imageName}${tag}`;
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
export { default as manifest } from "./manifest.js";
|
||||
export { default as plugin } from "./plugin.js";
|
||||
@@ -0,0 +1,129 @@
|
||||
import type { KubeClients } from "./kube-client.js";
|
||||
import type { SandboxOrchestrator, SandboxStatus } from "./sandbox-orchestrator.js";
|
||||
|
||||
export class JobTimeoutError extends Error {
|
||||
constructor(namespace: string, name: string, timeoutMs: number) {
|
||||
super(`Job ${namespace}/${name} did not complete within ${timeoutMs}ms`);
|
||||
this.name = "JobTimeoutError";
|
||||
}
|
||||
}
|
||||
|
||||
export async function createJob(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
manifest: Record<string, unknown>,
|
||||
): Promise<{ uid: string }> {
|
||||
const result = await clients.batch.createNamespacedJob({ namespace, body: manifest as never });
|
||||
const uid = (result as { metadata?: { uid?: string } }).metadata?.uid;
|
||||
if (!uid) throw new Error("Job created without a UID");
|
||||
return { uid };
|
||||
}
|
||||
|
||||
export type JobStatus = SandboxStatus;
|
||||
|
||||
export async function getJobStatus(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<JobStatus> {
|
||||
const result = await clients.batch.readNamespacedJobStatus({ namespace, name });
|
||||
const body = (result as Record<string, unknown>) ?? {};
|
||||
const status = (body.status as Record<string, unknown>) ?? {};
|
||||
const active = (status.active as number) ?? 0;
|
||||
const succeeded = (status.succeeded as number) ?? 0;
|
||||
const failed = (status.failed as number) ?? 0;
|
||||
const conditions = (status.conditions as { type: string; status: string; reason?: string; message?: string }[]) ?? [];
|
||||
const completed = conditions.find((c) => c.type === "Complete" && c.status === "True");
|
||||
const failedCond = conditions.find((c) => c.type === "Failed" && c.status === "True");
|
||||
if (failedCond || failed > 0) {
|
||||
return { phase: "Failed", complete: false, active, succeeded, failed, reason: failedCond?.reason, message: failedCond?.message };
|
||||
}
|
||||
if (completed || succeeded > 0) {
|
||||
return { phase: "Succeeded", complete: true, active, succeeded, failed };
|
||||
}
|
||||
if (active > 0) {
|
||||
return { phase: "Running", complete: false, active, succeeded, failed };
|
||||
}
|
||||
return { phase: "Pending", complete: false, active, succeeded, failed };
|
||||
}
|
||||
|
||||
export async function findPodForJob(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
jobName: string,
|
||||
): Promise<string | null> {
|
||||
const result = await clients.core.listNamespacedPod({
|
||||
namespace,
|
||||
labelSelector: `job-name=${jobName}`,
|
||||
});
|
||||
const items = ((result as { items?: { metadata?: { name?: string }; status?: { phase?: string } }[] }).items) ?? [];
|
||||
const running = items.find((p) => p.status?.phase === "Running");
|
||||
return (running ?? items[0])?.metadata?.name ?? null;
|
||||
}
|
||||
|
||||
export async function streamPodLogs(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
podName: string,
|
||||
onChunk: (stream: "stdout" | "stderr", text: string) => Promise<void>,
|
||||
): Promise<void> {
|
||||
// V1 limitation: the Pod log API returns the container's combined log stream.
|
||||
// Kubernetes does not preserve stdout/stderr channel separation after the
|
||||
// container runtime writes logs, so the Job backend reports combined logs on
|
||||
// stdout. The sandbox-cr backend uses exec and keeps streams separate.
|
||||
const result = await clients.core.readNamespacedPodLog({ namespace, name: podName });
|
||||
const text = readPodLogText(result);
|
||||
if (text.length > 0) await onChunk("stdout", text);
|
||||
}
|
||||
|
||||
function readPodLogText(result: unknown): string {
|
||||
if (typeof result === "string") return result;
|
||||
const body = (result as { body?: unknown })?.body;
|
||||
return typeof body === "string" ? body : "";
|
||||
}
|
||||
|
||||
export async function deleteJob(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<void> {
|
||||
await clients.batch.deleteNamespacedJob({
|
||||
namespace,
|
||||
name,
|
||||
propagationPolicy: "Foreground",
|
||||
});
|
||||
}
|
||||
|
||||
export async function waitForJobCompletion(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
opts: { timeoutMs: number; pollMs?: number } = { timeoutMs: 120_000, pollMs: 2000 },
|
||||
): Promise<JobStatus> {
|
||||
const deadline = Date.now() + opts.timeoutMs;
|
||||
const pollMs = opts.pollMs ?? 2000;
|
||||
while (Date.now() < deadline) {
|
||||
const status = await getJobStatus(clients, namespace, name);
|
||||
if (status.phase === "Succeeded" || status.phase === "Failed") return status;
|
||||
await sleep(pollMs);
|
||||
}
|
||||
throw new JobTimeoutError(namespace, name, opts.timeoutMs);
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
|
||||
* Job-backed conformance to SandboxOrchestrator. Plugin.ts imports THIS value
|
||||
* (the swap point) — to use a different backend, swap this import for another
|
||||
* module exposing a SandboxOrchestrator-shaped default export.
|
||||
*/
|
||||
export const jobOrchestrator: SandboxOrchestrator = {
|
||||
claim: createJob,
|
||||
getStatus: getJobStatus,
|
||||
findPod: findPodForJob,
|
||||
streamLogs: streamPodLogs,
|
||||
release: deleteJob,
|
||||
waitForCompletion: waitForJobCompletion,
|
||||
};
|
||||
@@ -0,0 +1,44 @@
|
||||
import {
|
||||
KubeConfig,
|
||||
CoreV1Api,
|
||||
BatchV1Api,
|
||||
CustomObjectsApi,
|
||||
NetworkingV1Api,
|
||||
RbacAuthorizationV1Api,
|
||||
} from "@kubernetes/client-node";
|
||||
|
||||
export interface CreateKubeConfigInput {
|
||||
inCluster?: boolean;
|
||||
kubeconfig?: string;
|
||||
}
|
||||
|
||||
export function createKubeConfig(input: CreateKubeConfigInput): KubeConfig {
|
||||
const kc = new KubeConfig();
|
||||
if (input.inCluster) {
|
||||
kc.loadFromCluster();
|
||||
return kc;
|
||||
}
|
||||
if (input.kubeconfig && input.kubeconfig.trim().length > 0) {
|
||||
kc.loadFromString(input.kubeconfig);
|
||||
return kc;
|
||||
}
|
||||
throw new Error("createKubeConfig requires either inCluster=true or a kubeconfig string");
|
||||
}
|
||||
|
||||
export interface KubeClients {
|
||||
core: CoreV1Api;
|
||||
batch: BatchV1Api;
|
||||
custom: CustomObjectsApi;
|
||||
networking: NetworkingV1Api;
|
||||
rbac: RbacAuthorizationV1Api;
|
||||
}
|
||||
|
||||
export function makeKubeClients(kc: KubeConfig): KubeClients {
|
||||
return {
|
||||
core: kc.makeApiClient(CoreV1Api),
|
||||
batch: kc.makeApiClient(BatchV1Api),
|
||||
custom: kc.makeApiClient(CustomObjectsApi),
|
||||
networking: kc.makeApiClient(NetworkingV1Api),
|
||||
rbac: kc.makeApiClient(RbacAuthorizationV1Api),
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
import type { PaperclipPluginManifestV1 } from "@paperclipai/plugin-sdk";
|
||||
|
||||
const PLUGIN_ID = "paperclip.kubernetes-sandbox-provider";
|
||||
const PLUGIN_VERSION = "0.1.0-alpha.1";
|
||||
|
||||
const manifest: PaperclipPluginManifestV1 = {
|
||||
id: PLUGIN_ID,
|
||||
apiVersion: 1,
|
||||
version: PLUGIN_VERSION,
|
||||
displayName: "Kubernetes Sandbox (alpha)",
|
||||
description:
|
||||
"Built on kubernetes-sigs/agent-sandbox (v1alpha1). ALPHA — expect breaking changes as the upstream CRD evolves. Falls back to stable batch/v1 Job mode for clusters without agent-sandbox installed. First-party Paperclip sandbox-provider plugin for Kubernetes.",
|
||||
author: "Paperclip",
|
||||
categories: ["automation"],
|
||||
capabilities: ["environment.drivers.register"],
|
||||
entrypoints: {
|
||||
worker: "./dist/worker.js",
|
||||
},
|
||||
environmentDrivers: [
|
||||
{
|
||||
driverKey: "kubernetes",
|
||||
kind: "sandbox_provider",
|
||||
displayName: "Kubernetes",
|
||||
description:
|
||||
"Dispatches agent runs in per-tenant Kubernetes namespaces. Default backend (sandbox-cr, alpha) uses kubernetes-sigs/agent-sandbox for multi-command exec; fallback backend (job) uses stable batch/v1 Job for clusters without agent-sandbox installed.",
|
||||
configSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
inCluster: {
|
||||
type: "boolean",
|
||||
description:
|
||||
"When true, the plugin uses the in-pod ServiceAccount credentials. Requires paperclip-server to be running inside the target cluster.",
|
||||
},
|
||||
kubeconfig: {
|
||||
type: "string",
|
||||
format: "secret-ref",
|
||||
description:
|
||||
"Inline kubeconfig YAML. Paste a kubeconfig or an existing Paperclip secret reference; pasted values are stored as company secrets.",
|
||||
},
|
||||
namespacePrefix: {
|
||||
type: "string",
|
||||
description: "Prefix for the per-company tenant namespace (default: paperclip-).",
|
||||
},
|
||||
companySlug: {
|
||||
type: "string",
|
||||
description: "Override the auto-derived company slug used in the tenant namespace name.",
|
||||
},
|
||||
imageRegistry: {
|
||||
type: "string",
|
||||
description: "Override the default registry for agent runtime images (default: ghcr.io/paperclipai).",
|
||||
},
|
||||
imageAllowList: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description:
|
||||
"Glob patterns of allowed `target.imageOverride` values. Empty list = no override permitted.",
|
||||
},
|
||||
imagePullSecrets: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description: "Names of pre-created Docker image pull secrets in the tenant namespace.",
|
||||
},
|
||||
egressAllowFqdns: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description:
|
||||
"Additional FQDNs to allow egress to from agent pods. Adapter-default FQDNs (e.g. api.anthropic.com) are added automatically.",
|
||||
},
|
||||
egressAllowCidrs: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description: "Additional CIDRs to allow egress to from agent pods.",
|
||||
},
|
||||
egressMode: {
|
||||
type: "string",
|
||||
enum: ["standard", "cilium"],
|
||||
description:
|
||||
"Network policy mode. `standard` uses NetworkPolicy and allows public HTTPS when adapter FQDNs are configured; `cilium` enables exact FQDN egress filtering via CiliumNetworkPolicy.",
|
||||
},
|
||||
runtimeClassName: {
|
||||
type: "string",
|
||||
description:
|
||||
"Optional RuntimeClass for pod isolation (e.g. `kata-fc` for Firecracker-backed microVMs). Cluster must have the RuntimeClass installed.",
|
||||
},
|
||||
serviceAccountAnnotations: {
|
||||
type: "object",
|
||||
additionalProperties: { type: "string" },
|
||||
description:
|
||||
"Annotations applied to the per-tenant ServiceAccount (e.g. `eks.amazonaws.com/role-arn` for IRSA).",
|
||||
},
|
||||
jobTtlSecondsAfterFinished: {
|
||||
type: "integer",
|
||||
minimum: 0,
|
||||
description: "Seconds after a Job completes before it is garbage-collected (default: 900).",
|
||||
},
|
||||
podActivityDeadlineSec: {
|
||||
type: "integer",
|
||||
minimum: 1,
|
||||
description: "Hard ceiling on a single run's wall-clock time (default: 3600).",
|
||||
},
|
||||
adapterType: {
|
||||
type: "string",
|
||||
description:
|
||||
"The adapter type that Jobs in this environment will run (e.g. `claude_local`, `codex_local`). Defaults to `claude_local`. Each environment is bound to one adapter; create multiple environments for different adapters.",
|
||||
},
|
||||
backend: {
|
||||
type: "string",
|
||||
enum: ["sandbox-cr", "job"],
|
||||
description:
|
||||
"sandbox-cr (default, alpha — requires kubernetes-sigs/agent-sandbox installed) | job (stable fallback — batch/v1 Job, one-shot entrypoint, no multi-command exec)",
|
||||
},
|
||||
},
|
||||
anyOf: [
|
||||
{ required: ["inCluster"] },
|
||||
{ required: ["kubeconfig"] },
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
export default manifest;
|
||||
@@ -0,0 +1,101 @@
|
||||
export interface BuildNetworkPolicyInput {
|
||||
namespace: string;
|
||||
paperclipServerNamespace: string;
|
||||
egressAllowFqdns: string[];
|
||||
egressAllowCidrs: string[];
|
||||
}
|
||||
|
||||
const PUBLIC_IPV4_EXCEPTIONS = [
|
||||
"10.0.0.0/8",
|
||||
"100.64.0.0/10",
|
||||
"127.0.0.0/8",
|
||||
"169.254.0.0/16",
|
||||
"172.16.0.0/12",
|
||||
"192.168.0.0/16",
|
||||
];
|
||||
|
||||
// Design note: the deny-all baseline blocks all ingress to agent pods.
|
||||
// Paperclip-server does NOT push to agent pods — the agent shim makes
|
||||
// outbound calls to paperclip-server via the egress allow-list (port 3100).
|
||||
// This pull/callback model means no ingress rule is needed. If a future
|
||||
// feature requires server→agent push (e.g. forced shutdown, live exec),
|
||||
// add a targeted ingress rule here scoped to the paperclip-server pod
|
||||
// selector.
|
||||
//
|
||||
// Standard Kubernetes NetworkPolicy cannot express FQDN allow-lists. When
|
||||
// adapter defaults require FQDN egress, keep runs functional by allowing public
|
||||
// IPv4 HTTPS while excluding private/link-local ranges. Operators who need
|
||||
// exact FQDN enforcement should use egressMode="cilium".
|
||||
export function buildNetworkPolicyManifests(input: BuildNetworkPolicyInput): Record<string, unknown>[] {
|
||||
const fqdnsRequirePublicHttpsFallback = input.egressAllowFqdns.length > 0;
|
||||
const denyAll = {
|
||||
apiVersion: "networking.k8s.io/v1",
|
||||
kind: "NetworkPolicy",
|
||||
metadata: {
|
||||
name: "paperclip-deny-all",
|
||||
namespace: input.namespace,
|
||||
labels: { "paperclip.io/managed-by": "paperclip-k8s-plugin" },
|
||||
},
|
||||
spec: {
|
||||
podSelector: {},
|
||||
policyTypes: ["Ingress", "Egress"],
|
||||
},
|
||||
};
|
||||
|
||||
const egressAllow: Record<string, unknown> = {
|
||||
apiVersion: "networking.k8s.io/v1",
|
||||
kind: "NetworkPolicy",
|
||||
metadata: {
|
||||
name: "paperclip-egress-allow",
|
||||
namespace: input.namespace,
|
||||
labels: { "paperclip.io/managed-by": "paperclip-k8s-plugin" },
|
||||
},
|
||||
spec: {
|
||||
podSelector: { matchLabels: { "paperclip.io/role": "agent" } },
|
||||
policyTypes: ["Egress"],
|
||||
egress: [
|
||||
{
|
||||
to: [
|
||||
{
|
||||
namespaceSelector: { matchLabels: { "kubernetes.io/metadata.name": "kube-system" } },
|
||||
podSelector: { matchLabels: { "k8s-app": "kube-dns" } },
|
||||
},
|
||||
],
|
||||
ports: [
|
||||
{ protocol: "UDP", port: 53 },
|
||||
{ protocol: "TCP", port: 53 },
|
||||
],
|
||||
},
|
||||
{
|
||||
to: [
|
||||
{
|
||||
namespaceSelector: { matchLabels: { "kubernetes.io/metadata.name": input.paperclipServerNamespace } },
|
||||
podSelector: { matchLabels: { app: "paperclip-server" } },
|
||||
},
|
||||
],
|
||||
ports: [{ protocol: "TCP", port: 3100 }],
|
||||
},
|
||||
...(fqdnsRequirePublicHttpsFallback
|
||||
? [
|
||||
{
|
||||
to: [
|
||||
{
|
||||
ipBlock: {
|
||||
cidr: "0.0.0.0/0",
|
||||
except: PUBLIC_IPV4_EXCEPTIONS,
|
||||
},
|
||||
},
|
||||
],
|
||||
ports: [{ protocol: "TCP", port: 443 }],
|
||||
},
|
||||
]
|
||||
: []),
|
||||
...input.egressAllowCidrs.map((cidr) => ({
|
||||
to: [{ ipBlock: { cidr } }],
|
||||
})),
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
return [denyAll, egressAllow];
|
||||
}
|
||||
@@ -0,0 +1,554 @@
|
||||
import { randomBytes } from "node:crypto";
|
||||
import { definePlugin } from "@paperclipai/plugin-sdk";
|
||||
import type {
|
||||
PluginEnvironmentAcquireLeaseParams,
|
||||
PluginEnvironmentExecuteParams,
|
||||
PluginEnvironmentExecuteResult,
|
||||
PluginEnvironmentLease,
|
||||
PluginEnvironmentProbeParams,
|
||||
PluginEnvironmentProbeResult,
|
||||
PluginEnvironmentRealizeWorkspaceParams,
|
||||
PluginEnvironmentRealizeWorkspaceResult,
|
||||
PluginEnvironmentReleaseLeaseParams,
|
||||
PluginEnvironmentValidateConfigParams,
|
||||
PluginEnvironmentValidationResult,
|
||||
} from "@paperclipai/plugin-sdk";
|
||||
import {
|
||||
kubernetesProviderConfigSchema,
|
||||
type KubernetesProviderConfig,
|
||||
type KubernetesLeaseMetadata,
|
||||
} from "./types.js";
|
||||
import { createKubeConfig, makeKubeClients } from "./kube-client.js";
|
||||
import { getAdapterDefaults } from "./adapter-defaults.js";
|
||||
import { resolveImage } from "./image-allowlist.js";
|
||||
import { buildJobManifest } from "./pod-spec-builder.js";
|
||||
import { buildSandboxCrManifest } from "./sandbox-cr-builder.js";
|
||||
import { ensureTenant } from "./tenant-orchestrator.js";
|
||||
import { createPerRunSecret } from "./secret-manager.js";
|
||||
import { jobOrchestrator, JobTimeoutError } from "./job-orchestrator.js";
|
||||
import {
|
||||
sandboxCrOrchestrator,
|
||||
SandboxCrTimeoutError,
|
||||
} from "./sandbox-cr-orchestrator.js";
|
||||
import { execInPod } from "./pod-exec.js";
|
||||
import {
|
||||
deriveCompanySlug,
|
||||
deriveNamespaceName,
|
||||
newRunUlidDns,
|
||||
paperclipLabels,
|
||||
} from "./utils.js";
|
||||
|
||||
// The namespace paperclip-server itself runs in. Used when building
|
||||
// NetworkPolicy manifests so the tenant namespace allows inbound traffic
|
||||
// from the server pod.
|
||||
const PAPERCLIP_SERVER_NAMESPACE = "paperclip";
|
||||
|
||||
// Name of the ServiceAccount created inside each tenant namespace by ensureTenant.
|
||||
const TENANT_SERVICE_ACCOUNT = "paperclip-tenant-sa";
|
||||
|
||||
// Resource quota defaults applied to every tenant namespace (M4b; tunable via
|
||||
// config in a future milestone).
|
||||
const DEFAULT_RESOURCE_QUOTA = {
|
||||
pods: "20",
|
||||
requestsCpu: "10",
|
||||
requestsMemory: "20Gi",
|
||||
limitsCpu: "20",
|
||||
limitsMemory: "40Gi",
|
||||
};
|
||||
|
||||
function deriveTenantNamespace(config: KubernetesProviderConfig, companyId: string): string {
|
||||
// TODO: future versions could thread companyName through AcquireLeaseParams
|
||||
// to get a friendlier slug (e.g. "acme-corp") instead of the UUID-derived one.
|
||||
const slug = config.companySlug ?? deriveCompanySlug(companyId);
|
||||
return deriveNamespaceName(config.namespacePrefix, slug);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads adapter env keys (e.g. ANTHROPIC_API_KEY) from the current process
|
||||
* environment. The plugin worker runs inside paperclip-server's pod, which has
|
||||
* these vars injected at deploy time.
|
||||
*
|
||||
* M4b approach: env vars sourced from process.env at acquire time.
|
||||
* TODO: future milestones may thread per-run secrets differently (e.g. via
|
||||
* a secret store reference on the environment config).
|
||||
*/
|
||||
function extractAdapterEnvFromProcess(envKeys: string[]): Record<string, string> {
|
||||
const out: Record<string, string> = {};
|
||||
for (const k of envKeys) {
|
||||
const v = process.env[k];
|
||||
if (v) out[k] = v;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function generateBootstrapToken(): string {
|
||||
// TODO: paperclip-server's actual callback auth scheme is separate and is
|
||||
// out of M4b scope. This per-run random token is stored in the per-run
|
||||
// Secret and consumed by paperclip-agent-shim for initial registration.
|
||||
return randomBytes(32).toString("hex");
|
||||
}
|
||||
|
||||
const plugin = definePlugin({
|
||||
async setup(ctx) {
|
||||
ctx.logger.info("Kubernetes sandbox provider plugin ready");
|
||||
},
|
||||
|
||||
async onHealth() {
|
||||
return { status: "ok", message: "Kubernetes sandbox provider plugin healthy" };
|
||||
},
|
||||
|
||||
async onEnvironmentValidateConfig(
|
||||
params: PluginEnvironmentValidateConfigParams,
|
||||
): Promise<PluginEnvironmentValidationResult> {
|
||||
const parsed = kubernetesProviderConfigSchema.safeParse(params.config);
|
||||
if (!parsed.success) {
|
||||
return {
|
||||
ok: false,
|
||||
errors: parsed.error.issues.map((i) => i.message),
|
||||
};
|
||||
}
|
||||
const warnings: string[] = [];
|
||||
const cfg = parsed.data;
|
||||
const adapterDefaults = getAdapterDefaults(cfg.adapterType);
|
||||
const totalFqdns = [...adapterDefaults.allowFqdns, ...cfg.egressAllowFqdns];
|
||||
if (cfg.egressMode === "standard" && totalFqdns.length > 0) {
|
||||
warnings.push(
|
||||
`egressMode=standard cannot enforce FQDN-based egress rules for ${totalFqdns.join(", ")}. Agent pods will get public IPv4 HTTPS egress with private/link-local ranges excluded. Switch egressMode to "cilium" for exact FQDN enforcement.`,
|
||||
);
|
||||
}
|
||||
return { ok: true, normalizedConfig: cfg as Record<string, unknown>, warnings: warnings.length > 0 ? warnings : undefined };
|
||||
},
|
||||
|
||||
async onEnvironmentProbe(
|
||||
params: PluginEnvironmentProbeParams,
|
||||
): Promise<PluginEnvironmentProbeResult> {
|
||||
const parsed = kubernetesProviderConfigSchema.safeParse(params.config);
|
||||
if (!parsed.success) {
|
||||
return {
|
||||
ok: false,
|
||||
summary: "Invalid Kubernetes provider configuration.",
|
||||
metadata: {
|
||||
errors: parsed.error.issues.map((i) => i.message),
|
||||
},
|
||||
};
|
||||
}
|
||||
const config = parsed.data;
|
||||
const namespace = deriveTenantNamespace(config, params.companyId);
|
||||
|
||||
try {
|
||||
const kc = createKubeConfig({
|
||||
inCluster: config.inCluster,
|
||||
kubeconfig: config.kubeconfig,
|
||||
});
|
||||
const clients = makeKubeClients(kc);
|
||||
// Reachability check: list pods in the tenant namespace. If the namespace
|
||||
// doesn't exist yet this will throw a 404 which we treat as "reachable
|
||||
// but namespace not provisioned" — still a successful probe.
|
||||
try {
|
||||
await clients.core.listNamespacedPod({ namespace });
|
||||
} catch (err) {
|
||||
const code = (err as { code?: number; statusCode?: number }).code
|
||||
?? (err as { code?: number; statusCode?: number }).statusCode;
|
||||
if (code !== 404) throw err;
|
||||
// 404 means namespace doesn't exist yet — cluster is reachable.
|
||||
}
|
||||
return {
|
||||
ok: true,
|
||||
summary: `Kubernetes cluster reachable. Tenant namespace: ${namespace}.`,
|
||||
metadata: { namespace, provider: "kubernetes" },
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
ok: false,
|
||||
summary: "Kubernetes cluster probe failed.",
|
||||
metadata: {
|
||||
namespace,
|
||||
provider: "kubernetes",
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
},
|
||||
};
|
||||
}
|
||||
},
|
||||
|
||||
async onEnvironmentAcquireLease(
|
||||
params: PluginEnvironmentAcquireLeaseParams,
|
||||
): Promise<PluginEnvironmentLease> {
|
||||
const config = kubernetesProviderConfigSchema.parse(params.config);
|
||||
const namespace = deriveTenantNamespace(config, params.companyId);
|
||||
|
||||
// Emit a runtime warning if FQDNs are configured but egressMode=standard
|
||||
// cannot enforce them. Mirrors the validateConfig warning so operators see
|
||||
// it in paperclip-server logs even if they missed the validation step.
|
||||
const adapterDefaultsForWarn = getAdapterDefaults(config.adapterType);
|
||||
const totalFqdnsForWarn = [...adapterDefaultsForWarn.allowFqdns, ...config.egressAllowFqdns];
|
||||
if (config.egressMode === "standard" && totalFqdnsForWarn.length > 0) {
|
||||
console.warn(
|
||||
`[plugin-kubernetes] egressMode=standard cannot enforce FQDN-based egress rules for ${totalFqdnsForWarn.join(", ")}. Agent pods will get public IPv4 HTTPS egress with private/link-local ranges excluded. Switch egressMode to "cilium" for exact FQDN enforcement.`,
|
||||
);
|
||||
}
|
||||
|
||||
const kc = createKubeConfig({
|
||||
inCluster: config.inCluster,
|
||||
kubeconfig: config.kubeconfig,
|
||||
});
|
||||
const clients = makeKubeClients(kc);
|
||||
|
||||
// Ensure the tenant namespace and all its RBAC / network policy resources
|
||||
// exist before we try to create the Job.
|
||||
const adapterDefaults = getAdapterDefaults(config.adapterType);
|
||||
|
||||
await ensureTenant(clients, {
|
||||
namespace,
|
||||
companyId: params.companyId,
|
||||
paperclipServerNamespace: PAPERCLIP_SERVER_NAMESPACE,
|
||||
serviceAccountAnnotations: config.serviceAccountAnnotations,
|
||||
egressMode: config.egressMode,
|
||||
egressAllowFqdns: [...adapterDefaults.allowFqdns, ...config.egressAllowFqdns],
|
||||
egressAllowCidrs: config.egressAllowCidrs,
|
||||
resourceQuota: DEFAULT_RESOURCE_QUOTA,
|
||||
});
|
||||
|
||||
const jobName = `pc-${newRunUlidDns()}`;
|
||||
const secretName = `${jobName}-env`;
|
||||
|
||||
// TODO: use params.runId as stand-in for agentId in labels; future
|
||||
// versions will have a dedicated agentId on AcquireLeaseParams.
|
||||
const labels = paperclipLabels({
|
||||
runId: params.runId,
|
||||
agentId: params.runId,
|
||||
companyId: params.companyId,
|
||||
adapterType: config.adapterType,
|
||||
});
|
||||
|
||||
const image = resolveImage(
|
||||
{ imageOverride: null },
|
||||
adapterDefaults,
|
||||
{ imageAllowList: config.imageAllowList, imageRegistry: config.imageRegistry },
|
||||
);
|
||||
|
||||
// Pick the orchestrator and build the appropriate manifest based on backend.
|
||||
const isSandboxCrBackend = config.backend === "sandbox-cr";
|
||||
const orchestrator = isSandboxCrBackend ? sandboxCrOrchestrator : jobOrchestrator;
|
||||
|
||||
const manifest = isSandboxCrBackend
|
||||
? buildSandboxCrManifest({
|
||||
namespace,
|
||||
sandboxName: jobName,
|
||||
adapterType: config.adapterType,
|
||||
image,
|
||||
envSecretName: secretName,
|
||||
serviceAccountName: TENANT_SERVICE_ACCOUNT,
|
||||
labels,
|
||||
resources: config.defaultResources ?? {},
|
||||
runtimeClassName: config.runtimeClassName,
|
||||
imagePullSecrets: config.imagePullSecrets,
|
||||
})
|
||||
: buildJobManifest({
|
||||
namespace,
|
||||
jobName,
|
||||
adapterType: config.adapterType,
|
||||
image,
|
||||
envSecretName: secretName,
|
||||
serviceAccountName: TENANT_SERVICE_ACCOUNT,
|
||||
labels,
|
||||
resources: config.defaultResources ?? {},
|
||||
runtimeClassName: config.runtimeClassName,
|
||||
activeDeadlineSec: config.podActivityDeadlineSec,
|
||||
ttlSecondsAfterFinished: config.jobTtlSecondsAfterFinished,
|
||||
imagePullSecrets: config.imagePullSecrets,
|
||||
});
|
||||
|
||||
const { uid: ownerUid } = await orchestrator.claim(clients, namespace, manifest);
|
||||
|
||||
// M4b: adapter env vars are sourced from the plugin worker's own process
|
||||
// environment (paperclip-server pod has them injected at deploy time).
|
||||
const adapterEnv = extractAdapterEnvFromProcess(adapterDefaults.envKeys);
|
||||
const bootstrapToken = generateBootstrapToken();
|
||||
|
||||
// Secret ownerRef: for job backend, the Job owns the Secret (cascade delete).
|
||||
// For sandbox-cr backend, the Sandbox CR owns the Secret.
|
||||
// NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster
|
||||
// quirk, the release() call will still clean it up via namespace GC or
|
||||
// explicit delete in a future milestone.
|
||||
await createPerRunSecret(clients, {
|
||||
namespace,
|
||||
secretName,
|
||||
runId: params.runId,
|
||||
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
|
||||
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
|
||||
ownerName: jobName,
|
||||
ownerUid,
|
||||
bootstrapToken,
|
||||
adapterEnv,
|
||||
});
|
||||
|
||||
const podName = await orchestrator.findPod(clients, namespace, jobName);
|
||||
|
||||
const leaseMetadata: KubernetesLeaseMetadata = {
|
||||
namespace,
|
||||
jobName,
|
||||
podName,
|
||||
secretName,
|
||||
phase: "Pending",
|
||||
backend: config.backend,
|
||||
};
|
||||
|
||||
return {
|
||||
providerLeaseId: jobName,
|
||||
metadata: leaseMetadata as unknown as Record<string, unknown>,
|
||||
};
|
||||
},
|
||||
|
||||
async onEnvironmentRealizeWorkspace(
|
||||
params: PluginEnvironmentRealizeWorkspaceParams,
|
||||
): Promise<PluginEnvironmentRealizeWorkspaceResult> {
|
||||
// The agent pod already has /workspace mounted as an emptyDir at pod
|
||||
// scheduling time (see pod-spec-builder). Nothing to provision here —
|
||||
// we just hand back the cwd. Honor a caller-supplied remotePath if set.
|
||||
const cwd =
|
||||
params.workspace.remotePath && params.workspace.remotePath.trim().length > 0
|
||||
? params.workspace.remotePath.trim()
|
||||
: "/workspace";
|
||||
return {
|
||||
cwd,
|
||||
metadata: {
|
||||
provider: "kubernetes",
|
||||
remoteCwd: cwd,
|
||||
},
|
||||
};
|
||||
},
|
||||
|
||||
async onEnvironmentReleaseLease(
|
||||
params: PluginEnvironmentReleaseLeaseParams,
|
||||
): Promise<void> {
|
||||
if (!params.providerLeaseId) return;
|
||||
const config = kubernetesProviderConfigSchema.parse(params.config);
|
||||
const namespace =
|
||||
typeof params.leaseMetadata?.namespace === "string"
|
||||
? params.leaseMetadata.namespace
|
||||
: deriveTenantNamespace(config, params.companyId);
|
||||
|
||||
const kc = createKubeConfig({
|
||||
inCluster: config.inCluster,
|
||||
kubeconfig: config.kubeconfig,
|
||||
});
|
||||
const clients = makeKubeClients(kc);
|
||||
|
||||
const leaseBackend =
|
||||
typeof params.leaseMetadata?.backend === "string"
|
||||
? (params.leaseMetadata.backend as "sandbox-cr" | "job")
|
||||
: config.backend;
|
||||
const releaseOrchestrator =
|
||||
leaseBackend === "sandbox-cr" ? sandboxCrOrchestrator : jobOrchestrator;
|
||||
|
||||
try {
|
||||
await releaseOrchestrator.release(clients, namespace, params.providerLeaseId);
|
||||
} catch (err) {
|
||||
// If the resource is already gone (404), that's fine.
|
||||
const code = (err as { code?: number; statusCode?: number }).code
|
||||
?? (err as { code?: number; statusCode?: number }).statusCode;
|
||||
if (code !== 404) throw err;
|
||||
}
|
||||
},
|
||||
|
||||
async onEnvironmentExecute(
|
||||
params: PluginEnvironmentExecuteParams,
|
||||
): Promise<PluginEnvironmentExecuteResult> {
|
||||
const { lease, timeoutMs } = params;
|
||||
|
||||
if (!lease.providerLeaseId) {
|
||||
return {
|
||||
exitCode: 1,
|
||||
timedOut: false,
|
||||
stdout: "",
|
||||
stderr: "No provider lease ID available for execution.",
|
||||
};
|
||||
}
|
||||
|
||||
const config = kubernetesProviderConfigSchema.parse(params.config);
|
||||
const namespace =
|
||||
typeof lease.metadata?.namespace === "string"
|
||||
? lease.metadata.namespace
|
||||
: deriveTenantNamespace(config, params.companyId);
|
||||
|
||||
// Determine which backend this lease was created with.
|
||||
const leaseBackend =
|
||||
typeof lease.metadata?.backend === "string"
|
||||
? (lease.metadata.backend as "sandbox-cr" | "job")
|
||||
: config.backend;
|
||||
|
||||
const kc = createKubeConfig({
|
||||
inCluster: config.inCluster,
|
||||
kubeconfig: config.kubeconfig,
|
||||
});
|
||||
const clients = makeKubeClients(kc);
|
||||
|
||||
const effectiveTimeoutMs =
|
||||
typeof timeoutMs === "number" && timeoutMs > 0
|
||||
? timeoutMs
|
||||
: config.podActivityDeadlineSec * 1000;
|
||||
|
||||
if (leaseBackend === "sandbox-cr") {
|
||||
// ── Sandbox-CR backend ──────────────────────────────────────────────────
|
||||
// 1. Ensure the Sandbox pod is Ready (wait if needed).
|
||||
// 2. Exec the command into the running pod.
|
||||
// 3. Return exec result directly (no log scraping needed).
|
||||
|
||||
let podName =
|
||||
typeof lease.metadata?.podName === "string" && lease.metadata.podName
|
||||
? lease.metadata.podName
|
||||
: null;
|
||||
|
||||
// Wait for pod Ready if we don't have a pod name yet (or as a health check).
|
||||
try {
|
||||
await sandboxCrOrchestrator.waitForCompletion(
|
||||
clients,
|
||||
namespace,
|
||||
lease.providerLeaseId,
|
||||
{ timeoutMs: effectiveTimeoutMs, pollMs: 2000 },
|
||||
);
|
||||
} catch (err) {
|
||||
if (err instanceof SandboxCrTimeoutError) {
|
||||
return {
|
||||
exitCode: null,
|
||||
timedOut: true,
|
||||
stdout: "",
|
||||
stderr: `Sandbox pod did not become Ready within ${effectiveTimeoutMs}ms`,
|
||||
metadata: {
|
||||
provider: "kubernetes",
|
||||
backend: "sandbox-cr",
|
||||
namespace,
|
||||
sandboxName: lease.providerLeaseId,
|
||||
},
|
||||
};
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Resolve pod name (may now be populated in Sandbox status).
|
||||
if (!podName) {
|
||||
podName = await sandboxCrOrchestrator.findPod(
|
||||
clients,
|
||||
namespace,
|
||||
lease.providerLeaseId,
|
||||
);
|
||||
}
|
||||
|
||||
if (!podName) {
|
||||
return {
|
||||
exitCode: 1,
|
||||
timedOut: false,
|
||||
stdout: "",
|
||||
stderr: "Sandbox pod is Ready but podName could not be resolved.",
|
||||
metadata: {
|
||||
provider: "kubernetes",
|
||||
backend: "sandbox-cr",
|
||||
namespace,
|
||||
sandboxName: lease.providerLeaseId,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Build the command to exec. If params.command is provided use it;
|
||||
// otherwise wrap in a login shell so profile scripts run.
|
||||
const rawCommand =
|
||||
typeof params.command === "string" && params.command.trim().length > 0
|
||||
? params.command
|
||||
: params.args?.join(" ") ?? "";
|
||||
|
||||
const execCommand = rawCommand.length > 0
|
||||
? ["/bin/sh", "-lc", rawCommand]
|
||||
: ["/bin/sh", "-l"];
|
||||
|
||||
const execResult = await execInPod(
|
||||
kc,
|
||||
namespace,
|
||||
podName,
|
||||
"agent",
|
||||
execCommand,
|
||||
typeof params.stdin === "string" ? params.stdin : undefined,
|
||||
);
|
||||
|
||||
return {
|
||||
exitCode: execResult.exitCode,
|
||||
timedOut: false,
|
||||
stdout: execResult.stdout,
|
||||
stderr: execResult.stderr,
|
||||
metadata: {
|
||||
provider: "kubernetes",
|
||||
backend: "sandbox-cr",
|
||||
namespace,
|
||||
sandboxName: lease.providerLeaseId,
|
||||
podName,
|
||||
},
|
||||
};
|
||||
} else {
|
||||
// ── Job backend (legacy / stable fallback) ──────────────────────────────
|
||||
// The container entrypoint is baked into the Job spec (Tini + paperclip-agent-shim).
|
||||
// We do NOT re-exec command/args — instead we wait for the Job to finish
|
||||
// and collect its logs.
|
||||
//
|
||||
// params.command / params.args / params.stdin are intentionally ignored.
|
||||
|
||||
let status;
|
||||
let timedOut = false;
|
||||
try {
|
||||
status = await jobOrchestrator.waitForCompletion(
|
||||
clients,
|
||||
namespace,
|
||||
lease.providerLeaseId,
|
||||
{ timeoutMs: effectiveTimeoutMs, pollMs: 2000 },
|
||||
);
|
||||
} catch (err) {
|
||||
if (err instanceof JobTimeoutError) {
|
||||
timedOut = true;
|
||||
status = null;
|
||||
} else {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
// Collect logs from the pod.
|
||||
const podName =
|
||||
typeof lease.metadata?.podName === "string"
|
||||
? lease.metadata.podName
|
||||
: await jobOrchestrator.findPod(
|
||||
clients,
|
||||
namespace,
|
||||
lease.providerLeaseId,
|
||||
);
|
||||
|
||||
const stdoutChunks: string[] = [];
|
||||
const stderrChunks: string[] = [];
|
||||
|
||||
if (podName) {
|
||||
await jobOrchestrator.streamLogs(
|
||||
clients,
|
||||
namespace,
|
||||
podName,
|
||||
async (stream, text) => {
|
||||
if (stream === "stdout") stdoutChunks.push(text);
|
||||
else stderrChunks.push(text);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
exitCode: timedOut ? null : status?.phase === "Succeeded" ? 0 : 1,
|
||||
timedOut,
|
||||
stdout: stdoutChunks.join(""),
|
||||
stderr: stderrChunks.join(""),
|
||||
metadata: {
|
||||
provider: "kubernetes",
|
||||
backend: "job",
|
||||
namespace,
|
||||
jobName: lease.providerLeaseId,
|
||||
podName: podName ?? null,
|
||||
phase: status?.phase ?? null,
|
||||
},
|
||||
};
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
export default plugin;
|
||||
@@ -0,0 +1,79 @@
|
||||
/**
|
||||
* Exec a command inside a running pod container using the Kubernetes exec API.
|
||||
*
|
||||
* Uses @kubernetes/client-node's Exec class, which opens a WebSocket to the
|
||||
* kube-apiserver and streams stdout/stderr. The statusCallback receives a V1Status
|
||||
* with status="Success" or status="Failure" + details.causes[{reason:"ExitCode"}].
|
||||
*
|
||||
* NOTE: tty=false so stdout and stderr arrive on separate channels. If tty=true
|
||||
* were used, they would be merged onto stdout and the exit code would not be
|
||||
* reliable from the status callback on older cluster versions.
|
||||
*/
|
||||
|
||||
import { Exec } from "@kubernetes/client-node";
|
||||
import { PassThrough } from "node:stream";
|
||||
import type { KubeConfig } from "@kubernetes/client-node";
|
||||
|
||||
export async function execInPod(
|
||||
kc: KubeConfig,
|
||||
namespace: string,
|
||||
podName: string,
|
||||
containerName: string,
|
||||
command: string[],
|
||||
stdin?: string,
|
||||
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
||||
const exec = new Exec(kc);
|
||||
const stdoutStream = new PassThrough();
|
||||
const stderrStream = new PassThrough();
|
||||
|
||||
// If stdin is provided build a readable stream from it; the Exec API accepts
|
||||
// a Readable | null for stdin.
|
||||
const stdinStream: import("node:stream").Readable | null = stdin
|
||||
? PassThrough.from(stdin)
|
||||
: null;
|
||||
|
||||
let stdoutData = "";
|
||||
let stderrData = "";
|
||||
|
||||
stdoutStream.on("data", (chunk: Buffer) => {
|
||||
stdoutData += chunk.toString("utf-8");
|
||||
});
|
||||
stderrStream.on("data", (chunk: Buffer) => {
|
||||
stderrData += chunk.toString("utf-8");
|
||||
});
|
||||
|
||||
return await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
|
||||
(resolve, reject) => {
|
||||
exec
|
||||
.exec(
|
||||
namespace,
|
||||
podName,
|
||||
containerName,
|
||||
command,
|
||||
stdoutStream,
|
||||
stderrStream,
|
||||
stdinStream,
|
||||
false, // tty=false: keep stdout/stderr on separate channels
|
||||
(status) => {
|
||||
// status.status is "Success" | "Failure"
|
||||
if (status.status === "Success") {
|
||||
resolve({ exitCode: 0, stdout: stdoutData, stderr: stderrData });
|
||||
return;
|
||||
}
|
||||
// On failure, the exit code surfaces via
|
||||
// status.details?.causes[].{reason:"ExitCode", message:"<N>"}
|
||||
const causes = status.details?.causes ?? [];
|
||||
const exitCodeCause = causes.find(
|
||||
(c: { reason?: string; message?: string }) =>
|
||||
c.reason === "ExitCode",
|
||||
);
|
||||
const exitCode = exitCodeCause?.message
|
||||
? Number(exitCodeCause.message)
|
||||
: 1;
|
||||
resolve({ exitCode, stdout: stdoutData, stderr: stderrData });
|
||||
},
|
||||
)
|
||||
.catch(reject);
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
export interface BuildJobManifestInput {
|
||||
namespace: string;
|
||||
jobName: string;
|
||||
adapterType: string;
|
||||
image: string;
|
||||
envSecretName: string;
|
||||
serviceAccountName: string;
|
||||
labels: Record<string, string>;
|
||||
resources: {
|
||||
requests?: { cpu?: string; memory?: string };
|
||||
limits?: { cpu?: string; memory?: string };
|
||||
};
|
||||
runtimeClassName?: string;
|
||||
activeDeadlineSec: number;
|
||||
ttlSecondsAfterFinished: number;
|
||||
imagePullSecrets?: string[];
|
||||
}
|
||||
|
||||
export function buildJobManifest(input: BuildJobManifestInput): Record<string, unknown> {
|
||||
const podLabels = {
|
||||
...input.labels,
|
||||
"paperclip.io/role": "agent",
|
||||
};
|
||||
return {
|
||||
apiVersion: "batch/v1",
|
||||
kind: "Job",
|
||||
metadata: {
|
||||
name: input.jobName,
|
||||
namespace: input.namespace,
|
||||
labels: { ...input.labels },
|
||||
},
|
||||
spec: {
|
||||
backoffLimit: 0,
|
||||
ttlSecondsAfterFinished: input.ttlSecondsAfterFinished,
|
||||
activeDeadlineSeconds: input.activeDeadlineSec,
|
||||
template: {
|
||||
metadata: { labels: podLabels },
|
||||
spec: {
|
||||
serviceAccountName: input.serviceAccountName,
|
||||
// Agent containers call back to paperclip-server via HTTPS egress;
|
||||
// they never call the Kubernetes API, so mounting an SA token is
|
||||
// unnecessary attack surface.
|
||||
automountServiceAccountToken: false,
|
||||
restartPolicy: "Never",
|
||||
...(input.runtimeClassName ? { runtimeClassName: input.runtimeClassName } : {}),
|
||||
...(input.imagePullSecrets && input.imagePullSecrets.length > 0
|
||||
? { imagePullSecrets: input.imagePullSecrets.map((name) => ({ name })) }
|
||||
: {}),
|
||||
securityContext: {
|
||||
runAsNonRoot: true,
|
||||
runAsUser: 1000,
|
||||
runAsGroup: 1000,
|
||||
fsGroup: 1000,
|
||||
fsGroupChangePolicy: "OnRootMismatch",
|
||||
seccompProfile: { type: "RuntimeDefault" },
|
||||
},
|
||||
containers: [
|
||||
{
|
||||
name: "agent",
|
||||
image: input.image,
|
||||
imagePullPolicy: "IfNotPresent",
|
||||
command: ["/usr/bin/tini", "--", "/usr/local/bin/paperclip-agent-shim"],
|
||||
envFrom: [{ secretRef: { name: input.envSecretName } }],
|
||||
securityContext: {
|
||||
runAsNonRoot: true,
|
||||
runAsUser: 1000,
|
||||
runAsGroup: 1000,
|
||||
readOnlyRootFilesystem: true,
|
||||
allowPrivilegeEscalation: false,
|
||||
capabilities: { drop: ["ALL"] },
|
||||
},
|
||||
resources: {
|
||||
requests: input.resources.requests ?? { cpu: "250m", memory: "512Mi" },
|
||||
limits: input.resources.limits ?? { cpu: "2", memory: "4Gi" },
|
||||
},
|
||||
volumeMounts: [
|
||||
{ name: "workspace", mountPath: "/workspace" },
|
||||
{ name: "home", mountPath: "/home/paperclip" },
|
||||
{ name: "cache", mountPath: "/home/paperclip/.cache" },
|
||||
{ name: "tmp", mountPath: "/tmp" },
|
||||
],
|
||||
},
|
||||
],
|
||||
volumes: [
|
||||
{ name: "workspace", emptyDir: { sizeLimit: "8Gi" } },
|
||||
{ name: "home", emptyDir: { sizeLimit: "1Gi" } },
|
||||
{ name: "cache", emptyDir: { sizeLimit: "1Gi" } },
|
||||
{ name: "tmp", emptyDir: { sizeLimit: "2Gi" } },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Builds a kubernetes-sigs/agent-sandbox Sandbox CR manifest.
|
||||
*
|
||||
* The Sandbox CR creates a long-lived pod (sleep infinity entrypoint) into
|
||||
* which paperclip-server can exec arbitrary commands. This solves the
|
||||
* architectural mismatch with the batch/v1 Job backend, which only supports
|
||||
* a single one-shot entrypoint — not the multi-command adapter-install pattern
|
||||
* used by paperclip-server.
|
||||
*
|
||||
* Security baseline is identical to buildJobManifest (pod-spec-builder.ts):
|
||||
* non-root, drop ALL caps, read-only rootFS, Tini PID 1, seccomp
|
||||
* RuntimeDefault, fsGroupChangePolicy OnRootMismatch, automountSAToken=false.
|
||||
*
|
||||
* NOTE: paperclip-server runs OUTSIDE the cluster, so we cannot set ownerReferences
|
||||
* on the Sandbox CR (the owner would need to be an in-cluster resource). The
|
||||
* release path is explicit delete via sandboxCrOrchestrator.release().
|
||||
*/
|
||||
|
||||
export interface BuildSandboxCrManifestInput {
|
||||
namespace: string;
|
||||
sandboxName: string;
|
||||
adapterType: string;
|
||||
image: string;
|
||||
envSecretName: string;
|
||||
serviceAccountName: string;
|
||||
labels: Record<string, string>;
|
||||
resources: {
|
||||
requests?: { cpu?: string; memory?: string };
|
||||
limits?: { cpu?: string; memory?: string };
|
||||
};
|
||||
runtimeClassName?: string;
|
||||
imagePullSecrets?: string[];
|
||||
}
|
||||
|
||||
export function buildSandboxCrManifest(
|
||||
input: BuildSandboxCrManifestInput,
|
||||
): Record<string, unknown> {
|
||||
const podLabels: Record<string, string> = {
|
||||
...input.labels,
|
||||
"paperclip.io/role": "agent",
|
||||
};
|
||||
return {
|
||||
apiVersion: "agents.x-k8s.io/v1alpha1",
|
||||
kind: "Sandbox",
|
||||
metadata: {
|
||||
name: input.sandboxName,
|
||||
namespace: input.namespace,
|
||||
labels: { ...input.labels },
|
||||
// No ownerReferences: paperclip-server is out-of-cluster. Release is
|
||||
// explicit delete.
|
||||
},
|
||||
spec: {
|
||||
podTemplate: {
|
||||
metadata: {
|
||||
labels: podLabels,
|
||||
},
|
||||
spec: {
|
||||
serviceAccountName: input.serviceAccountName,
|
||||
// Agent containers call back to paperclip-server via HTTPS egress;
|
||||
// they never call the Kubernetes API, so mounting an SA token is
|
||||
// unnecessary attack surface.
|
||||
automountServiceAccountToken: false,
|
||||
// Sandbox controller requires restartPolicy: Always so the pod
|
||||
// stays running between exec calls.
|
||||
restartPolicy: "Always",
|
||||
...(input.runtimeClassName
|
||||
? { runtimeClassName: input.runtimeClassName }
|
||||
: {}),
|
||||
...(input.imagePullSecrets && input.imagePullSecrets.length > 0
|
||||
? {
|
||||
imagePullSecrets: input.imagePullSecrets.map((name) => ({
|
||||
name,
|
||||
})),
|
||||
}
|
||||
: {}),
|
||||
securityContext: {
|
||||
runAsNonRoot: true,
|
||||
runAsUser: 1000,
|
||||
runAsGroup: 1000,
|
||||
fsGroup: 1000,
|
||||
fsGroupChangePolicy: "OnRootMismatch",
|
||||
seccompProfile: { type: "RuntimeDefault" },
|
||||
},
|
||||
containers: [
|
||||
{
|
||||
name: "agent",
|
||||
image: input.image,
|
||||
imagePullPolicy: "IfNotPresent",
|
||||
// sleep infinity keeps the pod running; paperclip-server execs
|
||||
// commands into it via Kubernetes exec API. Tini as PID 1 for
|
||||
// proper signal forwarding and zombie reaping.
|
||||
command: [
|
||||
"/usr/bin/tini",
|
||||
"--",
|
||||
"/bin/sh",
|
||||
"-c",
|
||||
"sleep infinity",
|
||||
],
|
||||
envFrom: [{ secretRef: { name: input.envSecretName } }],
|
||||
securityContext: {
|
||||
runAsNonRoot: true,
|
||||
runAsUser: 1000,
|
||||
runAsGroup: 1000,
|
||||
readOnlyRootFilesystem: true,
|
||||
allowPrivilegeEscalation: false,
|
||||
capabilities: { drop: ["ALL"] },
|
||||
},
|
||||
resources: {
|
||||
requests: input.resources.requests ?? {
|
||||
cpu: "250m",
|
||||
memory: "512Mi",
|
||||
},
|
||||
limits: input.resources.limits ?? {
|
||||
cpu: "2",
|
||||
memory: "4Gi",
|
||||
},
|
||||
},
|
||||
volumeMounts: [
|
||||
{ name: "workspace", mountPath: "/workspace" },
|
||||
{ name: "home", mountPath: "/home/paperclip" },
|
||||
{ name: "cache", mountPath: "/home/paperclip/.cache" },
|
||||
{ name: "tmp", mountPath: "/tmp" },
|
||||
],
|
||||
},
|
||||
],
|
||||
volumes: [
|
||||
{ name: "workspace", emptyDir: { sizeLimit: "8Gi" } },
|
||||
{ name: "home", emptyDir: { sizeLimit: "1Gi" } },
|
||||
{ name: "cache", emptyDir: { sizeLimit: "1Gi" } },
|
||||
{ name: "tmp", emptyDir: { sizeLimit: "2Gi" } },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,288 @@
|
||||
/**
|
||||
* SandboxOrchestrator implementation backed by the kubernetes-sigs/agent-sandbox
|
||||
* Sandbox CRD (agents.x-k8s.io/v1alpha1).
|
||||
*
|
||||
* The Sandbox CR creates a long-lived pod that paperclip-server can exec into
|
||||
* for multi-command adapter-install workflows — the key architectural win over
|
||||
* the batch/v1 Job backend.
|
||||
*
|
||||
* Key semantic differences from jobOrchestrator:
|
||||
* - claim() creates a Sandbox CR via CustomObjectsApi instead of a batch Job
|
||||
* - getStatus() maps Sandbox phase (Pending|Ready|Terminating|Failed) to SandboxStatus
|
||||
* - findPod() reads status.podName from the Sandbox CR (falls back to label query)
|
||||
* - waitForCompletion() means "wait until pod is Ready to exec" NOT "wait until
|
||||
* workload finishes". The Sandbox pod runs sleep infinity; execution completion
|
||||
* is tracked by the individual execInPod() calls.
|
||||
* - release() deletes the Sandbox CR with Foreground propagation (controller
|
||||
* tears down the underlying pod).
|
||||
*
|
||||
* NOTE: streamLogs() is provided for interface conformance but is limited —
|
||||
* the sleep-infinity pod has no meaningful stdout. Callers in execute mode
|
||||
* should use execInPod() and capture its stdout/stderr directly.
|
||||
*/
|
||||
|
||||
import type { KubeClients } from "./kube-client.js";
|
||||
import type { SandboxOrchestrator, SandboxStatus } from "./sandbox-orchestrator.js";
|
||||
|
||||
const SANDBOX_GROUP = "agents.x-k8s.io";
|
||||
const SANDBOX_VERSION = "v1alpha1";
|
||||
const SANDBOX_PLURAL = "sandboxes";
|
||||
|
||||
export class SandboxCrTimeoutError extends Error {
|
||||
constructor(namespace: string, name: string, timeoutMs: number) {
|
||||
super(
|
||||
`Sandbox ${namespace}/${name} did not reach Ready phase within ${timeoutMs}ms`,
|
||||
);
|
||||
this.name = "SandboxCrTimeoutError";
|
||||
}
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
|
||||
* Map a Sandbox CR status.phase value to our SandboxStatus shape.
|
||||
* Sandbox phases: Pending | Ready | Terminating | Failed
|
||||
*/
|
||||
function mapSandboxPhase(
|
||||
cr: Record<string, unknown>,
|
||||
): SandboxStatus {
|
||||
const status = (cr.status as Record<string, unknown>) ?? {};
|
||||
const phase = (status.phase as string) ?? "Pending";
|
||||
|
||||
switch (phase) {
|
||||
case "Ready":
|
||||
return {
|
||||
phase: "Running", // SandboxStatus.phase uses Job semantics; "Running" = active pod
|
||||
complete: false,
|
||||
active: 1,
|
||||
succeeded: 0,
|
||||
failed: 0,
|
||||
};
|
||||
case "Terminating":
|
||||
return {
|
||||
phase: "Running",
|
||||
complete: false,
|
||||
active: 0,
|
||||
succeeded: 0,
|
||||
failed: 0,
|
||||
reason: "Terminating",
|
||||
};
|
||||
case "Failed": {
|
||||
const conditions = (status.conditions as { type?: string; reason?: string; message?: string }[]) ?? [];
|
||||
const failedCond = conditions.find((c) => c.type === "Failed");
|
||||
return {
|
||||
phase: "Failed",
|
||||
complete: false,
|
||||
active: 0,
|
||||
succeeded: 0,
|
||||
failed: 1,
|
||||
reason: failedCond?.reason,
|
||||
message: failedCond?.message,
|
||||
};
|
||||
}
|
||||
default:
|
||||
// "Pending" or unknown
|
||||
return {
|
||||
phase: "Pending",
|
||||
complete: false,
|
||||
active: 0,
|
||||
succeeded: 0,
|
||||
failed: 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export async function createSandboxCr(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
manifest: Record<string, unknown>,
|
||||
): Promise<{ uid: string }> {
|
||||
const result = await clients.custom.createNamespacedCustomObject({
|
||||
group: SANDBOX_GROUP,
|
||||
version: SANDBOX_VERSION,
|
||||
namespace,
|
||||
plural: SANDBOX_PLURAL,
|
||||
body: manifest,
|
||||
});
|
||||
const uid = (result as { metadata?: { uid?: string } }).metadata?.uid;
|
||||
if (!uid) throw new Error("Sandbox CR created without a UID");
|
||||
return { uid };
|
||||
}
|
||||
|
||||
export async function getSandboxCrStatus(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<SandboxStatus> {
|
||||
const result = await clients.custom.getNamespacedCustomObject({
|
||||
group: SANDBOX_GROUP,
|
||||
version: SANDBOX_VERSION,
|
||||
namespace,
|
||||
plural: SANDBOX_PLURAL,
|
||||
name,
|
||||
});
|
||||
return mapSandboxPhase(result as Record<string, unknown>);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pod name backing a Sandbox CR.
|
||||
* Primary: read status.podName from the CR (set by the controller once ready).
|
||||
* Fallback: list pods in the namespace filtered by the paperclip.io/managed-by
|
||||
* label and the sandbox name label set on the pod template.
|
||||
*/
|
||||
export async function findPodForSandbox(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<string | null> {
|
||||
// Primary: read status.podName from the Sandbox CR
|
||||
const cr = await clients.custom.getNamespacedCustomObject({
|
||||
group: SANDBOX_GROUP,
|
||||
version: SANDBOX_VERSION,
|
||||
namespace,
|
||||
plural: SANDBOX_PLURAL,
|
||||
name,
|
||||
}) as Record<string, unknown>;
|
||||
|
||||
const status = (cr.status as Record<string, unknown>) ?? {};
|
||||
const podName = status.podName as string | undefined;
|
||||
if (podName && podName.trim().length > 0) {
|
||||
return podName;
|
||||
}
|
||||
|
||||
// Fallback: list pods with sandbox-name label (sandbox controller typically
|
||||
// labels pods with the sandbox name)
|
||||
const result = await clients.core.listNamespacedPod({
|
||||
namespace,
|
||||
labelSelector: `paperclip.io/managed-by=paperclip-k8s-plugin`,
|
||||
});
|
||||
const items =
|
||||
(
|
||||
(
|
||||
result as {
|
||||
items?: {
|
||||
metadata?: { name?: string; labels?: Record<string, string> };
|
||||
status?: { phase?: string };
|
||||
}[];
|
||||
}
|
||||
).items
|
||||
) ?? [];
|
||||
|
||||
// Filter to pods that belong to this sandbox by name prefix or label
|
||||
const matching = items.filter((p) => {
|
||||
const podMeta = p.metadata ?? {};
|
||||
const labels = podMeta.labels ?? {};
|
||||
// The sandbox controller may label pods differently; try matching by name prefix
|
||||
return (
|
||||
podMeta.name?.startsWith(name) ||
|
||||
labels["agents.x-k8s.io/sandbox-name"] === name
|
||||
);
|
||||
});
|
||||
|
||||
const running = matching.find((p) => p.status?.phase === "Running");
|
||||
return (running ?? matching[0])?.metadata?.name ?? null;
|
||||
}
|
||||
|
||||
export async function streamSandboxLogs(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
podName: string,
|
||||
onChunk: (stream: "stdout" | "stderr", text: string) => Promise<void>,
|
||||
): Promise<void> {
|
||||
// V1 limitation: the Pod log API returns the container's combined log stream. The
|
||||
// sleep-infinity pod will have minimal output; this is provided for interface
|
||||
// conformance. For actual command output, use execInPod() directly.
|
||||
const result = await clients.core.readNamespacedPodLog({
|
||||
namespace,
|
||||
name: podName,
|
||||
});
|
||||
const text =
|
||||
typeof result === "string"
|
||||
? result
|
||||
: typeof (result as { body?: unknown })?.body === "string"
|
||||
? (result as { body: string }).body
|
||||
: "";
|
||||
if (text.length > 0) await onChunk("stdout", text);
|
||||
}
|
||||
|
||||
export async function deleteSandboxCr(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<void> {
|
||||
await clients.custom.deleteNamespacedCustomObject({
|
||||
group: SANDBOX_GROUP,
|
||||
version: SANDBOX_VERSION,
|
||||
namespace,
|
||||
plural: SANDBOX_PLURAL,
|
||||
name,
|
||||
propagationPolicy: "Foreground",
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait until the Sandbox CR's pod reaches Ready phase (i.e., the pod is up and
|
||||
* exec-able). This is NOT waiting for a workload to finish — the Sandbox pod
|
||||
* runs sleep infinity indefinitely. Execution completion is tracked by the
|
||||
* individual execInPod() calls.
|
||||
*
|
||||
* Throws SandboxCrTimeoutError if Ready is not reached within timeoutMs.
|
||||
* Throws if the Sandbox transitions to Failed.
|
||||
*/
|
||||
export async function waitForSandboxReady(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
opts: { timeoutMs: number; pollMs?: number } = {
|
||||
timeoutMs: 120_000,
|
||||
pollMs: 2000,
|
||||
},
|
||||
): Promise<SandboxStatus> {
|
||||
const deadline = Date.now() + opts.timeoutMs;
|
||||
const pollMs = opts.pollMs ?? 2000;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const cr = await clients.custom.getNamespacedCustomObject({
|
||||
group: SANDBOX_GROUP,
|
||||
version: SANDBOX_VERSION,
|
||||
namespace,
|
||||
plural: SANDBOX_PLURAL,
|
||||
name,
|
||||
}) as Record<string, unknown>;
|
||||
|
||||
const status = (cr.status as Record<string, unknown>) ?? {};
|
||||
const phase = (status.phase as string) ?? "Pending";
|
||||
|
||||
if (phase === "Ready") {
|
||||
return mapSandboxPhase(cr);
|
||||
}
|
||||
if (phase === "Failed") {
|
||||
const mapped = mapSandboxPhase(cr);
|
||||
throw new Error(
|
||||
`Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"} — ${mapped.message ?? ""}`,
|
||||
);
|
||||
}
|
||||
// Pending or Terminating — keep polling
|
||||
await sleep(pollMs);
|
||||
}
|
||||
|
||||
throw new SandboxCrTimeoutError(namespace, name, opts.timeoutMs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sandbox CR-backed conformance to SandboxOrchestrator.
|
||||
*
|
||||
* waitForCompletion semantics change: for this backend, "completion" means
|
||||
* "pod is up and Ready to exec into" — NOT "workload finished". The actual
|
||||
* command execution and its completion is handled by execInPod().
|
||||
*/
|
||||
export const sandboxCrOrchestrator: SandboxOrchestrator = {
|
||||
claim: createSandboxCr,
|
||||
getStatus: getSandboxCrStatus,
|
||||
findPod: findPodForSandbox,
|
||||
streamLogs: streamSandboxLogs,
|
||||
release: deleteSandboxCr,
|
||||
waitForCompletion: waitForSandboxReady,
|
||||
};
|
||||
@@ -0,0 +1,68 @@
|
||||
import type { KubeClients } from "./kube-client.js";
|
||||
|
||||
export interface SandboxStatus {
|
||||
phase: "Pending" | "Running" | "Succeeded" | "Failed";
|
||||
complete: boolean;
|
||||
active: number;
|
||||
succeeded: number;
|
||||
failed: number;
|
||||
reason?: string;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract interface over a sandbox runtime backend. The current implementation
|
||||
* is Job-backed (job-orchestrator.ts). Future backends slot in by exporting an
|
||||
* object conforming to this shape — e.g. a Kata-FC warm-pool backend that
|
||||
* additionally implements the optional pause/resume slots, or a CRD-backed
|
||||
* backend on kubernetes-sigs/agent-sandbox once it reaches Beta.
|
||||
*/
|
||||
export interface SandboxOrchestrator {
|
||||
/** Provision the sandbox. Returns the runtime's stable UID. */
|
||||
claim(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
manifest: Record<string, unknown>,
|
||||
): Promise<{ uid: string }>;
|
||||
|
||||
/** Read current lifecycle phase. */
|
||||
getStatus(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<SandboxStatus>;
|
||||
|
||||
/** Locate the pod backing this sandbox (or null if none exists yet). */
|
||||
findPod(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
): Promise<string | null>;
|
||||
|
||||
/** Read logs from the sandbox's pod. V1: post-completion read. */
|
||||
streamLogs(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
podName: string,
|
||||
onChunk: (stream: "stdout" | "stderr", text: string) => Promise<void>,
|
||||
): Promise<void>;
|
||||
|
||||
/** Tear down the sandbox. Implementations MUST cascade-delete child resources. */
|
||||
release(clients: KubeClients, namespace: string, name: string): Promise<void>;
|
||||
|
||||
/** Block until phase is Succeeded or Failed, or throw on timeout. */
|
||||
waitForCompletion(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
name: string,
|
||||
opts: { timeoutMs: number; pollMs?: number },
|
||||
): Promise<SandboxStatus>;
|
||||
|
||||
// Optional warm-pool / Kata-FC extension slots. Job-backed implementation
|
||||
// does not provide these; runtimes that do (e.g. Kata-FC microVM pause)
|
||||
// implement them and acquire the warm-pool capability.
|
||||
// TODO: requires custom in-cluster controller for k8s — kubelet does not
|
||||
// expose pause/resume at the pod level. Add when warm-pool design lands.
|
||||
pause?(clients: KubeClients, namespace: string, name: string): Promise<void>;
|
||||
resume?(clients: KubeClients, namespace: string, name: string): Promise<void>;
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
import type { KubeClients } from "./kube-client.js";
|
||||
|
||||
export interface CreatePerRunSecretInput {
|
||||
namespace: string;
|
||||
secretName: string;
|
||||
runId: string;
|
||||
ownerKind: string;
|
||||
ownerApiVersion: string;
|
||||
ownerName: string;
|
||||
ownerUid: string;
|
||||
bootstrapToken: string;
|
||||
adapterEnv: Record<string, string>;
|
||||
}
|
||||
|
||||
export async function createPerRunSecret(clients: KubeClients, input: CreatePerRunSecretInput): Promise<void> {
|
||||
if (!input.ownerUid) {
|
||||
throw new Error("createPerRunSecret requires a non-empty ownerUid");
|
||||
}
|
||||
if ("BOOTSTRAP_TOKEN" in input.adapterEnv) {
|
||||
throw new Error("adapterEnv must not contain BOOTSTRAP_TOKEN (reserved key)");
|
||||
}
|
||||
await clients.core.createNamespacedSecret({
|
||||
namespace: input.namespace,
|
||||
body: {
|
||||
apiVersion: "v1",
|
||||
kind: "Secret",
|
||||
type: "Opaque",
|
||||
metadata: {
|
||||
name: input.secretName,
|
||||
namespace: input.namespace,
|
||||
labels: {
|
||||
"paperclip.io/run-id": input.runId,
|
||||
"paperclip.io/managed-by": "paperclip-k8s-plugin",
|
||||
},
|
||||
ownerReferences: [
|
||||
{
|
||||
apiVersion: input.ownerApiVersion,
|
||||
kind: input.ownerKind,
|
||||
name: input.ownerName,
|
||||
uid: input.ownerUid,
|
||||
controller: true,
|
||||
blockOwnerDeletion: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
stringData: {
|
||||
BOOTSTRAP_TOKEN: input.bootstrapToken,
|
||||
...input.adapterEnv,
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,322 @@
|
||||
import type { KubeClients } from "./kube-client.js";
|
||||
import { buildNetworkPolicyManifests } from "./network-policy.js";
|
||||
import { buildCiliumNetworkPolicyManifest } from "./cilium-network-policy.js";
|
||||
|
||||
export interface EnsureTenantInput {
|
||||
namespace: string;
|
||||
companyId: string;
|
||||
paperclipServerNamespace: string;
|
||||
serviceAccountAnnotations: Record<string, string>;
|
||||
egressMode: "standard" | "cilium";
|
||||
egressAllowFqdns: string[];
|
||||
egressAllowCidrs: string[];
|
||||
resourceQuota: {
|
||||
pods: string;
|
||||
requestsCpu: string;
|
||||
requestsMemory: string;
|
||||
limitsCpu: string;
|
||||
limitsMemory: string;
|
||||
};
|
||||
}
|
||||
|
||||
const SERVICE_ACCOUNT_NAME = "paperclip-tenant-sa";
|
||||
const ROLE_NAME = "paperclip-tenant-role";
|
||||
const ROLE_BINDING_NAME = "paperclip-tenant-rb";
|
||||
const RESOURCE_QUOTA_NAME = "paperclip-quota";
|
||||
const LIMIT_RANGE_NAME = "paperclip-limits";
|
||||
|
||||
/**
|
||||
* Tenant provisioning reconciles the resources this plugin owns. Existing
|
||||
* resources are replaced with the desired manifest so quota, RBAC, service
|
||||
* account annotations, and egress policy changes take effect on the next run.
|
||||
*/
|
||||
export async function ensureTenant(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
await ensureNamespace(clients, input);
|
||||
await ensureServiceAccount(clients, input);
|
||||
await ensureRole(clients, input);
|
||||
await ensureRoleBinding(clients, input);
|
||||
await ensureResourceQuota(clients, input);
|
||||
await ensureLimitRange(clients, input);
|
||||
await ensureNetworkPolicies(clients, input);
|
||||
}
|
||||
|
||||
async function ensureNamespace(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
try {
|
||||
await clients.core.readNamespace({ name: input.namespace });
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.core.createNamespace({
|
||||
body: {
|
||||
apiVersion: "v1",
|
||||
kind: "Namespace",
|
||||
metadata: {
|
||||
name: input.namespace,
|
||||
labels: {
|
||||
"paperclip.io/company-id": input.companyId,
|
||||
"paperclip.io/managed-by": "paperclip-k8s-plugin",
|
||||
"pod-security.kubernetes.io/enforce": "restricted",
|
||||
"pod-security.kubernetes.io/audit": "restricted",
|
||||
"pod-security.kubernetes.io/warn": "restricted",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function ensureServiceAccount(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
const manifest = {
|
||||
apiVersion: "v1",
|
||||
kind: "ServiceAccount",
|
||||
metadata: {
|
||||
name: SERVICE_ACCOUNT_NAME,
|
||||
namespace: input.namespace,
|
||||
annotations: input.serviceAccountAnnotations,
|
||||
labels: { "paperclip.io/managed-by": "paperclip-k8s-plugin" },
|
||||
},
|
||||
};
|
||||
try {
|
||||
const existing = await clients.core.readNamespacedServiceAccount({ name: SERVICE_ACCOUNT_NAME, namespace: input.namespace });
|
||||
await clients.core.replaceNamespacedServiceAccount({
|
||||
name: SERVICE_ACCOUNT_NAME,
|
||||
namespace: input.namespace,
|
||||
body: withResourceVersion(manifest, existing) as never,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.core.createNamespacedServiceAccount({ namespace: input.namespace, body: manifest });
|
||||
}
|
||||
|
||||
async function ensureRole(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
const manifest = {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1",
|
||||
kind: "Role",
|
||||
metadata: { name: ROLE_NAME, namespace: input.namespace },
|
||||
rules: [
|
||||
{ apiGroups: [""], resources: ["pods/log"], verbs: ["get"] },
|
||||
],
|
||||
};
|
||||
try {
|
||||
const existing = await clients.rbac.readNamespacedRole({ name: ROLE_NAME, namespace: input.namespace });
|
||||
await clients.rbac.replaceNamespacedRole({
|
||||
name: ROLE_NAME,
|
||||
namespace: input.namespace,
|
||||
body: withResourceVersion(manifest, existing) as never,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.rbac.createNamespacedRole({ namespace: input.namespace, body: manifest });
|
||||
}
|
||||
|
||||
async function ensureRoleBinding(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
const manifest = {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1",
|
||||
kind: "RoleBinding",
|
||||
metadata: { name: ROLE_BINDING_NAME, namespace: input.namespace },
|
||||
roleRef: { apiGroup: "rbac.authorization.k8s.io", kind: "Role", name: ROLE_NAME },
|
||||
subjects: [{ kind: "ServiceAccount", name: SERVICE_ACCOUNT_NAME, namespace: input.namespace }],
|
||||
};
|
||||
try {
|
||||
const existing = await clients.rbac.readNamespacedRoleBinding({ name: ROLE_BINDING_NAME, namespace: input.namespace });
|
||||
await clients.rbac.replaceNamespacedRoleBinding({
|
||||
name: ROLE_BINDING_NAME,
|
||||
namespace: input.namespace,
|
||||
body: withResourceVersion(manifest, existing) as never,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.rbac.createNamespacedRoleBinding({ namespace: input.namespace, body: manifest });
|
||||
}
|
||||
|
||||
async function ensureResourceQuota(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
const manifest = {
|
||||
apiVersion: "v1",
|
||||
kind: "ResourceQuota",
|
||||
metadata: { name: RESOURCE_QUOTA_NAME, namespace: input.namespace },
|
||||
spec: {
|
||||
hard: {
|
||||
pods: input.resourceQuota.pods,
|
||||
"requests.cpu": input.resourceQuota.requestsCpu,
|
||||
"requests.memory": input.resourceQuota.requestsMemory,
|
||||
"limits.cpu": input.resourceQuota.limitsCpu,
|
||||
"limits.memory": input.resourceQuota.limitsMemory,
|
||||
},
|
||||
},
|
||||
};
|
||||
try {
|
||||
const existing = await clients.core.readNamespacedResourceQuota({ name: RESOURCE_QUOTA_NAME, namespace: input.namespace });
|
||||
await clients.core.replaceNamespacedResourceQuota({
|
||||
name: RESOURCE_QUOTA_NAME,
|
||||
namespace: input.namespace,
|
||||
body: withResourceVersion(manifest, existing) as never,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.core.createNamespacedResourceQuota({ namespace: input.namespace, body: manifest });
|
||||
}
|
||||
|
||||
async function ensureLimitRange(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
const manifest = {
|
||||
apiVersion: "v1",
|
||||
kind: "LimitRange",
|
||||
metadata: { name: LIMIT_RANGE_NAME, namespace: input.namespace },
|
||||
spec: {
|
||||
limits: [
|
||||
{
|
||||
type: "Container",
|
||||
max: { cpu: "4", memory: "8Gi" },
|
||||
min: { cpu: "100m", memory: "128Mi" },
|
||||
// The k8s client-node type names this `_default` but the actual
|
||||
// Kubernetes API field is `default`. We produce a JSON-shape
|
||||
// manifest so the cast is safe.
|
||||
default: { cpu: "1", memory: "2Gi" },
|
||||
defaultRequest: { cpu: "250m", memory: "512Mi" },
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
try {
|
||||
const existing = await clients.core.readNamespacedLimitRange({ name: LIMIT_RANGE_NAME, namespace: input.namespace });
|
||||
await clients.core.replaceNamespacedLimitRange({
|
||||
name: LIMIT_RANGE_NAME,
|
||||
namespace: input.namespace,
|
||||
body: withResourceVersion(manifest, existing) as never,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.core.createNamespacedLimitRange({
|
||||
namespace: input.namespace,
|
||||
body: manifest as never,
|
||||
});
|
||||
}
|
||||
|
||||
async function ensureNetworkPolicies(clients: KubeClients, input: EnsureTenantInput): Promise<void> {
|
||||
const [denyAll, egressStd] = buildNetworkPolicyManifests({
|
||||
namespace: input.namespace,
|
||||
paperclipServerNamespace: input.paperclipServerNamespace,
|
||||
egressAllowFqdns: input.egressAllowFqdns,
|
||||
egressAllowCidrs: input.egressAllowCidrs,
|
||||
});
|
||||
|
||||
await ensureNetworkPolicy(clients, input.namespace, denyAll);
|
||||
|
||||
if (input.egressMode === "cilium") {
|
||||
const cnp = buildCiliumNetworkPolicyManifest({
|
||||
namespace: input.namespace,
|
||||
paperclipServerNamespace: input.paperclipServerNamespace,
|
||||
egressAllowFqdns: input.egressAllowFqdns,
|
||||
egressAllowCidrs: input.egressAllowCidrs,
|
||||
});
|
||||
await ensureCiliumNetworkPolicy(clients, input.namespace, cnp);
|
||||
await deleteNetworkPolicyIfExists(clients, input.namespace, "paperclip-egress-allow");
|
||||
} else {
|
||||
await ensureNetworkPolicy(clients, input.namespace, egressStd);
|
||||
await deleteCiliumNetworkPolicyIfExists(clients, input.namespace, "paperclip-egress-fqdn");
|
||||
}
|
||||
}
|
||||
|
||||
async function ensureNetworkPolicy(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
manifest: Record<string, unknown>,
|
||||
): Promise<void> {
|
||||
const name = (manifest.metadata as { name: string }).name;
|
||||
try {
|
||||
const existing = await clients.networking.readNamespacedNetworkPolicy({ name, namespace });
|
||||
await clients.networking.replaceNamespacedNetworkPolicy({
|
||||
name,
|
||||
namespace,
|
||||
body: withResourceVersion(manifest, existing) as never,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.networking.createNamespacedNetworkPolicy({ namespace, body: manifest as never });
|
||||
}
|
||||
|
||||
async function ensureCiliumNetworkPolicy(
|
||||
clients: KubeClients,
|
||||
namespace: string,
|
||||
manifest: Record<string, unknown>,
|
||||
): Promise<void> {
|
||||
const name = (manifest.metadata as { name: string }).name;
|
||||
try {
|
||||
const existing = await clients.custom.getNamespacedCustomObject({
|
||||
group: "cilium.io",
|
||||
version: "v2",
|
||||
namespace,
|
||||
plural: "ciliumnetworkpolicies",
|
||||
name,
|
||||
});
|
||||
await clients.custom.replaceNamespacedCustomObject({
|
||||
group: "cilium.io",
|
||||
version: "v2",
|
||||
namespace,
|
||||
plural: "ciliumnetworkpolicies",
|
||||
name,
|
||||
body: withResourceVersion(manifest, existing),
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
await clients.custom.createNamespacedCustomObject({
|
||||
group: "cilium.io",
|
||||
version: "v2",
|
||||
namespace,
|
||||
plural: "ciliumnetworkpolicies",
|
||||
body: manifest,
|
||||
});
|
||||
}
|
||||
|
||||
async function deleteNetworkPolicyIfExists(clients: KubeClients, namespace: string, name: string): Promise<void> {
|
||||
try {
|
||||
await clients.networking.deleteNamespacedNetworkPolicy({ name, namespace });
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async function deleteCiliumNetworkPolicyIfExists(clients: KubeClients, namespace: string, name: string): Promise<void> {
|
||||
try {
|
||||
await clients.custom.deleteNamespacedCustomObject({
|
||||
group: "cilium.io",
|
||||
version: "v2",
|
||||
namespace,
|
||||
plural: "ciliumnetworkpolicies",
|
||||
name,
|
||||
});
|
||||
} catch (err) {
|
||||
if (!isNotFound(err)) throw err;
|
||||
}
|
||||
}
|
||||
|
||||
function withResourceVersion<T extends Record<string, unknown>>(manifest: T, existing: unknown): T {
|
||||
const resourceVersion = (existing as { metadata?: { resourceVersion?: string } })?.metadata?.resourceVersion;
|
||||
if (!resourceVersion) return manifest;
|
||||
return {
|
||||
...manifest,
|
||||
metadata: {
|
||||
...(manifest.metadata as Record<string, unknown>),
|
||||
resourceVersion,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function isNotFound(err: unknown): boolean {
|
||||
if (typeof err !== "object" || err === null) return false;
|
||||
const e = err as { code?: number; statusCode?: number };
|
||||
return e.code === 404 || e.statusCode === 404;
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
import { z } from "zod";
|
||||
import { KNOWN_ADAPTER_TYPES } from "./adapter-defaults.js";
|
||||
|
||||
const cidrRegex = /^(\d{1,3}\.){3}\d{1,3}\/\d{1,2}$/;
|
||||
|
||||
export const kubernetesProviderConfigSchema = z
|
||||
.object({
|
||||
inCluster: z.boolean().default(false),
|
||||
kubeconfig: z.string().optional(),
|
||||
|
||||
namespacePrefix: z.string().regex(/^[a-z0-9-]{1,32}$/).default("paperclip-"),
|
||||
companySlug: z.string().regex(/^[a-z0-9-]{1,32}$/).optional(),
|
||||
|
||||
imageRegistry: z.string().url().optional(),
|
||||
imageAllowList: z.array(z.string()).default([]),
|
||||
imagePullSecrets: z.array(z.string()).default([]),
|
||||
|
||||
egressAllowFqdns: z.array(z.string()).default([]),
|
||||
egressAllowCidrs: z.array(z.string().regex(cidrRegex, "Invalid CIDR")).default([]),
|
||||
egressMode: z.enum(["cilium", "standard"]).default("standard"),
|
||||
|
||||
defaultResources: z
|
||||
.object({
|
||||
requests: z.object({ cpu: z.string(), memory: z.string() }).partial().optional(),
|
||||
limits: z.object({ cpu: z.string(), memory: z.string() }).partial().optional(),
|
||||
})
|
||||
.optional(),
|
||||
|
||||
runtimeClassName: z.string().optional(),
|
||||
serviceAccountAnnotations: z.record(z.string()).default({}),
|
||||
|
||||
jobTtlSecondsAfterFinished: z.number().int().nonnegative().default(900),
|
||||
podActivityDeadlineSec: z.number().int().positive().default(3600),
|
||||
|
||||
/**
|
||||
* The adapter type that Jobs in this environment will run.
|
||||
* Each Kubernetes environment is bound to one adapter; create multiple
|
||||
* environments for different adapters.
|
||||
* Defaults to `"claude_local"`.
|
||||
*/
|
||||
adapterType: z
|
||||
.string()
|
||||
.default("claude_local")
|
||||
.refine((v) => KNOWN_ADAPTER_TYPES.has(v), {
|
||||
message: "adapterType must be one of the known adapter types",
|
||||
}),
|
||||
|
||||
/**
|
||||
* The sandbox backend to use.
|
||||
*
|
||||
* - `"sandbox-cr"` (default, alpha) — uses the kubernetes-sigs/agent-sandbox
|
||||
* Sandbox CRD (agents.x-k8s.io/v1alpha1). Creates a long-lived pod that
|
||||
* paperclip-server can exec into for multi-command adapter-install workflows.
|
||||
* Requires the agent-sandbox controller to be installed in the cluster.
|
||||
*
|
||||
* - `"job"` — uses batch/v1 Job (stable fallback). One-shot entrypoint; does
|
||||
* NOT support multi-command exec. Use this for clusters without agent-sandbox
|
||||
* installed, or when you need stable (non-alpha) k8s APIs.
|
||||
*/
|
||||
backend: z.enum(["sandbox-cr", "job"]).default("sandbox-cr"),
|
||||
})
|
||||
.refine(
|
||||
(cfg) => cfg.inCluster || cfg.kubeconfig,
|
||||
{
|
||||
message:
|
||||
"kubernetes provider requires one of `inCluster` or `kubeconfig`",
|
||||
},
|
||||
);
|
||||
|
||||
export type KubernetesProviderConfig = z.infer<typeof kubernetesProviderConfigSchema>;
|
||||
|
||||
export function parseKubernetesProviderConfig(input: unknown): KubernetesProviderConfig {
|
||||
return kubernetesProviderConfigSchema.parse(input);
|
||||
}
|
||||
|
||||
export interface KubernetesLeaseMetadata {
|
||||
namespace: string;
|
||||
/** Name of the workload resource (Job name for job backend, Sandbox CR name for sandbox-cr backend). */
|
||||
jobName: string;
|
||||
podName: string | null;
|
||||
secretName: string;
|
||||
phase: "Pending" | "Running" | "Succeeded" | "Failed";
|
||||
/** Which backend provisioned this lease. */
|
||||
backend: "sandbox-cr" | "job";
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
const ULID_ALPHABET = "0123456789abcdefghjkmnpqrstvwxyz";
|
||||
|
||||
export function deriveCompanySlug(input: string): string {
|
||||
const slug = input
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9-]+/g, "-")
|
||||
.replace(/^-+|-+$/g, "")
|
||||
.slice(0, 32)
|
||||
.replace(/-+$/, "");
|
||||
return slug.length > 0 ? slug : "company";
|
||||
}
|
||||
|
||||
export function deriveNamespaceName(prefix: string, slug: string): string {
|
||||
return `${prefix}${slug}`;
|
||||
}
|
||||
|
||||
export function newRunUlidDns(now: () => number = Date.now): string {
|
||||
const timestamp = now();
|
||||
let out = "";
|
||||
let t = timestamp;
|
||||
for (let i = 0; i < 10; i++) {
|
||||
out = ULID_ALPHABET[t & 0x1f] + out;
|
||||
t = Math.floor(t / 32);
|
||||
}
|
||||
for (let i = 0; i < 16; i++) {
|
||||
out += ULID_ALPHABET[Math.floor(Math.random() * 32)];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
export interface LabelsInput {
|
||||
runId: string;
|
||||
agentId: string;
|
||||
companyId: string;
|
||||
adapterType: string;
|
||||
}
|
||||
|
||||
export function paperclipLabels(input: LabelsInput): Record<string, string> {
|
||||
return {
|
||||
"paperclip.io/run-id": input.runId,
|
||||
"paperclip.io/agent-id": input.agentId,
|
||||
"paperclip.io/company-id": input.companyId,
|
||||
"paperclip.io/adapter": input.adapterType,
|
||||
"paperclip.io/managed-by": "paperclip-k8s-plugin",
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
import { runWorker } from "@paperclipai/plugin-sdk";
|
||||
import plugin from "./plugin.js";
|
||||
|
||||
export default plugin;
|
||||
runWorker(plugin, import.meta.url);
|
||||
Reference in New Issue
Block a user