5153b01ada
## Thinking Path

> - Paperclip orchestrates AI-agent companies through adapter-backed local and external runtimes.
> - The agent configuration UI lets operators choose adapter models and refresh model lists when adapters support live discovery.
> - Codex already had a live refresh path, but Claude Local only exposed static fallback models and the UI hid the refresh action for Claude.
> - A newly available Claude Opus model should not require a code release every time the model catalog changes.
> - This pull request adds Anthropic model discovery for Claude Local, keeps the static fallback current with Claude Opus 4.8, and exposes the existing refresh button in the Claude Local dropdown.
> - The benefit is that operators can refresh Claude models from the same model selector flow they already use for Codex.

## What Changed

- Added `claude-opus-4-8` to the Claude Local fallback model list.
- Added Claude model discovery through Anthropic-compatible `GET /v1/models` when `ANTHROPIC_API_KEY` is available.
- Added normal cache reuse, forced refresh support, a SHA-256-based API-key fingerprint for cache keys, and warning logging for discovery errors before fallback.
- Wired `claude_local.refreshModels` into the server adapter registry.
- Enabled the existing `Refresh models` dropdown action for `claude_local` in `AgentConfigForm`.
- Added tests for Claude fallback, live discovery, API-failure fallback, forced refresh, and the UI refresh-button gate.

## Verification

- `pnpm exec vitest run server/src/__tests__/adapter-models.test.ts`
- `pnpm exec vitest run ui/src/components/AgentConfigForm.test.ts`
- `pnpm --filter @paperclipai/adapter-claude-local typecheck`
- `pnpm --filter @paperclipai/server typecheck`
- `pnpm --filter @paperclipai/ui typecheck`
- Greptile review reached Confidence Score: 5/5 on commit `b796cf4f1` with addressed threads resolved.
UI note: the visible change is a conditional action row inside the existing model dropdown; the regression test covers that `claude_local` now receives the refresh action.

## Risks

- Low risk. Without `ANTHROPIC_API_KEY`, Claude Local still uses the static fallback list.
- If Anthropic model discovery fails or times out, Paperclip falls back to the existing cached or static list.
- Bedrock environments remain on Bedrock-native model IDs.

## Model Used

OpenAI GPT-5 via Codex local coding agent, with repository file access, shell command execution, git operations, and targeted test/typecheck verification. Exact context window is not exposed by the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge
219 lines
8.0 KiB
TypeScript
219 lines
8.0 KiB
TypeScript
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
import { models as claudeFallbackModels } from "@paperclipai/adapter-claude-local";
|
|
import { resetClaudeModelsCacheForTests } from "@paperclipai/adapter-claude-local/server";
|
|
import { models as codexFallbackModels } from "@paperclipai/adapter-codex-local";
|
|
import { models as cursorFallbackModels } from "@paperclipai/adapter-cursor-local";
|
|
import { models as opencodeFallbackModels } from "@paperclipai/adapter-opencode-local";
|
|
import { resetOpenCodeModelsCacheForTests } from "@paperclipai/adapter-opencode-local/server";
|
|
import { listAdapterModels, listServerAdapters, refreshAdapterModels } from "../adapters/index.js";
|
|
import { resetCodexModelsCacheForTests } from "../adapters/codex-models.js";
|
|
import { resetCursorModelsCacheForTests, setCursorModelsRunnerForTests } from "../adapters/cursor-models.js";
|
|
|
|
// Replace the "acpx/runtime" module with inert stubs for this test file.
// NOTE(review): presumably this keeps the adapter registry import from loading
// the real ACP runtime — confirm against ../adapters/index.js.
vi.mock("acpx/runtime", () => {
  return {
    createAcpRuntime: vi.fn(),
    createAgentRegistry: vi.fn(),
    createRuntimeStore: vi.fn(),
    // Always reports "not an ACP runtime error".
    isAcpRuntimeError: vi.fn(() => false),
  };
});
|
|
|
|
describe("adapter model listing", () => {
|
|
// Before every test: clear the provider-related environment variables, call the
// claude/codex/cursor/opencode cache reset helpers, drop any cursor runner
// override, and restore all spies.
beforeEach(() => {
  const envKeysToClear = [
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_BASE_URL",
    "ANTHROPIC_BEDROCK_BASE_URL",
    "CLAUDE_CODE_USE_BEDROCK",
    "PAPERCLIP_OPENCODE_COMMAND",
  ];
  for (const key of envKeysToClear) {
    delete process.env[key];
  }

  resetClaudeModelsCacheForTests();
  resetCodexModelsCacheForTests();
  resetCursorModelsCacheForTests();
  setCursorModelsRunnerForTests(null);
  resetOpenCodeModelsCacheForTests();
  vi.restoreAllMocks();
});
|
|
|
|
// An adapter type that is not registered must resolve to an empty model list.
it("returns an empty list for unknown adapters", async () => {
  const listed = await listAdapterModels("unknown_adapter");

  expect(listed).toEqual([]);
});
|
|
|
|
// The acpx_local adapter's model list must contain at least one label starting
// with "Claude: " and at least one starting with "Codex: ".
it("uses provider-prefixed ACPX fallback model labels", () => {
  const acpx = listServerAdapters().find(({ type }) => type === "acpx_local");
  // Yields undefined (and therefore fails the assertions) when the adapter or
  // its models list is missing.
  const hasLabelWithPrefix = (prefix: string) =>
    acpx?.models?.some(({ label }) => label.startsWith(prefix));

  expect(hasLabelWithPrefix("Claude: ")).toBe(true);
  expect(hasLabelWithPrefix("Codex: ")).toBe(true);
});
|
|
|
|
// With OPENAI_API_KEY cleared by beforeEach, codex_local must return exactly the
// static fallback list without touching fetch.
it("returns codex fallback models when no OpenAI key is available", async () => {
  const fetchMock = vi.spyOn(globalThis, "fetch");

  const listed = await listAdapterModels("codex_local");

  expect(listed).toEqual(codexFallbackModels);
  expect(fetchMock).not.toHaveBeenCalled();
});
|
|
|
|
// With ANTHROPIC_API_KEY cleared by beforeEach, claude_local must return exactly
// the static fallback list (which has to include "claude-opus-4-8") and must
// not call fetch.
it("returns claude fallback models including the latest Opus alias when no Anthropic key is available", async () => {
  const fetchMock = vi.spyOn(globalThis, "fetch");

  const listed = await listAdapterModels("claude_local");
  const listedIds = listed.map((entry) => entry.id);

  expect(listed).toEqual(claudeFallbackModels);
  expect(listedIds.includes("claude-opus-4-8")).toBe(true);
  expect(fetchMock).not.toHaveBeenCalled();
});
|
|
|
|
// With an API key set, the first listing fetches models once; the second listing
// must be served without another fetch and equal the first. The result must
// contain both a discovered id and the "claude-opus-4-8" fallback id.
it("loads claude models dynamically and merges fallback options", async () => {
  process.env.ANTHROPIC_API_KEY = "sk-ant-test";

  // Builds a fresh payload on every json() call, like a real response body.
  const readDiscoveryPayload = async () => ({
    data: [
      { id: "claude-sonnet-4-20250514", display_name: "Claude Sonnet 4" },
      { id: "claude-opus-4-8-20260529", display_name: "Claude Opus 4.8" },
    ],
  });
  const discoveryResponse = { ok: true, json: readDiscoveryPayload } as Response;
  const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue(discoveryResponse);

  // Sequential on purpose: the second call exercises the cache.
  const firstListing = await listAdapterModels("claude_local");
  const secondListing = await listAdapterModels("claude_local");
  const firstIds = firstListing.map((entry) => entry.id);

  expect(fetchMock).toHaveBeenCalledTimes(1);
  expect(firstListing).toEqual(secondListing);
  expect(firstIds.includes("claude-opus-4-8-20260529")).toBe(true);
  expect(firstIds.includes("claude-opus-4-8")).toBe(true);
});
|
|
|
|
// refreshAdapterModels must trigger a second fetch even though the first listing
// already ran, and its result must reflect the second response.
it("refreshes cached claude models on demand", async () => {
  process.env.ANTHROPIC_API_KEY = "sk-ant-test";

  const sonnetOnlyResponse = {
    ok: true,
    json: async () => ({
      data: [{ id: "claude-sonnet-4-20250514", display_name: "Claude Sonnet 4" }],
    }),
  } as Response;
  const opusOnlyResponse = {
    ok: true,
    json: async () => ({
      data: [{ id: "claude-opus-4-8-20260529", display_name: "Claude Opus 4.8" }],
    }),
  } as Response;
  const fetchMock = vi
    .spyOn(globalThis, "fetch")
    .mockResolvedValueOnce(sonnetOnlyResponse)
    .mockResolvedValueOnce(opusOnlyResponse);

  const beforeRefresh = await listAdapterModels("claude_local");
  const afterRefresh = await refreshAdapterModels("claude_local");

  expect(fetchMock).toHaveBeenCalledTimes(2);
  expect(beforeRefresh.map((entry) => entry.id).includes("claude-sonnet-4-20250514")).toBe(true);
  expect(afterRefresh.map((entry) => entry.id).includes("claude-opus-4-8-20260529")).toBe(true);
});
|
|
|
|
// With an API key set, a non-OK (401) discovery response must yield exactly the
// static claude fallback list.
it("falls back to static claude models when Anthropic model discovery fails", async () => {
  process.env.ANTHROPIC_API_KEY = "sk-ant-test";
  const unauthorizedResponse = {
    ok: false,
    status: 401,
    json: async () => ({}),
  } as Response;
  const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(unauthorizedResponse);

  const models = await listAdapterModels("claude_local");

  // Guard against a vacuous pass: without this, the test would also succeed if
  // discovery were never attempted and the fallback list was returned directly.
  expect(fetchSpy).toHaveBeenCalled();
  expect(models).toEqual(claudeFallbackModels);
});
|
|
|
|
// With an API key set, the first listing fetches models once; the second listing
// must be served without another fetch and equal the first. The result must
// contain both a discovered id and the "codex-mini-latest" id, which is absent
// from the mocked response.
it("loads codex models dynamically and merges fallback options", async () => {
  process.env.OPENAI_API_KEY = "sk-test";

  // Builds a fresh payload on every json() call, like a real response body.
  const readDiscoveryPayload = async () => ({
    data: [{ id: "gpt-5-pro" }, { id: "gpt-5" }],
  });
  const discoveryResponse = { ok: true, json: readDiscoveryPayload } as Response;
  const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue(discoveryResponse);

  // Sequential on purpose: the second call exercises the cache.
  const firstListing = await listAdapterModels("codex_local");
  const secondListing = await listAdapterModels("codex_local");
  const firstIds = firstListing.map((entry) => entry.id);

  expect(fetchMock).toHaveBeenCalledTimes(1);
  expect(firstListing).toEqual(secondListing);
  expect(firstIds.includes("gpt-5-pro")).toBe(true);
  expect(firstIds.includes("codex-mini-latest")).toBe(true);
});
|
|
|
|
// refreshAdapterModels must trigger a second fetch even though the first listing
// already ran, and its result must reflect the second response.
it("refreshes cached codex models on demand", async () => {
  process.env.OPENAI_API_KEY = "sk-test";

  const initialResponse = {
    ok: true,
    json: async () => ({
      data: [{ id: "gpt-5" }],
    }),
  } as Response;
  const updatedResponse = {
    ok: true,
    json: async () => ({
      data: [{ id: "gpt-5.5" }],
    }),
  } as Response;
  const fetchMock = vi
    .spyOn(globalThis, "fetch")
    .mockResolvedValueOnce(initialResponse)
    .mockResolvedValueOnce(updatedResponse);

  const beforeRefresh = await listAdapterModels("codex_local");
  const afterRefresh = await refreshAdapterModels("codex_local");

  expect(fetchMock).toHaveBeenCalledTimes(2);
  expect(beforeRefresh.map((entry) => entry.id).includes("gpt-5")).toBe(true);
  expect(afterRefresh.map((entry) => entry.id).includes("gpt-5.5")).toBe(true);
});
|
|
|
|
// With an API key set, a non-OK (401) discovery response must yield exactly the
// static codex fallback list.
it("falls back to static codex models when OpenAI model discovery fails", async () => {
  process.env.OPENAI_API_KEY = "sk-test";
  const unauthorizedResponse = {
    ok: false,
    status: 401,
    json: async () => ({}),
  } as Response;
  const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(unauthorizedResponse);

  const models = await listAdapterModels("codex_local");

  // Guard against a vacuous pass: without this, the test would also succeed if
  // discovery were never attempted and the fallback list was returned directly.
  expect(fetchSpy).toHaveBeenCalled();
  expect(models).toEqual(codexFallbackModels);
});
|
|
|
|
|
|
// When the injected cursor runner reports an error with no output, the cursor
// adapter must return exactly the static fallback list.
it("returns cursor fallback models when CLI discovery is unavailable", async () => {
  // Produces a fresh failed-run result on every invocation.
  const failingRunner = () => ({
    status: null,
    stdout: "",
    stderr: "",
    hasError: true,
  });
  setCursorModelsRunnerForTests(failingRunner);

  const listed = await listAdapterModels("cursor");

  expect(listed).toEqual(cursorFallbackModels);
});
|
|
|
|
// Points the opencode command at a placeholder name so discovery cannot succeed,
// then expects exactly the static fallback list.
// NOTE(review): the title mentions gpt-5.4, but only equality with the imported
// fallback list is asserted here — consider an explicit id check once the exact
// id format is confirmed.
it("returns opencode fallback models including gpt-5.4", async () => {
  process.env.PAPERCLIP_OPENCODE_COMMAND = "__paperclip_missing_opencode_command__";

  const listed = await listAdapterModels("opencode_local");

  expect(listed).toEqual(opencodeFallbackModels);
});
|
|
|
|
// A successful runner must be invoked only once across two listings, and both
// listings must be equal. The result must contain ids parsed from the runner
// output plus "composer-1", which does not appear in that output.
// NOTE(review): "composer-1" presumably comes from the merged fallback list — confirm.
it("loads cursor models dynamically and caches them", async () => {
  const cliOutput = "Available models: auto, composer-1.5, gpt-5.3-codex-high, sonnet-4.6";
  const runner = vi.fn(() => ({
    status: 0,
    stdout: cliOutput,
    stderr: "",
    hasError: false,
  }));
  setCursorModelsRunnerForTests(runner);

  // Sequential on purpose: the second call exercises the cache.
  const firstListing = await listAdapterModels("cursor");
  const secondListing = await listAdapterModels("cursor");
  const firstIds = firstListing.map((entry) => entry.id);

  expect(runner).toHaveBeenCalledTimes(1);
  expect(firstListing).toEqual(secondListing);
  expect(firstIds.includes("auto")).toBe(true);
  expect(firstIds.includes("gpt-5.3-codex-high")).toBe(true);
  expect(firstIds.includes("composer-1")).toBe(true);
});
|
|
|
|
});
|