refactor(skills): replace per-host REST shims with git wire protocol

The skill import/update/file-read pipeline talked to host-specific REST
APIs (GitHub /commits/{ref}, /git/trees/{sha}, raw.githubusercontent.com)
and the recent Gitea support was a parallel shim on top of the same
pattern. The result was multiple ref-resolution shapes that needed
per-host branching, and on Gitea the /commits/{ref} endpoint returns
404 outright -- so even public Gitea/Forgejo repos failed to import.

Replace with a single git-source module backed by isomorphic-git +
memfs. It speaks the smart-HTTP protocol any sane git server already
serves:

- resolveGitRef: one listServerRefs call, no host API. Handles default
  branch (symref on HEAD), named branches, annotated/lightweight tags,
  and SHA passthrough.
- openRepoSnapshot: shallow singleBranch clone into an in-memory fs;
  listFiles via git.walk, readFile via git.readBlob. No tempdirs, no
  execFile, no per-host endpoints.
- Universal auth via onAuth (token-as-username) covering GitHub PATs,
  GitLab PATs, Gitea/Forgejo tokens.
- parseGitSourceUrl recognises github tree/blob, gitea src/branch|
  commit|tag, gitlab /-/tree, bitbucket /src/{ref} URL shapes plus
  bare clone URLs.

Stored skill metadata is unchanged (hostname/owner/repo/ref/trackingRef/
repoSkillDir), so existing rows keep working -- the clone URL is
derived at fetch time.

company-portability.ts still imports github-fetch.ts (same broken
pattern, separate feature). Left as a follow-up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 09:16:00 -04:00
parent 818a8eade8
commit 0fd4e9c4d1
4 changed files with 830 additions and 117 deletions
+44 -117
View File
@@ -29,7 +29,7 @@ import type {
import { normalizeAgentUrlKey } from "@paperclipai/shared";
import { resolvePaperclipInstanceRoot } from "../home-paths.js";
import { notFound, unprocessable } from "../errors.js";
import { ghFetch, gitHubApiBase, inferGitHostFamily, resolveRawGitHubUrl } from "./github-fetch.js";
import { openRepoSnapshot, parseGitSourceUrl, resolveGitRef, type ParsedGitSource, type RepoSnapshot } from "./git-source.js";
import { agentService } from "./agents.js";
import { projectService } from "./projects.js";
import { secretService } from "./secrets.js";
@@ -541,106 +541,20 @@ function parseFrontmatterMarkdown(raw: string): { frontmatter: Record<string, un
};
}
async function fetchText(url: string, authToken?: string) {
const response = await ghFetch(url, undefined, authToken);
async function fetchPlainText(url: string) {
let response: Response;
try {
response = await fetch(url);
} catch {
const hostname = (() => { try { return new URL(url).hostname; } catch { return url; } })();
throw unprocessable(`Could not connect to ${hostname}`);
}
if (!response.ok) {
throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
}
return response.text();
}
async function fetchJson<T>(url: string, authToken?: string): Promise<T> {
const response = await ghFetch(url, {
headers: {
accept: "application/vnd.github+json",
},
}, authToken);
if (!response.ok) {
throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
}
return response.json() as Promise<T>;
}
async function resolveGitHubDefaultBranch(owner: string, repo: string, apiBase: string, authToken?: string) {
const response = await fetchJson<{ default_branch?: string }>(
`${apiBase}/repos/${owner}/${repo}`,
authToken,
);
return asString(response.default_branch) ?? "main";
}
async function resolveGitHubCommitSha(owner: string, repo: string, ref: string, apiBase: string, authToken?: string) {
const response = await fetchJson<{ sha?: string }>(
`${apiBase}/repos/${owner}/${repo}/commits/${encodeURIComponent(ref)}`,
authToken,
);
const sha = asString(response.sha);
if (!sha) {
throw unprocessable(`Failed to resolve ref ${ref}`);
}
return sha;
}
function parseGitHubSourceUrl(rawUrl: string) {
const url = new URL(rawUrl);
if (url.protocol !== "https:") {
throw unprocessable("Source URL must use HTTPS");
}
const parts = url.pathname.split("/").filter(Boolean);
if (parts.length < 2) {
throw unprocessable("Invalid git source URL");
}
const owner = parts[0]!;
const repo = parts[1]!.replace(/\.git$/i, "");
const family = inferGitHostFamily(url.hostname);
let ref = "main";
let basePath = "";
let filePath: string | null = null;
let explicitRef = false;
if (family === "github") {
if (parts[2] === "tree") {
ref = parts[3] ?? "main";
basePath = parts.slice(4).join("/");
explicitRef = true;
} else if (parts[2] === "blob") {
ref = parts[3] ?? "main";
filePath = parts.slice(4).join("/");
basePath = filePath ? path.posix.dirname(filePath) : "";
explicitRef = true;
}
} else if (parts[2] === "src" && (parts[3] === "branch" || parts[3] === "commit" || parts[3] === "tag")) {
// Gitea/Forgejo web URLs: /{owner}/{repo}/src/{branch|commit|tag}/{ref}/{path}
ref = parts[4] ?? "main";
const tail = parts.slice(5);
const tailJoined = tail.join("/");
if (tail.length > 0 && /\.[A-Za-z0-9]+$/.test(tail[tail.length - 1]!)) {
filePath = tailJoined;
basePath = path.posix.dirname(tailJoined);
} else {
basePath = tailJoined;
}
explicitRef = true;
}
return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
}
async function resolveGitHubPinnedRef(parsed: ReturnType<typeof parseGitHubSourceUrl>, authToken?: string) {
const apiBase = gitHubApiBase(parsed.hostname);
if (/^[0-9a-f]{40}$/i.test(parsed.ref.trim())) {
return {
pinnedRef: parsed.ref,
trackingRef: parsed.explicitRef ? parsed.ref : null,
};
}
const trackingRef = parsed.explicitRef
? parsed.ref
: await resolveGitHubDefaultBranch(parsed.owner, parsed.repo, apiBase, authToken);
const pinnedRef = await resolveGitHubCommitSha(parsed.owner, parsed.repo, trackingRef, apiBase, authToken);
return { pinnedRef, trackingRef };
}
function extractCommandTokens(raw: string) {
const matches = raw.match(/"[^"]*"|'[^']*'|\S+/g) ?? [];
@@ -1081,20 +995,12 @@ async function readUrlSkillImports(
return segments.length >= 2 && !parsed.pathname.endsWith(".md");
} catch { return false; } })();
if (looksLikeRepoUrl) {
const parsed = parseGitHubSourceUrl(url);
const apiBase = gitHubApiBase(parsed.hostname);
const { pinnedRef, trackingRef } = await resolveGitHubPinnedRef(parsed, authToken);
let ref = pinnedRef;
const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>(
`${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`,
authToken,
).catch(() => {
throw unprocessable(`Failed to read GitHub tree for ${url}`);
});
const allPaths = (tree.tree ?? [])
.filter((entry) => entry.type === "blob")
.map((entry) => entry.path)
.filter((entry): entry is string => typeof entry === "string");
const parsed = parseGitSourceUrl(url);
const resolved = await resolveGitRef(parsed, authToken);
const snapshot = await openRepoSnapshot(parsed, resolved.trackingRef, resolved.pinnedSha, authToken);
const ref = snapshot.sha;
const trackingRef = resolved.trackingRef;
const allPaths = await snapshot.listFiles();
const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : "";
const scopedPaths = basePrefix
? allPaths.filter((entry) => entry.startsWith(basePrefix))
@@ -1108,13 +1014,13 @@ async function readUrlSkillImports(
);
if (skillPaths.length === 0) {
throw unprocessable(
"No SKILL.md files were found in the provided GitHub source.",
"No SKILL.md files were found in the provided source.",
);
}
const skills: ImportedSkill[] = [];
for (const relativeSkillPath of skillPaths) {
const repoSkillPath = basePrefix ? `${basePrefix}${relativeSkillPath}` : relativeSkillPath;
const markdown = await fetchText(resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoSkillPath), authToken);
const markdown = await snapshot.readFile(repoSkillPath);
const parsedMarkdown = parseFrontmatterMarkdown(markdown);
const skillDir = path.posix.dirname(relativeSkillPath);
const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, path.posix.basename(skillDir));
@@ -1168,15 +1074,15 @@ async function readUrlSkillImports(
if (skills.length === 0) {
throw unprocessable(
requestedSkillSlug
? `Skill ${requestedSkillSlug} was not found in the provided GitHub source.`
: "No SKILL.md files were found in the provided GitHub source.",
? `Skill ${requestedSkillSlug} was not found in the provided source.`
: "No SKILL.md files were found in the provided source.",
);
}
return { skills, warnings };
}
if (url.startsWith("http://") || url.startsWith("https://")) {
const markdown = await fetchText(url, authToken);
const markdown = await fetchPlainText(url);
const parsedMarkdown = parseFrontmatterMarkdown(markdown);
const urlObj = new URL(url);
const fileName = path.posix.basename(urlObj.pathname);
@@ -1801,9 +1707,18 @@ export function companySkillService(db: Db) {
}
const hostname = asString(metadata.hostname) || "github.com";
const apiBase = gitHubApiBase(hostname);
const authToken = await resolveSkillAuthToken(companyId, skill);
const latestRef = await resolveGitHubCommitSha(owner, repo, trackingRef, apiBase, authToken);
const parsed: ParsedGitSource = {
cloneUrl: `https://${hostname}/${owner}/${repo}.git`,
hostname,
owner,
repo,
ref: trackingRef,
basePath: "",
filePath: null,
explicitRef: true,
};
const { pinnedSha: latestRef } = await resolveGitRef(parsed, authToken);
return {
supported: true,
reason: null,
@@ -1843,13 +1758,25 @@ export function companySkillService(db: Db) {
const repo = asString(metadata.repo);
const hostname = asString(metadata.hostname) || "github.com";
const ref = skill.sourceRef ?? asString(metadata.ref) ?? "main";
const trackingRef = asString(metadata.trackingRef);
const repoSkillDir = normalizeGitHubSkillDirectory(asString(metadata.repoSkillDir), skill.slug);
if (!owner || !repo) {
throw unprocessable("Skill source metadata is incomplete.");
}
const authToken = await resolveSkillAuthToken(companyId, skill);
const repoPath = normalizePortablePath(path.posix.join(repoSkillDir, normalizedPath));
content = await fetchText(resolveRawGitHubUrl(hostname, owner, repo, ref, repoPath), authToken);
const parsedSource: ParsedGitSource = {
cloneUrl: `https://${hostname}/${owner}/${repo}.git`,
hostname,
owner,
repo,
ref,
basePath: repoSkillDir,
filePath: null,
explicitRef: true,
};
const snapshot: RepoSnapshot = await openRepoSnapshot(parsedSource, trackingRef ?? null, ref, authToken);
content = await snapshot.readFile(repoPath);
} else if (skill.sourceType === "url") {
if (normalizedPath !== "SKILL.md") {
throw notFound("This skill source only exposes SKILL.md");
+243
View File
@@ -0,0 +1,243 @@
import path from "path";
import git from "isomorphic-git";
import http from "isomorphic-git/http/node";
import { Volume, createFsFromVolume } from "memfs";
import { unprocessable } from "../errors.js";
export type ParsedGitSource = {
cloneUrl: string;
hostname: string;
owner: string;
repo: string;
ref: string | null;
basePath: string;
filePath: string | null;
explicitRef: boolean;
};
export type RefResolution = {
pinnedSha: string;
trackingRef: string | null;
};
export type RepoSnapshot = {
sha: string;
listFiles(): Promise<string[]>;
readFile(repoPath: string): Promise<string>;
};
const SHA_REGEX = /^[0-9a-f]{40}$/i;
export function buildCloneUrl(hostname: string, owner: string, repo: string): string {
return `https://${hostname}/${owner}/${repo}.git`;
}
export function parseGitSourceUrl(rawUrl: string): ParsedGitSource {
let url: URL;
try {
url = new URL(rawUrl);
} catch {
throw unprocessable("Invalid git source URL");
}
if (url.protocol !== "https:") {
throw unprocessable("Source URL must use HTTPS");
}
const segments = url.pathname.split("/").filter(Boolean);
if (segments.length < 2) {
throw unprocessable("Source URL must include an owner and repository");
}
const owner = segments[0]!;
const repo = segments[1]!.replace(/\.git$/i, "");
let ref: string | null = null;
let basePath = "";
let filePath: string | null = null;
let explicitRef = false;
let tail: string[] = [];
// Recognise common host-specific URL shapes so users can paste a tree/blob link.
if (segments[2] === "tree" || segments[2] === "blob") {
// github.com style
ref = segments[3] ?? null;
tail = segments.slice(4);
explicitRef = ref !== null;
} else if (segments[2] === "src" && (segments[3] === "branch" || segments[3] === "commit" || segments[3] === "tag")) {
// gitea / forgejo style
ref = segments[4] ?? null;
tail = segments.slice(5);
explicitRef = ref !== null;
} else if (segments[2] === "-" && (segments[3] === "tree" || segments[3] === "blob")) {
// gitlab style: /{owner}/{repo}/-/tree/{ref}/{path}
ref = segments[4] ?? null;
tail = segments.slice(5);
explicitRef = ref !== null;
} else if (segments[2] === "src" && segments.length >= 4) {
// bitbucket style: /{owner}/{repo}/src/{ref}/{path}
ref = segments[3] ?? null;
tail = segments.slice(4);
explicitRef = ref !== null;
}
if (segments[2] === "blob" || (segments[2] === "-" && segments[3] === "blob")) {
const joined = tail.join("/");
filePath = joined || null;
basePath = filePath ? path.posix.dirname(filePath) : "";
if (basePath === ".") basePath = "";
} else if (tail.length > 0) {
const joined = tail.join("/");
// Heuristic: if the last segment looks like a file (has an extension), treat as file
const last = tail[tail.length - 1]!;
if (/\.[A-Za-z0-9]+$/.test(last)) {
filePath = joined;
basePath = path.posix.dirname(joined);
if (basePath === ".") basePath = "";
} else {
basePath = joined;
}
}
return {
cloneUrl: buildCloneUrl(url.hostname, owner, repo),
hostname: url.hostname,
owner,
repo,
ref,
basePath,
filePath,
explicitRef,
};
}
function buildAuthCallback(authToken: string | undefined) {
if (!authToken) return undefined;
// Universal pattern: token-as-username works for GitHub PATs (classic and fine-grained),
// GitLab project/personal access tokens, Gitea/Forgejo tokens, and Bitbucket app passwords
// when used over the git smart-HTTP protocol.
return () => ({ username: authToken, password: "x-oauth-basic" });
}
async function withGitErrors<T>(label: string, fn: () => Promise<T>): Promise<T> {
try {
return await fn();
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (/HTTP Error: 401/i.test(message)) {
throw unprocessable(`${label}: authentication required or token rejected`);
}
if (/HTTP Error: 403/i.test(message)) {
throw unprocessable(`${label}: access forbidden`);
}
if (/HTTP Error: 404/i.test(message) || /repository not found/i.test(message)) {
throw unprocessable(`${label}: repository not found`);
}
if (/ENOTFOUND|EAI_AGAIN|ECONNREFUSED|ETIMEDOUT/i.test(message)) {
throw unprocessable(`${label}: could not connect to host`);
}
throw unprocessable(`${label}: ${message}`);
}
}
export async function resolveGitRef(
parsed: ParsedGitSource,
authToken?: string,
): Promise<RefResolution> {
const onAuth = buildAuthCallback(authToken);
if (parsed.ref && SHA_REGEX.test(parsed.ref.trim())) {
return {
pinnedSha: parsed.ref.trim().toLowerCase(),
trackingRef: parsed.explicitRef ? parsed.ref.trim() : null,
};
}
const refs = await withGitErrors(`Resolve refs for ${parsed.cloneUrl}`, () =>
git.listServerRefs({
http,
url: parsed.cloneUrl,
onAuth,
symrefs: true,
protocolVersion: 2,
}),
);
const findExact = (fullRef: string) => refs.find((r) => r.ref === fullRef);
if (!parsed.ref) {
const head = refs.find((r) => r.ref === "HEAD");
if (!head?.oid) {
throw unprocessable(`Could not determine default branch for ${parsed.cloneUrl}`);
}
const target = head.target?.replace(/^refs\/heads\//, "") ?? null;
return { pinnedSha: head.oid, trackingRef: target };
}
const wanted = parsed.ref.replace(/^refs\/(heads|tags)\//, "");
const branch = findExact(`refs/heads/${wanted}`);
if (branch?.oid) return { pinnedSha: branch.oid, trackingRef: wanted };
// Prefer the peeled (annotated) tag oid when present, else the tag object oid.
const peeled = findExact(`refs/tags/${wanted}^{}`);
if (peeled?.oid) return { pinnedSha: peeled.oid, trackingRef: wanted };
const tag = findExact(`refs/tags/${wanted}`);
if (tag?.oid) return { pinnedSha: tag.oid, trackingRef: wanted };
throw unprocessable(`Ref '${parsed.ref}' not found in ${parsed.cloneUrl}`);
}
export async function openRepoSnapshot(
parsed: ParsedGitSource,
trackingRef: string | null,
expectedSha: string,
authToken?: string,
): Promise<RepoSnapshot> {
const volume = new Volume();
const fs = createFsFromVolume(volume) as unknown as Parameters<typeof git.clone>[0]["fs"];
const dir = "/repo";
const onAuth = buildAuthCallback(authToken);
await withGitErrors(`Clone ${parsed.cloneUrl}`, async () => {
await git.clone({
fs,
http,
dir,
url: parsed.cloneUrl,
ref: trackingRef ?? expectedSha,
singleBranch: true,
depth: 1,
noCheckout: true,
onAuth,
});
});
// Re-resolve to the actual commit cloned. If upstream moved between resolveGitRef and
// clone, we trust what we cloned (snapshot is self-consistent).
const sha = await git.resolveRef({ fs, dir, ref: "HEAD" });
async function listFiles(): Promise<string[]> {
const out: string[] = [];
await git.walk({
fs,
dir,
trees: [git.TREE({ ref: sha })],
map: async (filepath, entries) => {
if (filepath === ".") return;
const entry = entries?.[0];
if (!entry) return;
const type = await entry.type();
if (type === "blob") {
out.push(filepath);
}
},
});
return out;
}
async function readFile(repoPath: string): Promise<string> {
const normalized = repoPath.replace(/^\/+/, "");
const { blob } = await git.readBlob({ fs, dir, oid: sha, filepath: normalized });
return new TextDecoder("utf-8").decode(blob);
}
return { sha, listFiles, readFile };
}