refactor(skills): replace per-host REST shims with git wire protocol

The skill import/update/file-read pipeline talked to host-specific REST
APIs (GitHub /commits/{ref}, /git/trees/{sha}, raw.githubusercontent.com)
and the recent Gitea support was a parallel shim on top of the same
pattern. The result was multiple ref-resolution shapes that needed
per-host branching, and on Gitea the /commits/{ref} endpoint returns
404 outright -- so even public Gitea/Forgejo repos failed to import.

Replace with a single git-source module backed by isomorphic-git +
memfs. It speaks the smart-HTTP protocol any sane git server already
serves:

- resolveGitRef: one listServerRefs call, no host API. Handles default
  branch (symref on HEAD), named branches, annotated/lightweight tags,
  and SHA passthrough.
- openRepoSnapshot: shallow singleBranch clone into an in-memory fs;
  listFiles via git.walk, readFile via git.readBlob. No tempdirs, no
  execFile, no per-host endpoints.
- Universal auth via onAuth (token-as-username) covering GitHub PATs,
  GitLab PATs, Gitea/Forgejo tokens.
- parseGitSourceUrl recognises github tree/blob, gitea src/branch|
  commit|tag, gitlab /-/tree, bitbucket /src/{ref} URL shapes plus
  bare clone URLs.

Stored skill metadata is unchanged (hostname/owner/repo/ref/trackingRef/
repoSkillDir), so existing rows keep working -- the clone URL is
derived at fetch time.

company-portability.ts still imports github-fetch.ts (same broken
pattern, separate feature). Left as a follow-up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 09:16:00 -04:00
parent 818a8eade8
commit 0fd4e9c4d1
4 changed files with 830 additions and 117 deletions
+243
View File
@@ -0,0 +1,243 @@
import path from "path";
import git from "isomorphic-git";
import http from "isomorphic-git/http/node";
import { Volume, createFsFromVolume } from "memfs";
import { unprocessable } from "../errors.js";
/**
 * Result of parsing a pasted repository URL (see `parseGitSourceUrl`).
 */
export type ParsedGitSource = {
/** HTTPS clone URL built from `hostname`, `owner` and `repo` via `buildCloneUrl`. */
cloneUrl: string;
/** Host part of the source URL (`URL.hostname`, so without any port). */
hostname: string;
/** First path segment of the source URL. */
owner: string;
/** Second path segment of the source URL with a trailing `.git` removed. */
repo: string;
/** Ref taken from the URL path, or `null` when the URL did not carry one. */
ref: string | null;
/** Directory inside the repository the URL points at; `""` means the repository root. */
basePath: string;
/** Path of the file the URL points at, or `null` when it points at a directory or the repository itself. */
filePath: string | null;
/** `true` when `ref` was taken from the URL rather than left unset. */
explicitRef: boolean;
};
/**
 * Outcome of resolving a ref against the remote (see `resolveGitRef`).
 */
export type RefResolution = {
/** Object id the ref resolved to at resolution time. */
pinnedSha: string;
/** Short branch/tag name (or explicit SHA) to follow on later updates; `null` when there is nothing to track. */
trackingRef: string | null;
};
/**
 * Read-only view over a repository cloned by `openRepoSnapshot`.
 */
export type RepoSnapshot = {
/** Object id that `HEAD` of the clone resolved to. */
sha: string;
/** Lists the repository-relative paths of every blob in the tree at `sha`. */
listFiles(): Promise<string[]>;
/** Reads the blob at `repoPath` (leading slashes are ignored) and decodes it as UTF-8 text. */
readFile(repoPath: string): Promise<string>;
};
// Matches a full 40-character hexadecimal object id, case-insensitively.
const SHA_REGEX = /^[0-9a-f]{40}$/i;
/**
 * Derives the HTTPS clone URL for a repository from its stored coordinates.
 *
 * @param hostname - Host serving the repository.
 * @param owner - First path segment of the repository URL.
 * @param repo - Repository name without the `.git` suffix (it is appended here).
 * @returns A URL of the form `https://{hostname}/{owner}/{repo}.git`.
 */
export function buildCloneUrl(hostname: string, owner: string, repo: string): string {
  const repoWithSuffix = repo + ".git";
  // "https:/" joined with "/" yields the "https://" scheme prefix.
  return ["https:/", hostname, owner, repoWithSuffix].join("/");
}
/**
 * Parses a pasted HTTPS repository link into clone URL, ref and in-repo path.
 *
 * Recognised path shapes after `/{owner}/{repo}`:
 * - `/tree/{ref}/{path}` and `/blob/{ref}/{path}` (GitHub style)
 * - `/src/branch|commit|tag/{ref}/{path}` (Gitea / Forgejo style)
 * - `/-/tree/{ref}/{path}` and `/-/blob/{ref}/{path}` (GitLab style)
 * - `/src/{ref}/{path}` (Bitbucket style)
 * - anything else is treated as a bare repository URL with no ref and no path.
 *
 * The ref is always a single path segment, so a branch name containing `/`
 * must be percent-encoded in the URL (`feature%2Ffoo`) to be picked up whole.
 * Ref and path segments are percent-decoded so that names with spaces or
 * non-ASCII characters match the paths stored in the repository.
 *
 * NOTE(review): `hostname` and `cloneUrl` use `URL.hostname`, which omits any
 * port; a server on a non-default port yields a clone URL without it. Confirm
 * whether `URL.host` should be used before changing stored metadata.
 *
 * @param rawUrl - The URL as entered by the user.
 * @returns The parsed source description.
 * @throws The error built by `unprocessable` when the URL is not parseable, is
 *   not HTTPS, or lacks an owner/repository.
 */
export function parseGitSourceUrl(rawUrl: string): ParsedGitSource {
  let url: URL;
  try {
    url = new URL(rawUrl);
  } catch {
    throw unprocessable("Invalid git source URL");
  }
  if (url.protocol !== "https:") {
    throw unprocessable("Source URL must use HTTPS");
  }

  const segments = url.pathname.split("/").filter(Boolean);
  if (segments.length < 2) {
    throw unprocessable("Source URL must include an owner and repository");
  }
  const owner = segments[0]!;
  const repo = segments[1]!.replace(/\.git$/i, "");
  if (repo === "") {
    // e.g. "https://host/owner/.git" — nothing is left once the suffix is stripped.
    throw unprocessable("Source URL must include an owner and repository");
  }

  // `URL.pathname` stays percent-encoded; decode so "my%20skill" becomes "my skill".
  // A malformed escape sequence is kept verbatim rather than rejecting the URL.
  const decodeSegment = (segment: string): string => {
    try {
      return decodeURIComponent(segment);
    } catch {
      return segment;
    }
  };

  const marker = segments[2];
  const subMarker = segments[3];

  // Index of the segment holding the ref, or -1 for a bare repository URL.
  let refIndex = -1;
  if (marker === "tree" || marker === "blob") {
    // github.com style
    refIndex = 3;
  } else if (marker === "src" && (subMarker === "branch" || subMarker === "commit" || subMarker === "tag")) {
    // gitea / forgejo style
    refIndex = 4;
  } else if (marker === "-" && (subMarker === "tree" || subMarker === "blob")) {
    // gitlab style: /{owner}/{repo}/-/tree/{ref}/{path}
    refIndex = 4;
  } else if (marker === "src" && segments.length >= 4) {
    // bitbucket style: /{owner}/{repo}/src/{ref}/{path}
    refIndex = 3;
  }

  const rawRef = refIndex >= 0 ? (segments[refIndex] ?? null) : null;
  const ref = rawRef === null ? null : decodeSegment(rawRef);
  const explicitRef = ref !== null;
  const tail = refIndex >= 0 ? segments.slice(refIndex + 1).map((segment) => decodeSegment(segment)) : [];

  let basePath = "";
  let filePath: string | null = null;
  const isBlobLink = marker === "blob" || (marker === "-" && subMarker === "blob");

  if (isBlobLink) {
    // A blob link always names a file; its directory becomes the base path.
    const joined = tail.join("/");
    filePath = joined || null;
    basePath = filePath ? path.posix.dirname(filePath) : "";
    if (basePath === ".") basePath = "";
  } else if (tail.length > 0) {
    const joined = tail.join("/");
    // Heuristic: if the last segment looks like a file (has an extension), treat as file
    const last = tail[tail.length - 1]!;
    if (/\.[A-Za-z0-9]+$/.test(last)) {
      filePath = joined;
      basePath = path.posix.dirname(joined);
      if (basePath === ".") basePath = "";
    } else {
      basePath = joined;
    }
  }

  return {
    cloneUrl: buildCloneUrl(url.hostname, owner, repo),
    hostname: url.hostname,
    owner,
    repo,
    ref,
    basePath,
    filePath,
    explicitRef,
  };
}
/**
 * Builds the credential callback passed as `onAuth` to the git operations in this module.
 *
 * With no token (undefined or empty) it returns `undefined`, so no callback is
 * registered. Otherwise the returned callback supplies the token as the
 * basic-auth username with the fixed password `"x-oauth-basic"`.
 *
 * NOTE(review): the token-as-username convention is not accepted by every
 * host; some document token-as-password instead. Verify against each host that
 * must be supported.
 *
 * @param authToken - Access token for the remote, if any.
 * @returns A credential callback, or `undefined` when no token was given.
 */
function buildAuthCallback(authToken: string | undefined) {
  if (authToken) {
    // A fresh credentials object is produced on every invocation.
    return function provideCredentials() {
      return { username: authToken, password: "x-oauth-basic" };
    };
  }
  return undefined;
}
/**
 * Runs a git operation and converts any failure into an error built by `unprocessable`.
 *
 * The failure's message text is matched against known patterns, in order, and
 * the first match selects a short explanation; unmatched failures keep their
 * original message. Every resulting message is prefixed with `label`.
 *
 * @param label - Human-readable description of the operation, used as message prefix.
 * @param fn - The operation to run.
 * @returns Whatever `fn` resolves to.
 * @throws The error built by `unprocessable` for any failure of `fn`.
 */
async function withGitErrors<T>(label: string, fn: () => Promise<T>): Promise<T> {
  try {
    const outcome = await fn();
    return outcome;
  } catch (cause) {
    const text = cause instanceof Error ? cause.message : String(cause);
    // Checked top to bottom; the first matching pattern wins.
    const knownFailures: ReadonlyArray<readonly [RegExp, string]> = [
      [/HTTP Error: 401/i, "authentication required or token rejected"],
      [/HTTP Error: 403/i, "access forbidden"],
      [/HTTP Error: 404/i, "repository not found"],
      [/repository not found/i, "repository not found"],
      [/ENOTFOUND|EAI_AGAIN|ECONNREFUSED|ETIMEDOUT/i, "could not connect to host"],
    ];
    const match = knownFailures.find(([pattern]) => pattern.test(text));
    const detail = match !== undefined ? match[1] : text;
    throw unprocessable(`${label}: ${detail}`);
  }
}
/**
 * Resolves the ref of a parsed source to a concrete object id using a single
 * remote ref listing.
 *
 * Resolution rules:
 * - A 40-hex `parsed.ref` is returned as-is (lower-cased) without contacting
 *   the remote; it is also the tracking ref when it came from the URL.
 * - No ref: the remote `HEAD` is used and the tracking ref is the branch that
 *   `HEAD` symbolically points at (or `null` if the server does not report it).
 * - Otherwise a leading `refs/heads/` or `refs/tags/` is stripped and the name
 *   is looked up as a branch first, then as a tag.
 *
 * For annotated tags the peeled target is preferred over the tag object's own
 * id. The ref listing only reports that target (in the entry's `peeled` field)
 * when `peelTags` is requested, so it is requested here.
 *
 * @param parsed - Source description from `parseGitSourceUrl`.
 * @param authToken - Optional access token for private repositories.
 * @returns The pinned object id and the ref to track for updates.
 * @throws The error built by `unprocessable` when the listing fails, the
 *   default branch cannot be determined, or the ref does not exist.
 */
export async function resolveGitRef(
  parsed: ParsedGitSource,
  authToken?: string,
): Promise<RefResolution> {
  // SHA pass-through: nothing to look up on the remote.
  if (parsed.ref && SHA_REGEX.test(parsed.ref.trim())) {
    return {
      pinnedSha: parsed.ref.trim().toLowerCase(),
      trackingRef: parsed.explicitRef ? parsed.ref.trim() : null,
    };
  }

  const onAuth = buildAuthCallback(authToken);
  const refs = await withGitErrors(`Resolve refs for ${parsed.cloneUrl}`, () =>
    git.listServerRefs({
      http,
      url: parsed.cloneUrl,
      onAuth,
      symrefs: true,
      // Without this the server's peeled tag targets are not reported at all.
      peelTags: true,
      protocolVersion: 2,
    }),
  );
  const findExact = (fullRef: string) => refs.find((r) => r.ref === fullRef);

  if (!parsed.ref) {
    const head = findExact("HEAD");
    if (!head?.oid) {
      throw unprocessable(`Could not determine default branch for ${parsed.cloneUrl}`);
    }
    const target = head.target?.replace(/^refs\/heads\//, "") ?? null;
    return { pinnedSha: head.oid, trackingRef: target };
  }

  const wanted = parsed.ref.replace(/^refs\/(heads|tags)\//, "");

  const branch = findExact(`refs/heads/${wanted}`);
  if (branch?.oid) return { pinnedSha: branch.oid, trackingRef: wanted };

  // Prefer the peeled (annotated) tag target when present, else the tag's own oid.
  // The `^{}` lookup is kept only as a fallback should a listing ever expose
  // peeled entries as separate refs.
  const tag = findExact(`refs/tags/${wanted}`);
  const separatePeeledEntry = findExact(`refs/tags/${wanted}^{}`);
  const tagSha = tag?.peeled || separatePeeledEntry?.oid || tag?.oid;
  if (tagSha) return { pinnedSha: tagSha, trackingRef: wanted };

  throw unprocessable(`Ref '${parsed.ref}' not found in ${parsed.cloneUrl}`);
}
/**
 * Clones a repository into an in-memory filesystem and returns a read-only
 * snapshot over it.
 *
 * The clone is requested as a depth-1, single-branch clone with no working
 * tree checkout. It fetches `trackingRef` when given and `expectedSha`
 * otherwise. The snapshot's `sha` is whatever `HEAD` resolves to after the
 * clone, so if upstream moved since the ref was resolved, the snapshot
 * reflects what was actually cloned rather than `expectedSha`.
 *
 * @param parsed - Source description; only `cloneUrl` is read here.
 * @param trackingRef - Branch/tag name to clone, or `null` to clone `expectedSha`.
 * @param expectedSha - Object id to clone when there is no tracking ref.
 * @param authToken - Optional access token for private repositories.
 * @returns A snapshot exposing the cloned `sha`, a file lister and a file reader.
 * @throws The error built by `unprocessable` when the clone fails.
 */
export async function openRepoSnapshot(
  parsed: ParsedGitSource,
  trackingRef: string | null,
  expectedSha: string,
  authToken?: string,
): Promise<RepoSnapshot> {
  const repoDir = "/repo";
  const memVolume = new Volume();
  // memfs's fs object is structurally usable by isomorphic-git but not typed as such.
  const memFs = createFsFromVolume(memVolume) as unknown as Parameters<typeof git.clone>[0]["fs"];
  const credentials = buildAuthCallback(authToken);
  const cloneRef = trackingRef ?? expectedSha;

  await withGitErrors(`Clone ${parsed.cloneUrl}`, () =>
    git.clone({
      fs: memFs,
      http,
      dir: repoDir,
      url: parsed.cloneUrl,
      ref: cloneRef,
      singleBranch: true,
      depth: 1,
      noCheckout: true,
      onAuth: credentials,
    }),
  );

  // Trust what was actually cloned: the snapshot stays self-consistent even if
  // upstream moved between ref resolution and the clone.
  const headSha = await git.resolveRef({ fs: memFs, dir: repoDir, ref: "HEAD" });

  return {
    sha: headSha,

    async listFiles(): Promise<string[]> {
      const blobPaths: string[] = [];
      await git.walk({
        fs: memFs,
        dir: repoDir,
        trees: [git.TREE({ ref: headSha })],
        map: async (entryPath, walkerEntries) => {
          // The root entry and missing entries are skipped.
          const treeEntry = entryPath === "." ? undefined : walkerEntries?.[0];
          if (!treeEntry) return;
          if ((await treeEntry.type()) === "blob") {
            blobPaths.push(entryPath);
          }
        },
      });
      return blobPaths;
    },

    async readFile(repoPath: string): Promise<string> {
      // Repository paths are relative; drop any leading slashes.
      const relativePath = repoPath.replace(/^\/+/, "");
      const result = await git.readBlob({ fs: memFs, dir: repoDir, oid: headSha, filepath: relativePath });
      return new TextDecoder("utf-8").decode(result.blob);
    },
  };
}