forked from farhoodlabs/paperclip
80f7d8270c
Mirrors the skills refactor: company-portability was the second user of
the per-host REST shim (its own parallel parseGitHubSourceUrl + fetch
helpers + raw.githubusercontent URL builder), so importing a company
package from a non-github URL hit the same Gitea 404 the skills path did.
- Extend git-source.ts:
- parseGitSourceUrl: also recognises query-string shape
(?ref=...&path=...) used by portability URLs, with precedence over
path-style segments when both are present.
- RepoSnapshot: add readBinary (Uint8Array for the company logo
fetch) and readFileOptional (null on NotFoundError, for the
COMPANY.md probe + main->master fallback).
- Rewrite resolveSource in company-portability.ts to open a single
in-memory snapshot per import and serve all reads (COMPANY.md,
candidate tree, includes, logo) from it. Drops fetchText/fetchJson/
fetchBinary/fetchOptionalText.
- parseGitHubSourceUrl stays exported with its original return shape
({hostname, owner, repo, ref, basePath, companyPath}) so the existing
test suite passes unchanged. It now delegates URL parsing to
parseGitSourceUrl and layers companyPath derivation on top.
- Delete server/src/services/github-fetch.ts: zero remaining callers.
Test coverage:
- 7 new git-source tests (query-string parse variants, query-string
precedence over path style, readBinary, readFileOptional NotFound
null + non-NotFound rethrow) — 34/34 passing.
- 52 existing company-portability tests still pass via the
parseGitHubSourceUrl shim contract.
- Smoke-tested end-to-end against https://git.farh.net/.../?ref=main:
ref resolves, snapshot opens, readFile/readBinary/readFileOptional
all return expected results.
Note: two pre-existing failures in company-skills-routes.test.ts
("does not expose a skill reference...") exist on dev too and are
unrelated to this change.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
283 lines
8.9 KiB
TypeScript
283 lines
8.9 KiB
TypeScript
import path from "path";
|
|
import git from "isomorphic-git";
|
|
import http from "isomorphic-git/http/node";
|
|
import { Volume, createFsFromVolume } from "memfs";
|
|
|
|
import { unprocessable } from "../errors.js";
|
|
|
|
export type ParsedGitSource = {
|
|
cloneUrl: string;
|
|
hostname: string;
|
|
owner: string;
|
|
repo: string;
|
|
ref: string | null;
|
|
basePath: string;
|
|
filePath: string | null;
|
|
explicitRef: boolean;
|
|
};
|
|
|
|
export type RefResolution = {
|
|
pinnedSha: string;
|
|
trackingRef: string | null;
|
|
};
|
|
|
|
export type RepoSnapshot = {
|
|
sha: string;
|
|
listFiles(): Promise<string[]>;
|
|
readFile(repoPath: string): Promise<string>;
|
|
readFileOptional(repoPath: string): Promise<string | null>;
|
|
readBinary(repoPath: string): Promise<Uint8Array>;
|
|
};
|
|
|
|
const SHA_REGEX = /^[0-9a-f]{40}$/i;
|
|
|
|
export function buildCloneUrl(hostname: string, owner: string, repo: string): string {
|
|
return `https://${hostname}/${owner}/${repo}.git`;
|
|
}
|
|
|
|
export function parseGitSourceUrl(rawUrl: string): ParsedGitSource {
|
|
let url: URL;
|
|
try {
|
|
url = new URL(rawUrl);
|
|
} catch {
|
|
throw unprocessable("Invalid git source URL");
|
|
}
|
|
if (url.protocol !== "https:") {
|
|
throw unprocessable("Source URL must use HTTPS");
|
|
}
|
|
const segments = url.pathname.split("/").filter(Boolean);
|
|
if (segments.length < 2) {
|
|
throw unprocessable("Source URL must include an owner and repository");
|
|
}
|
|
const owner = segments[0]!;
|
|
const repo = segments[1]!.replace(/\.git$/i, "");
|
|
|
|
// Query-string shape: /{owner}/{repo}?ref=...&path=...
|
|
// Used by company portability URLs. Takes precedence over path-based parsing
|
|
// so a URL with both shapes (rare) prefers the explicit query params.
|
|
const queryRef = url.searchParams.get("ref")?.trim() ?? null;
|
|
const queryPath = url.searchParams.get("path")?.trim() ?? null;
|
|
if (queryRef || queryPath) {
|
|
const normalizedPath = (queryPath ?? "").replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
|
|
return {
|
|
cloneUrl: buildCloneUrl(url.hostname, owner, repo),
|
|
hostname: url.hostname,
|
|
owner,
|
|
repo,
|
|
ref: queryRef || null,
|
|
basePath: normalizedPath,
|
|
filePath: null,
|
|
explicitRef: Boolean(queryRef),
|
|
};
|
|
}
|
|
|
|
let ref: string | null = null;
|
|
let basePath = "";
|
|
let filePath: string | null = null;
|
|
let explicitRef = false;
|
|
let tail: string[] = [];
|
|
|
|
// Recognise common host-specific URL shapes so users can paste a tree/blob link.
|
|
if (segments[2] === "tree" || segments[2] === "blob") {
|
|
// github.com style
|
|
ref = segments[3] ?? null;
|
|
tail = segments.slice(4);
|
|
explicitRef = ref !== null;
|
|
} else if (segments[2] === "src" && (segments[3] === "branch" || segments[3] === "commit" || segments[3] === "tag")) {
|
|
// gitea / forgejo style
|
|
ref = segments[4] ?? null;
|
|
tail = segments.slice(5);
|
|
explicitRef = ref !== null;
|
|
} else if (segments[2] === "-" && (segments[3] === "tree" || segments[3] === "blob")) {
|
|
// gitlab style: /{owner}/{repo}/-/tree/{ref}/{path}
|
|
ref = segments[4] ?? null;
|
|
tail = segments.slice(5);
|
|
explicitRef = ref !== null;
|
|
} else if (segments[2] === "src" && segments.length >= 4) {
|
|
// bitbucket style: /{owner}/{repo}/src/{ref}/{path}
|
|
ref = segments[3] ?? null;
|
|
tail = segments.slice(4);
|
|
explicitRef = ref !== null;
|
|
}
|
|
|
|
if (segments[2] === "blob" || (segments[2] === "-" && segments[3] === "blob")) {
|
|
const joined = tail.join("/");
|
|
filePath = joined || null;
|
|
basePath = filePath ? path.posix.dirname(filePath) : "";
|
|
if (basePath === ".") basePath = "";
|
|
} else if (tail.length > 0) {
|
|
const joined = tail.join("/");
|
|
// Heuristic: if the last segment looks like a file (has an extension), treat as file
|
|
const last = tail[tail.length - 1]!;
|
|
if (/\.[A-Za-z0-9]+$/.test(last)) {
|
|
filePath = joined;
|
|
basePath = path.posix.dirname(joined);
|
|
if (basePath === ".") basePath = "";
|
|
} else {
|
|
basePath = joined;
|
|
}
|
|
}
|
|
|
|
return {
|
|
cloneUrl: buildCloneUrl(url.hostname, owner, repo),
|
|
hostname: url.hostname,
|
|
owner,
|
|
repo,
|
|
ref,
|
|
basePath,
|
|
filePath,
|
|
explicitRef,
|
|
};
|
|
}
|
|
|
|
function buildAuthCallback(authToken: string | undefined) {
|
|
if (!authToken) return undefined;
|
|
// Universal pattern: token-as-username works for GitHub PATs (classic and fine-grained),
|
|
// GitLab project/personal access tokens, Gitea/Forgejo tokens, and Bitbucket app passwords
|
|
// when used over the git smart-HTTP protocol.
|
|
return () => ({ username: authToken, password: "x-oauth-basic" });
|
|
}
|
|
|
|
async function withGitErrors<T>(label: string, fn: () => Promise<T>): Promise<T> {
|
|
try {
|
|
return await fn();
|
|
} catch (err) {
|
|
const message = err instanceof Error ? err.message : String(err);
|
|
if (/HTTP Error: 401/i.test(message)) {
|
|
throw unprocessable(`${label}: authentication required or token rejected`);
|
|
}
|
|
if (/HTTP Error: 403/i.test(message)) {
|
|
throw unprocessable(`${label}: access forbidden`);
|
|
}
|
|
if (/HTTP Error: 404/i.test(message) || /repository not found/i.test(message)) {
|
|
throw unprocessable(`${label}: repository not found`);
|
|
}
|
|
if (/ENOTFOUND|EAI_AGAIN|ECONNREFUSED|ETIMEDOUT/i.test(message)) {
|
|
throw unprocessable(`${label}: could not connect to host`);
|
|
}
|
|
throw unprocessable(`${label}: ${message}`);
|
|
}
|
|
}
|
|
|
|
export async function resolveGitRef(
|
|
parsed: ParsedGitSource,
|
|
authToken?: string,
|
|
): Promise<RefResolution> {
|
|
const onAuth = buildAuthCallback(authToken);
|
|
|
|
if (parsed.ref && SHA_REGEX.test(parsed.ref.trim())) {
|
|
return {
|
|
pinnedSha: parsed.ref.trim().toLowerCase(),
|
|
trackingRef: parsed.explicitRef ? parsed.ref.trim() : null,
|
|
};
|
|
}
|
|
|
|
const refs = await withGitErrors(`Resolve refs for ${parsed.cloneUrl}`, () =>
|
|
git.listServerRefs({
|
|
http,
|
|
url: parsed.cloneUrl,
|
|
onAuth,
|
|
symrefs: true,
|
|
protocolVersion: 2,
|
|
}),
|
|
);
|
|
|
|
const findExact = (fullRef: string) => refs.find((r) => r.ref === fullRef);
|
|
|
|
if (!parsed.ref) {
|
|
const head = refs.find((r) => r.ref === "HEAD");
|
|
if (!head?.oid) {
|
|
throw unprocessable(`Could not determine default branch for ${parsed.cloneUrl}`);
|
|
}
|
|
const target = head.target?.replace(/^refs\/heads\//, "") ?? null;
|
|
return { pinnedSha: head.oid, trackingRef: target };
|
|
}
|
|
|
|
const wanted = parsed.ref.replace(/^refs\/(heads|tags)\//, "");
|
|
const branch = findExact(`refs/heads/${wanted}`);
|
|
if (branch?.oid) return { pinnedSha: branch.oid, trackingRef: wanted };
|
|
|
|
// Prefer the peeled (annotated) tag oid when present, else the tag object oid.
|
|
const peeled = findExact(`refs/tags/${wanted}^{}`);
|
|
if (peeled?.oid) return { pinnedSha: peeled.oid, trackingRef: wanted };
|
|
const tag = findExact(`refs/tags/${wanted}`);
|
|
if (tag?.oid) return { pinnedSha: tag.oid, trackingRef: wanted };
|
|
|
|
throw unprocessable(`Ref '${parsed.ref}' not found in ${parsed.cloneUrl}`);
|
|
}
|
|
|
|
export async function openRepoSnapshot(
|
|
parsed: ParsedGitSource,
|
|
trackingRef: string | null,
|
|
expectedSha: string,
|
|
authToken?: string,
|
|
): Promise<RepoSnapshot> {
|
|
const volume = new Volume();
|
|
const fs = createFsFromVolume(volume) as unknown as Parameters<typeof git.clone>[0]["fs"];
|
|
const dir = "/repo";
|
|
const onAuth = buildAuthCallback(authToken);
|
|
|
|
await withGitErrors(`Clone ${parsed.cloneUrl}`, async () => {
|
|
await git.clone({
|
|
fs,
|
|
http,
|
|
dir,
|
|
url: parsed.cloneUrl,
|
|
ref: trackingRef ?? expectedSha,
|
|
singleBranch: true,
|
|
depth: 1,
|
|
noCheckout: true,
|
|
onAuth,
|
|
});
|
|
});
|
|
|
|
// Re-resolve to the actual commit cloned. If upstream moved between resolveGitRef and
|
|
// clone, we trust what we cloned (snapshot is self-consistent).
|
|
const sha = await git.resolveRef({ fs, dir, ref: "HEAD" });
|
|
|
|
async function listFiles(): Promise<string[]> {
|
|
const out: string[] = [];
|
|
await git.walk({
|
|
fs,
|
|
dir,
|
|
trees: [git.TREE({ ref: sha })],
|
|
map: async (filepath, entries) => {
|
|
if (filepath === ".") return;
|
|
const entry = entries?.[0];
|
|
if (!entry) return;
|
|
const type = await entry.type();
|
|
if (type === "blob") {
|
|
out.push(filepath);
|
|
}
|
|
},
|
|
});
|
|
return out;
|
|
}
|
|
|
|
async function readBinary(repoPath: string): Promise<Uint8Array> {
|
|
const normalized = repoPath.replace(/^\/+/, "");
|
|
const { blob } = await git.readBlob({ fs, dir, oid: sha, filepath: normalized });
|
|
return blob;
|
|
}
|
|
|
|
async function readFile(repoPath: string): Promise<string> {
|
|
const blob = await readBinary(repoPath);
|
|
return new TextDecoder("utf-8").decode(blob);
|
|
}
|
|
|
|
async function readFileOptional(repoPath: string): Promise<string | null> {
|
|
try {
|
|
return await readFile(repoPath);
|
|
} catch (err) {
|
|
// isomorphic-git throws NotFoundError when the path is missing from the tree.
|
|
const name = (err as { code?: string; name?: string } | null)?.code
|
|
?? (err as { name?: string } | null)?.name
|
|
?? "";
|
|
if (/NotFound/i.test(name)) return null;
|
|
throw err;
|
|
}
|
|
}
|
|
|
|
return { sha, listFiles, readFile, readFileOptional, readBinary };
|
|
}
|