refactor(portability): migrate to git-source; delete github-fetch.ts
Mirrors the skills refactor: company-portability was the second user of
the per-host REST shim (its own parallel parseGitHubSourceUrl + fetch
helpers + raw.githubusercontent URL builder), so importing a company
package from a non-github URL hit the same Gitea 404 the skills path did.
- Extend git-source.ts:
- parseGitSourceUrl: also recognises query-string shape
(?ref=...&path=...) used by portability URLs, with precedence over
path-style segments when both are present.
- RepoSnapshot: add readBinary (Uint8Array for the company logo
fetch) and readFileOptional (null on NotFoundError, for the
COMPANY.md probe + main->master fallback).
- Rewrite resolveSource in company-portability.ts to open a single
in-memory snapshot per import and serve all reads (COMPANY.md,
candidate tree, includes, logo) from it. Drops fetchText/fetchJson/
fetchBinary/fetchOptionalText.
- parseGitHubSourceUrl stays exported with its original return shape
({hostname, owner, repo, ref, basePath, companyPath}) so the existing
test suite passes unchanged. It now delegates URL parsing to
parseGitSourceUrl and layers companyPath derivation on top.
- Delete server/src/services/github-fetch.ts: zero remaining callers.
Test coverage:
- 7 new git-source tests (query-string parse variants, query-string
precedence over path style, readBinary, readFileOptional NotFound
null + non-NotFound rethrow) — 34/34 passing.
- 52 existing company-portability tests still pass via the
parseGitHubSourceUrl shim contract.
- Smoke-tested end-to-end against https://git.farh.net/.../?ref=main:
ref resolves, snapshot opens, readFile/readBinary/readFileOptional
all return expected results.
Note: two pre-existing failures in company-skills-routes.test.ts
("does not expose a skill reference...") exist on dev too and are
unrelated to this change.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -145,6 +145,43 @@ describe("parseGitSourceUrl", () => {
|
||||
it("rejects malformed URLs", () => {
|
||||
expect(() => parseGitSourceUrl("not a url")).toThrow();
|
||||
});
|
||||
|
||||
it("parses a query-string URL with ?ref= and ?path=", () => {
|
||||
expect(
|
||||
parseGitSourceUrl("https://github.com/o/r?ref=feature%2Fdemo&path=subdir"),
|
||||
).toMatchObject({
|
||||
cloneUrl: "https://github.com/o/r.git",
|
||||
ref: "feature/demo",
|
||||
basePath: "subdir",
|
||||
filePath: null,
|
||||
explicitRef: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("parses a query-string URL with only ?ref=", () => {
|
||||
expect(parseGitSourceUrl("https://github.com/o/r?ref=develop")).toMatchObject({
|
||||
ref: "develop",
|
||||
basePath: "",
|
||||
explicitRef: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("parses a query-string URL with only ?path=", () => {
|
||||
expect(parseGitSourceUrl("https://github.com/o/r?path=sub")).toMatchObject({
|
||||
ref: null,
|
||||
basePath: "sub",
|
||||
explicitRef: false,
|
||||
});
|
||||
});
|
||||
|
||||
it("query-string parsing takes precedence over path-style segments", () => {
|
||||
expect(
|
||||
parseGitSourceUrl("https://github.com/o/r/tree/main/old?ref=newref&path=newpath"),
|
||||
).toMatchObject({
|
||||
ref: "newref",
|
||||
basePath: "newpath",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildCloneUrl", () => {
|
||||
@@ -333,4 +370,41 @@ describe("openRepoSnapshot", () => {
|
||||
openRepoSnapshot(parsed, "main", "1111111111111111111111111111111111111111"),
|
||||
).rejects.toThrow(/repository not found/i);
|
||||
});
|
||||
|
||||
it("readBinary returns the raw blob bytes", async () => {
|
||||
cloneFn.mockResolvedValue(undefined);
|
||||
resolveRefFn.mockResolvedValue("ffffffffffffffffffffffffffffffffffffffff");
|
||||
walkFn.mockImplementation(async () => {});
|
||||
const bytes = new Uint8Array([0x89, 0x50, 0x4e, 0x47]);
|
||||
readBlobFn.mockResolvedValue({ blob: bytes });
|
||||
|
||||
const parsed = parseGitSourceUrl("https://git.example.com/o/r");
|
||||
const snap = await openRepoSnapshot(parsed, "main", "ffffffffffffffffffffffffffffffffffffffff");
|
||||
const result = await snap.readBinary("logo.png");
|
||||
expect(result).toBe(bytes);
|
||||
});
|
||||
|
||||
it("readFileOptional returns null on NotFoundError", async () => {
|
||||
cloneFn.mockResolvedValue(undefined);
|
||||
resolveRefFn.mockResolvedValue("ffffffffffffffffffffffffffffffffffffffff");
|
||||
walkFn.mockImplementation(async () => {});
|
||||
const err = Object.assign(new Error("missing"), { code: "NotFoundError" });
|
||||
readBlobFn.mockRejectedValue(err);
|
||||
|
||||
const parsed = parseGitSourceUrl("https://git.example.com/o/r");
|
||||
const snap = await openRepoSnapshot(parsed, "main", "ffffffffffffffffffffffffffffffffffffffff");
|
||||
const result = await snap.readFileOptional("missing.md");
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("readFileOptional rethrows non-NotFound errors", async () => {
|
||||
cloneFn.mockResolvedValue(undefined);
|
||||
resolveRefFn.mockResolvedValue("ffffffffffffffffffffffffffffffffffffffff");
|
||||
walkFn.mockImplementation(async () => {});
|
||||
readBlobFn.mockRejectedValue(new Error("disk explosion"));
|
||||
|
||||
const parsed = parseGitSourceUrl("https://git.example.com/o/r");
|
||||
const snap = await openRepoSnapshot(parsed, "main", "ffffffffffffffffffffffffffffffffffffffff");
|
||||
await expect(snap.readFileOptional("any.md")).rejects.toThrow(/disk explosion/);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -57,7 +57,7 @@ import {
|
||||
import { requireOpenCodeModelId } from "@paperclipai/adapter-opencode-local/server";
|
||||
import { findServerAdapter } from "../adapters/index.js";
|
||||
import { forbidden, HttpError, notFound, unprocessable } from "../errors.js";
|
||||
import { ghFetch, gitHubApiBase, resolveRawGitHubUrl } from "./github-fetch.js";
|
||||
import { openRepoSnapshot, parseGitSourceUrl, resolveGitRef, type RepoSnapshot } from "./git-source.js";
|
||||
import type { StorageService } from "../storage/types.js";
|
||||
import { accessService } from "./access.js";
|
||||
import { agentService } from "./agents.js";
|
||||
@@ -2339,42 +2339,6 @@ function parseFrontmatterMarkdown(raw: string): MarkdownDoc {
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchText(url: string) {
|
||||
const response = await ghFetch(url);
|
||||
if (!response.ok) {
|
||||
throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
|
||||
}
|
||||
return response.text();
|
||||
}
|
||||
|
||||
async function fetchOptionalText(url: string) {
|
||||
const response = await ghFetch(url);
|
||||
if (response.status === 404) return null;
|
||||
if (!response.ok) {
|
||||
throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
|
||||
}
|
||||
return response.text();
|
||||
}
|
||||
|
||||
async function fetchBinary(url: string) {
|
||||
const response = await ghFetch(url);
|
||||
if (!response.ok) {
|
||||
throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
|
||||
}
|
||||
return Buffer.from(await response.arrayBuffer());
|
||||
}
|
||||
|
||||
async function fetchJson<T>(url: string): Promise<T> {
|
||||
const response = await ghFetch(url, {
|
||||
headers: {
|
||||
accept: "application/vnd.github+json",
|
||||
},
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
|
||||
}
|
||||
return response.json() as Promise<T>;
|
||||
}
|
||||
|
||||
function dedupeEnvInputs(values: CompanyPortabilityManifest["envInputs"]) {
|
||||
const seen = new Set<string>();
|
||||
@@ -2864,52 +2828,37 @@ function normalizeGitHubSourcePath(value: string | null | undefined) {
|
||||
|
||||
export function parseGitHubSourceUrl(rawUrl: string) {
|
||||
const url = new URL(rawUrl);
|
||||
if (url.protocol !== "https:") {
|
||||
throw unprocessable("GitHub source URL must use HTTPS");
|
||||
}
|
||||
const hostname = url.hostname;
|
||||
const parts = url.pathname.split("/").filter(Boolean);
|
||||
if (parts.length < 2) {
|
||||
throw unprocessable("Invalid GitHub URL");
|
||||
}
|
||||
const owner = parts[0]!;
|
||||
const repo = parts[1]!.replace(/\.git$/i, "");
|
||||
const queryRef = url.searchParams.get("ref")?.trim();
|
||||
const queryPath = normalizeGitHubSourcePath(url.searchParams.get("path"));
|
||||
// Handle the portability-specific companyPath query param before delegating,
|
||||
// since git-source has no notion of it.
|
||||
const queryCompanyPath = normalizeGitHubSourcePath(url.searchParams.get("companyPath"));
|
||||
if (queryRef || queryPath || queryCompanyPath) {
|
||||
const companyPath = queryCompanyPath || [queryPath, "COMPANY.md"].filter(Boolean).join("/") || "COMPANY.md";
|
||||
let basePath = queryPath;
|
||||
if (!basePath && companyPath !== "COMPANY.md") {
|
||||
basePath = path.posix.dirname(companyPath);
|
||||
if (basePath === ".") basePath = "";
|
||||
|
||||
const parsed = parseGitSourceUrl(rawUrl);
|
||||
|
||||
let companyPath: string;
|
||||
let basePath = parsed.basePath;
|
||||
if (queryCompanyPath) {
|
||||
companyPath = queryCompanyPath;
|
||||
if (!basePath) {
|
||||
const derived = path.posix.dirname(companyPath);
|
||||
basePath = derived === "." ? "" : derived;
|
||||
}
|
||||
return {
|
||||
hostname,
|
||||
owner,
|
||||
repo,
|
||||
ref: queryRef || "main",
|
||||
basePath,
|
||||
companyPath,
|
||||
};
|
||||
} else if (parsed.filePath) {
|
||||
// blob-style URL pointed directly at a file
|
||||
companyPath = parsed.filePath;
|
||||
} else if (basePath) {
|
||||
companyPath = `${basePath}/COMPANY.md`;
|
||||
} else {
|
||||
companyPath = "COMPANY.md";
|
||||
}
|
||||
let ref = "main";
|
||||
let basePath = "";
|
||||
let companyPath = "COMPANY.md";
|
||||
if (parts[2] === "tree") {
|
||||
ref = parts[3] ?? "main";
|
||||
basePath = parts.slice(4).join("/");
|
||||
} else if (parts[2] === "blob") {
|
||||
ref = parts[3] ?? "main";
|
||||
const blobPath = parts.slice(4).join("/");
|
||||
if (!blobPath) {
|
||||
throw unprocessable("Invalid GitHub blob URL");
|
||||
}
|
||||
companyPath = blobPath;
|
||||
basePath = path.posix.dirname(blobPath);
|
||||
if (basePath === ".") basePath = "";
|
||||
}
|
||||
return { hostname, owner, repo, ref, basePath, companyPath };
|
||||
|
||||
return {
|
||||
hostname: parsed.hostname,
|
||||
owner: parsed.owner,
|
||||
repo: parsed.repo,
|
||||
ref: parsed.ref ?? "main",
|
||||
basePath,
|
||||
companyPath,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -3013,30 +2962,38 @@ export function companyPortabilityService(db: Db, storage?: StorageService) {
|
||||
);
|
||||
}
|
||||
|
||||
const parsed = parseGitHubSourceUrl(source.url);
|
||||
let ref = parsed.ref;
|
||||
const sourceUrl = source.url;
|
||||
const parsed = parseGitHubSourceUrl(sourceUrl);
|
||||
const warnings: string[] = [];
|
||||
const companyRelativePath = parsed.companyPath === "COMPANY.md"
|
||||
? [parsed.basePath, "COMPANY.md"].filter(Boolean).join("/")
|
||||
: parsed.companyPath;
|
||||
|
||||
async function openSnapshot(refName: string): Promise<RepoSnapshot> {
|
||||
const ps = parseGitSourceUrl(sourceUrl);
|
||||
const wanted = { ...ps, ref: refName, explicitRef: true };
|
||||
const resolved = await resolveGitRef(wanted);
|
||||
return openRepoSnapshot(wanted, resolved.trackingRef, resolved.pinnedSha);
|
||||
}
|
||||
|
||||
let ref = parsed.ref;
|
||||
let snapshot: RepoSnapshot;
|
||||
let companyMarkdown: string | null = null;
|
||||
try {
|
||||
companyMarkdown = await fetchOptionalText(
|
||||
resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, companyRelativePath),
|
||||
);
|
||||
snapshot = await openSnapshot(ref);
|
||||
companyMarkdown = await snapshot.readFileOptional(companyRelativePath);
|
||||
} catch (err) {
|
||||
if (ref === "main") {
|
||||
ref = "master";
|
||||
warnings.push("GitHub ref main not found; falling back to master.");
|
||||
companyMarkdown = await fetchOptionalText(
|
||||
resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, companyRelativePath),
|
||||
);
|
||||
warnings.push("Git ref main not found; falling back to master.");
|
||||
snapshot = await openSnapshot(ref);
|
||||
companyMarkdown = await snapshot.readFileOptional(companyRelativePath);
|
||||
} else {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
if (!companyMarkdown) {
|
||||
throw unprocessable("GitHub company package is missing COMPANY.md");
|
||||
throw unprocessable("Git company package is missing COMPANY.md");
|
||||
}
|
||||
|
||||
const companyPath = parsed.companyPath === "COMPANY.md"
|
||||
@@ -3045,31 +3002,22 @@ export function companyPortabilityService(db: Db, storage?: StorageService) {
|
||||
const files: Record<string, CompanyPortabilityFileEntry> = {
|
||||
[companyPath]: companyMarkdown,
|
||||
};
|
||||
const apiBase = gitHubApiBase(parsed.hostname);
|
||||
const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>(
|
||||
`${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`,
|
||||
).catch(() => ({ tree: [] }));
|
||||
const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : "";
|
||||
const candidatePaths = (tree.tree ?? [])
|
||||
.filter((entry) => entry.type === "blob")
|
||||
.map((entry) => entry.path)
|
||||
.filter((entry): entry is string => typeof entry === "string")
|
||||
.filter((entry) => {
|
||||
if (basePrefix && !entry.startsWith(basePrefix)) return false;
|
||||
const relative = basePrefix ? entry.slice(basePrefix.length) : entry;
|
||||
return (
|
||||
relative.endsWith(".md") ||
|
||||
relative.startsWith("skills/") ||
|
||||
relative === ".paperclip.yaml" ||
|
||||
relative === ".paperclip.yml"
|
||||
);
|
||||
});
|
||||
const allPaths = await snapshot.listFiles();
|
||||
const candidatePaths = allPaths.filter((entry) => {
|
||||
if (basePrefix && !entry.startsWith(basePrefix)) return false;
|
||||
const relative = basePrefix ? entry.slice(basePrefix.length) : entry;
|
||||
return (
|
||||
relative.endsWith(".md") ||
|
||||
relative.startsWith("skills/") ||
|
||||
relative === ".paperclip.yaml" ||
|
||||
relative === ".paperclip.yml"
|
||||
);
|
||||
});
|
||||
for (const repoPath of candidatePaths) {
|
||||
const relativePath = basePrefix ? repoPath.slice(basePrefix.length) : repoPath;
|
||||
if (files[relativePath] !== undefined) continue;
|
||||
files[normalizePortablePath(relativePath)] = await fetchText(
|
||||
resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoPath),
|
||||
);
|
||||
files[normalizePortablePath(relativePath)] = await snapshot.readFile(repoPath);
|
||||
}
|
||||
const companyDoc = parseFrontmatterMarkdown(companyMarkdown);
|
||||
const includeEntries = readIncludeEntries(companyDoc.frontmatter);
|
||||
@@ -3078,9 +3026,7 @@ export function companyPortabilityService(db: Db, storage?: StorageService) {
|
||||
const relativePath = normalizePortablePath(includeEntry.path);
|
||||
if (files[relativePath] !== undefined) continue;
|
||||
if (!(repoPath.endsWith(".md") || repoPath.endsWith(".yaml") || repoPath.endsWith(".yml"))) continue;
|
||||
files[relativePath] = await fetchText(
|
||||
resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoPath),
|
||||
);
|
||||
files[relativePath] = await snapshot.readFile(repoPath);
|
||||
}
|
||||
|
||||
const resolved = buildManifestFromPackageFiles(files);
|
||||
@@ -3088,12 +3034,13 @@ export function companyPortabilityService(db: Db, storage?: StorageService) {
|
||||
if (companyLogoPath && !resolved.files[companyLogoPath]) {
|
||||
const repoPath = [parsed.basePath, companyLogoPath].filter(Boolean).join("/");
|
||||
try {
|
||||
const binary = await fetchBinary(
|
||||
resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoPath),
|
||||
const binary = await snapshot.readBinary(repoPath);
|
||||
resolved.files[companyLogoPath] = bufferToPortableBinaryFile(
|
||||
Buffer.from(binary),
|
||||
inferContentTypeFromPath(companyLogoPath),
|
||||
);
|
||||
resolved.files[companyLogoPath] = bufferToPortableBinaryFile(binary, inferContentTypeFromPath(companyLogoPath));
|
||||
} catch (err) {
|
||||
warnings.push(`Failed to fetch company logo ${companyLogoPath} from GitHub: ${err instanceof Error ? err.message : String(err)}`);
|
||||
warnings.push(`Failed to fetch company logo ${companyLogoPath} from git: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
resolved.warnings.unshift(...warnings);
|
||||
|
||||
@@ -25,6 +25,8 @@ export type RepoSnapshot = {
|
||||
sha: string;
|
||||
listFiles(): Promise<string[]>;
|
||||
readFile(repoPath: string): Promise<string>;
|
||||
readFileOptional(repoPath: string): Promise<string | null>;
|
||||
readBinary(repoPath: string): Promise<Uint8Array>;
|
||||
};
|
||||
|
||||
const SHA_REGEX = /^[0-9a-f]{40}$/i;
|
||||
@@ -50,6 +52,25 @@ export function parseGitSourceUrl(rawUrl: string): ParsedGitSource {
|
||||
const owner = segments[0]!;
|
||||
const repo = segments[1]!.replace(/\.git$/i, "");
|
||||
|
||||
// Query-string shape: /{owner}/{repo}?ref=...&path=...
|
||||
// Used by company portability URLs. Takes precedence over path-based parsing
|
||||
// so a URL with both shapes (rare) prefers the explicit query params.
|
||||
const queryRef = url.searchParams.get("ref")?.trim() ?? null;
|
||||
const queryPath = url.searchParams.get("path")?.trim() ?? null;
|
||||
if (queryRef || queryPath) {
|
||||
const normalizedPath = (queryPath ?? "").replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
|
||||
return {
|
||||
cloneUrl: buildCloneUrl(url.hostname, owner, repo),
|
||||
hostname: url.hostname,
|
||||
owner,
|
||||
repo,
|
||||
ref: queryRef || null,
|
||||
basePath: normalizedPath,
|
||||
filePath: null,
|
||||
explicitRef: Boolean(queryRef),
|
||||
};
|
||||
}
|
||||
|
||||
let ref: string | null = null;
|
||||
let basePath = "";
|
||||
let filePath: string | null = null;
|
||||
@@ -233,11 +254,29 @@ export async function openRepoSnapshot(
|
||||
return out;
|
||||
}
|
||||
|
||||
async function readFile(repoPath: string): Promise<string> {
|
||||
async function readBinary(repoPath: string): Promise<Uint8Array> {
|
||||
const normalized = repoPath.replace(/^\/+/, "");
|
||||
const { blob } = await git.readBlob({ fs, dir, oid: sha, filepath: normalized });
|
||||
return blob;
|
||||
}
|
||||
|
||||
async function readFile(repoPath: string): Promise<string> {
|
||||
const blob = await readBinary(repoPath);
|
||||
return new TextDecoder("utf-8").decode(blob);
|
||||
}
|
||||
|
||||
return { sha, listFiles, readFile };
|
||||
async function readFileOptional(repoPath: string): Promise<string | null> {
|
||||
try {
|
||||
return await readFile(repoPath);
|
||||
} catch (err) {
|
||||
// isomorphic-git throws NotFoundError when the path is missing from the tree.
|
||||
const name = (err as { code?: string; name?: string } | null)?.code
|
||||
?? (err as { name?: string } | null)?.name
|
||||
?? "";
|
||||
if (/NotFound/i.test(name)) return null;
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
return { sha, listFiles, readFile, readFileOptional, readBinary };
|
||||
}
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
import { unprocessable } from "../errors.js";
|
||||
|
||||
export type GitHostFamily = "github" | "gitea";
|
||||
|
||||
export function inferGitHostFamily(hostname: string): GitHostFamily {
|
||||
const h = hostname.toLowerCase();
|
||||
if (h === "github.com" || h === "www.github.com") return "github";
|
||||
return "gitea";
|
||||
}
|
||||
|
||||
export function gitHubApiBase(hostname: string) {
|
||||
return inferGitHostFamily(hostname) === "github"
|
||||
? "https://api.github.com"
|
||||
: `https://${hostname}/api/v1`;
|
||||
}
|
||||
|
||||
export function resolveRawGitHubUrl(hostname: string, owner: string, repo: string, ref: string, filePath: string) {
|
||||
const p = filePath.replace(/^\/+/, "");
|
||||
if (inferGitHostFamily(hostname) === "github") {
|
||||
return `https://raw.githubusercontent.com/${owner}/${repo}/${ref}/${p}`;
|
||||
}
|
||||
return `https://${hostname}/api/v1/repos/${owner}/${repo}/media/${p}?ref=${encodeURIComponent(ref)}`;
|
||||
}
|
||||
|
||||
export async function ghFetch(url: string, init?: RequestInit, authToken?: string): Promise<Response> {
|
||||
const headers = new Headers(init?.headers);
|
||||
if (authToken) {
|
||||
headers.set("Authorization", `Bearer ${authToken}`);
|
||||
}
|
||||
try {
|
||||
return await fetch(url, { ...init, headers, redirect: authToken ? "manual" : "follow" });
|
||||
} catch {
|
||||
const hostname = (() => {
|
||||
try { return new URL(url).hostname; } catch { return url; }
|
||||
})();
|
||||
throw unprocessable(`Could not connect to ${hostname}`);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user