From 80f7d8270c04355a3aca164e8b50258c244807ee Mon Sep 17 00:00:00 2001 From: Chris Farhood Date: Sat, 16 May 2026 10:28:22 -0400 Subject: [PATCH] refactor(portability): migrate to git-source; delete github-fetch.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the skills refactor: company-portability was the second user of the per-host REST shim (its own parallel parseGitHubSourceUrl + fetch helpers + raw.githubusercontent URL builder), so importing a company package from a non-github URL hit the same Gitea 404 the skills path did. - Extend git-source.ts: - parseGitSourceUrl: also recognises query-string shape (?ref=...&path=...) used by portability URLs, with precedence over path-style segments when both are present. - RepoSnapshot: add readBinary (Uint8Array for the company logo fetch) and readFileOptional (null on NotFoundError, for the COMPANY.md probe + main->master fallback). - Rewrite resolveSource in company-portability.ts to open a single in-memory snapshot per import and serve all reads (COMPANY.md, candidate tree, includes, logo) from it. Drops fetchText/fetchJson/ fetchBinary/fetchOptionalText. - parseGitHubSourceUrl stays exported with its original return shape ({hostname, owner, repo, ref, basePath, companyPath}) so the existing test suite passes unchanged. It now delegates URL parsing to parseGitSourceUrl and layers companyPath derivation on top. - Delete server/src/services/github-fetch.ts: zero remaining callers. Test coverage: - 7 new git-source tests (query-string parse variants, query-string precedence over path style, readBinary, readFileOptional NotFound null + non-NotFound rethrow) — 34/34 passing. - 52 existing company-portability tests still pass via the parseGitHubSourceUrl shim contract. - Smoke-tested end-to-end against https://git.farh.net/.../?ref=main: ref resolves, snapshot opens, readFile/readBinary/readFileOptional all return expected results. Note: two pre-existing failures in company-skills-routes.test.ts ("does not expose a skill reference...") exist on dev too and are unrelated to this change. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/src/__tests__/git-source.test.ts | 74 +++++++++ server/src/services/company-portability.ts | 183 ++++++++------------- server/src/services/git-source.ts | 43 ++++- server/src/services/github-fetch.ts | 38 ----- 4 files changed, 180 insertions(+), 158 deletions(-) delete mode 100644 server/src/services/github-fetch.ts diff --git a/server/src/__tests__/git-source.test.ts b/server/src/__tests__/git-source.test.ts index 4ba1e335..ce257817 100644 --- a/server/src/__tests__/git-source.test.ts +++ b/server/src/__tests__/git-source.test.ts @@ -145,6 +145,43 @@ describe("parseGitSourceUrl", () => { it("rejects malformed URLs", () => { expect(() => parseGitSourceUrl("not a url")).toThrow(); }); + + it("parses a query-string URL with ?ref= and ?path=", () => { + expect( + parseGitSourceUrl("https://github.com/o/r?ref=feature%2Fdemo&path=subdir"), + ).toMatchObject({ + cloneUrl: "https://github.com/o/r.git", + ref: "feature/demo", + basePath: "subdir", + filePath: null, + explicitRef: true, + }); + }); + + it("parses a query-string URL with only ?ref=", () => { + expect(parseGitSourceUrl("https://github.com/o/r?ref=develop")).toMatchObject({ + ref: "develop", + basePath: "", + explicitRef: true, + }); + }); + + it("parses a query-string URL with only ?path=", () => { + expect(parseGitSourceUrl("https://github.com/o/r?path=sub")).toMatchObject({ + ref: null, + basePath: "sub", + explicitRef: false, + }); + }); + + it("query-string parsing takes precedence over path-style segments", () => { + expect( + parseGitSourceUrl("https://github.com/o/r/tree/main/old?ref=newref&path=newpath"), + ).toMatchObject({ + ref: "newref", + basePath: "newpath", + }); + }); }); describe("buildCloneUrl", () => { @@ -333,4 +370,41 @@ describe("openRepoSnapshot", () => { openRepoSnapshot(parsed, "main", "1111111111111111111111111111111111111111"), ).rejects.toThrow(/repository not found/i); }); + + it("readBinary returns the raw blob bytes", async () => { + cloneFn.mockResolvedValue(undefined); + resolveRefFn.mockResolvedValue("ffffffffffffffffffffffffffffffffffffffff"); + walkFn.mockImplementation(async () => {}); + const bytes = new Uint8Array([0x89, 0x50, 0x4e, 0x47]); + readBlobFn.mockResolvedValue({ blob: bytes }); + + const parsed = parseGitSourceUrl("https://git.example.com/o/r"); + const snap = await openRepoSnapshot(parsed, "main", "ffffffffffffffffffffffffffffffffffffffff"); + const result = await snap.readBinary("logo.png"); + expect(result).toBe(bytes); + }); + + it("readFileOptional returns null on NotFoundError", async () => { + cloneFn.mockResolvedValue(undefined); + resolveRefFn.mockResolvedValue("ffffffffffffffffffffffffffffffffffffffff"); + walkFn.mockImplementation(async () => {}); + const err = Object.assign(new Error("missing"), { code: "NotFoundError" }); + readBlobFn.mockRejectedValue(err); + + const parsed = parseGitSourceUrl("https://git.example.com/o/r"); + const snap = await openRepoSnapshot(parsed, "main", "ffffffffffffffffffffffffffffffffffffffff"); + const result = await snap.readFileOptional("missing.md"); + expect(result).toBeNull(); + }); + + it("readFileOptional rethrows non-NotFound errors", async () => { + cloneFn.mockResolvedValue(undefined); + resolveRefFn.mockResolvedValue("ffffffffffffffffffffffffffffffffffffffff"); + walkFn.mockImplementation(async () => {}); + readBlobFn.mockRejectedValue(new Error("disk explosion")); + + const parsed = parseGitSourceUrl("https://git.example.com/o/r"); + const snap = await openRepoSnapshot(parsed, "main", "ffffffffffffffffffffffffffffffffffffffff"); + await expect(snap.readFileOptional("any.md")).rejects.toThrow(/disk explosion/); + }); }); diff --git a/server/src/services/company-portability.ts b/server/src/services/company-portability.ts index 864f773a..30e4c14f 100644 --- a/server/src/services/company-portability.ts +++ b/server/src/services/company-portability.ts @@ -57,7 +57,7 @@ import { import { requireOpenCodeModelId } from "@paperclipai/adapter-opencode-local/server"; import { findServerAdapter } from "../adapters/index.js"; import { forbidden, HttpError, notFound, unprocessable } from "../errors.js"; -import { ghFetch, gitHubApiBase, resolveRawGitHubUrl } from "./github-fetch.js"; +import { openRepoSnapshot, parseGitSourceUrl, resolveGitRef, type RepoSnapshot } from "./git-source.js"; import type { StorageService } from "../storage/types.js"; import { accessService } from "./access.js"; import { agentService } from "./agents.js"; @@ -2339,42 +2339,6 @@ function parseFrontmatterMarkdown(raw: string): MarkdownDoc { }; } -async function fetchText(url: string) { - const response = await ghFetch(url); - if (!response.ok) { - throw unprocessable(`Failed to fetch ${url}: ${response.status}`); - } - return response.text(); -} - -async function fetchOptionalText(url: string) { - const response = await ghFetch(url); - if (response.status === 404) return null; - if (!response.ok) { - throw unprocessable(`Failed to fetch ${url}: ${response.status}`); - } - return response.text(); -} - -async function fetchBinary(url: string) { - const response = await ghFetch(url); - if (!response.ok) { - throw unprocessable(`Failed to fetch ${url}: ${response.status}`); - } - return Buffer.from(await response.arrayBuffer()); -} - -async function fetchJson(url: string): Promise { - const response = await ghFetch(url, { - headers: { - accept: "application/vnd.github+json", - }, - }); - if (!response.ok) { - throw unprocessable(`Failed to fetch ${url}: ${response.status}`); - } - return response.json() as Promise; -} function dedupeEnvInputs(values: CompanyPortabilityManifest["envInputs"]) { const seen = new Set(); @@ -2864,52 +2828,37 @@ function normalizeGitHubSourcePath(value: string | null | undefined) { export function parseGitHubSourceUrl(rawUrl: string) { const url = new URL(rawUrl); - if (url.protocol !== "https:") { - throw unprocessable("GitHub source URL must use HTTPS"); - } - const hostname = url.hostname; - const parts = url.pathname.split("/").filter(Boolean); - if (parts.length < 2) { - throw unprocessable("Invalid GitHub URL"); - } - const owner = parts[0]!; - const repo = parts[1]!.replace(/\.git$/i, ""); - const queryRef = url.searchParams.get("ref")?.trim(); - const queryPath = normalizeGitHubSourcePath(url.searchParams.get("path")); + // Handle the portability-specific companyPath query param before delegating, + // since git-source has no notion of it. const queryCompanyPath = normalizeGitHubSourcePath(url.searchParams.get("companyPath")); - if (queryRef || queryPath || queryCompanyPath) { - const companyPath = queryCompanyPath || [queryPath, "COMPANY.md"].filter(Boolean).join("/") || "COMPANY.md"; - let basePath = queryPath; - if (!basePath && companyPath !== "COMPANY.md") { - basePath = path.posix.dirname(companyPath); - if (basePath === ".") basePath = ""; + + const parsed = parseGitSourceUrl(rawUrl); + + let companyPath: string; + let basePath = parsed.basePath; + if (queryCompanyPath) { + companyPath = queryCompanyPath; + if (!basePath) { + const derived = path.posix.dirname(companyPath); + basePath = derived === "." ? "" : derived; } - return { - hostname, - owner, - repo, - ref: queryRef || "main", - basePath, - companyPath, - }; + } else if (parsed.filePath) { + // blob-style URL pointed directly at a file + companyPath = parsed.filePath; + } else if (basePath) { + companyPath = `${basePath}/COMPANY.md`; + } else { + companyPath = "COMPANY.md"; } - let ref = "main"; - let basePath = ""; - let companyPath = "COMPANY.md"; - if (parts[2] === "tree") { - ref = parts[3] ?? "main"; - basePath = parts.slice(4).join("/"); - } else if (parts[2] === "blob") { - ref = parts[3] ?? "main"; - const blobPath = parts.slice(4).join("/"); - if (!blobPath) { - throw unprocessable("Invalid GitHub blob URL"); - } - companyPath = blobPath; - basePath = path.posix.dirname(blobPath); - if (basePath === ".") basePath = ""; - } - return { hostname, owner, repo, ref, basePath, companyPath }; + + return { + hostname: parsed.hostname, + owner: parsed.owner, + repo: parsed.repo, + ref: parsed.ref ?? "main", + basePath, + companyPath, + }; } @@ -3013,30 +2962,38 @@ export function companyPortabilityService(db: Db, storage?: StorageService) { ); } - const parsed = parseGitHubSourceUrl(source.url); - let ref = parsed.ref; + const sourceUrl = source.url; + const parsed = parseGitHubSourceUrl(sourceUrl); const warnings: string[] = []; const companyRelativePath = parsed.companyPath === "COMPANY.md" ? [parsed.basePath, "COMPANY.md"].filter(Boolean).join("/") : parsed.companyPath; + + async function openSnapshot(refName: string): Promise { + const ps = parseGitSourceUrl(sourceUrl); + const wanted = { ...ps, ref: refName, explicitRef: true }; + const resolved = await resolveGitRef(wanted); + return openRepoSnapshot(wanted, resolved.trackingRef, resolved.pinnedSha); + } + + let ref = parsed.ref; + let snapshot: RepoSnapshot; let companyMarkdown: string | null = null; try { - companyMarkdown = await fetchOptionalText( - resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, companyRelativePath), - ); + snapshot = await openSnapshot(ref); + companyMarkdown = await snapshot.readFileOptional(companyRelativePath); } catch (err) { if (ref === "main") { ref = "master"; - warnings.push("GitHub ref main not found; falling back to master."); - companyMarkdown = await fetchOptionalText( - resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, companyRelativePath), - ); + warnings.push("Git ref main not found; falling back to master."); + snapshot = await openSnapshot(ref); + companyMarkdown = await snapshot.readFileOptional(companyRelativePath); } else { throw err; } } if (!companyMarkdown) { - throw unprocessable("GitHub company package is missing COMPANY.md"); + throw unprocessable("Git company package is missing COMPANY.md"); } const companyPath = parsed.companyPath === "COMPANY.md" @@ -3045,31 +3002,22 @@ export function companyPortabilityService(db: Db, storage?: StorageService) { const files: Record = { [companyPath]: companyMarkdown, }; - const apiBase = gitHubApiBase(parsed.hostname); - const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>( - `${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`, - ).catch(() => ({ tree: [] })); const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : ""; - const candidatePaths = (tree.tree ?? []) - .filter((entry) => entry.type === "blob") - .map((entry) => entry.path) - .filter((entry): entry is string => typeof entry === "string") - .filter((entry) => { - if (basePrefix && !entry.startsWith(basePrefix)) return false; - const relative = basePrefix ? entry.slice(basePrefix.length) : entry; - return ( - relative.endsWith(".md") || - relative.startsWith("skills/") || - relative === ".paperclip.yaml" || - relative === ".paperclip.yml" - ); - }); + const allPaths = await snapshot.listFiles(); + const candidatePaths = allPaths.filter((entry) => { + if (basePrefix && !entry.startsWith(basePrefix)) return false; + const relative = basePrefix ? entry.slice(basePrefix.length) : entry; + return ( + relative.endsWith(".md") || + relative.startsWith("skills/") || + relative === ".paperclip.yaml" || + relative === ".paperclip.yml" + ); + }); for (const repoPath of candidatePaths) { const relativePath = basePrefix ? repoPath.slice(basePrefix.length) : repoPath; if (files[relativePath] !== undefined) continue; - files[normalizePortablePath(relativePath)] = await fetchText( - resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoPath), - ); + files[normalizePortablePath(relativePath)] = await snapshot.readFile(repoPath); } const companyDoc = parseFrontmatterMarkdown(companyMarkdown); const includeEntries = readIncludeEntries(companyDoc.frontmatter); @@ -3078,9 +3026,7 @@ export function companyPortabilityService(db: Db, storage?: StorageService) { const relativePath = normalizePortablePath(includeEntry.path); if (files[relativePath] !== undefined) continue; if (!(repoPath.endsWith(".md") || repoPath.endsWith(".yaml") || repoPath.endsWith(".yml"))) continue; - files[relativePath] = await fetchText( - resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoPath), - ); + files[relativePath] = await snapshot.readFile(repoPath); } const resolved = buildManifestFromPackageFiles(files); @@ -3088,12 +3034,13 @@ export function companyPortabilityService(db: Db, storage?: StorageService) { if (companyLogoPath && !resolved.files[companyLogoPath]) { const repoPath = [parsed.basePath, companyLogoPath].filter(Boolean).join("/"); try { - const binary = await fetchBinary( - resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoPath), + const binary = await snapshot.readBinary(repoPath); + resolved.files[companyLogoPath] = bufferToPortableBinaryFile( + Buffer.from(binary), + inferContentTypeFromPath(companyLogoPath), ); - resolved.files[companyLogoPath] = bufferToPortableBinaryFile(binary, inferContentTypeFromPath(companyLogoPath)); } catch (err) { - warnings.push(`Failed to fetch company logo ${companyLogoPath} from GitHub: ${err instanceof Error ? err.message : String(err)}`); + warnings.push(`Failed to fetch company logo ${companyLogoPath} from git: ${err instanceof Error ? err.message : String(err)}`); } } resolved.warnings.unshift(...warnings); diff --git a/server/src/services/git-source.ts b/server/src/services/git-source.ts index f52dce46..c67cadcd 100644 --- a/server/src/services/git-source.ts +++ b/server/src/services/git-source.ts @@ -25,6 +25,8 @@ export type RepoSnapshot = { sha: string; listFiles(): Promise; readFile(repoPath: string): Promise; + readFileOptional(repoPath: string): Promise; + readBinary(repoPath: string): Promise; }; const SHA_REGEX = /^[0-9a-f]{40}$/i; @@ -50,6 +52,25 @@ export function parseGitSourceUrl(rawUrl: string): ParsedGitSource { const owner = segments[0]!; const repo = segments[1]!.replace(/\.git$/i, ""); + // Query-string shape: /{owner}/{repo}?ref=...&path=... + // Used by company portability URLs. Takes precedence over path-based parsing + // so a URL with both shapes (rare) prefers the explicit query params. + const queryRef = url.searchParams.get("ref")?.trim() ?? null; + const queryPath = url.searchParams.get("path")?.trim() ?? null; + if (queryRef || queryPath) { + const normalizedPath = (queryPath ?? "").replace(/\\/g, "/").replace(/^\/+|\/+$/g, ""); + return { + cloneUrl: buildCloneUrl(url.hostname, owner, repo), + hostname: url.hostname, + owner, + repo, + ref: queryRef || null, + basePath: normalizedPath, + filePath: null, + explicitRef: Boolean(queryRef), + }; + } + let ref: string | null = null; let basePath = ""; let filePath: string | null = null; @@ -233,11 +254,29 @@ export async function openRepoSnapshot( return out; } - async function readFile(repoPath: string): Promise { + async function readBinary(repoPath: string): Promise { const normalized = repoPath.replace(/^\/+/, ""); const { blob } = await git.readBlob({ fs, dir, oid: sha, filepath: normalized }); + return blob; + } + + async function readFile(repoPath: string): Promise { + const blob = await readBinary(repoPath); return new TextDecoder("utf-8").decode(blob); } - return { sha, listFiles, readFile }; + async function readFileOptional(repoPath: string): Promise { + try { + return await readFile(repoPath); + } catch (err) { + // isomorphic-git throws NotFoundError when the path is missing from the tree. + const name = (err as { code?: string; name?: string } | null)?.code + ?? (err as { name?: string } | null)?.name + ?? ""; + if (/NotFound/i.test(name)) return null; + throw err; + } + } + + return { sha, listFiles, readFile, readFileOptional, readBinary }; } diff --git a/server/src/services/github-fetch.ts b/server/src/services/github-fetch.ts deleted file mode 100644 index 66ee992f..00000000 --- a/server/src/services/github-fetch.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { unprocessable } from "../errors.js"; - -export type GitHostFamily = "github" | "gitea"; - -export function inferGitHostFamily(hostname: string): GitHostFamily { - const h = hostname.toLowerCase(); - if (h === "github.com" || h === "www.github.com") return "github"; - return "gitea"; -} - -export function gitHubApiBase(hostname: string) { - return inferGitHostFamily(hostname) === "github" - ? "https://api.github.com" - : `https://${hostname}/api/v1`; -} - -export function resolveRawGitHubUrl(hostname: string, owner: string, repo: string, ref: string, filePath: string) { - const p = filePath.replace(/^\/+/, ""); - if (inferGitHostFamily(hostname) === "github") { - return `https://raw.githubusercontent.com/${owner}/${repo}/${ref}/${p}`; - } - return `https://${hostname}/api/v1/repos/${owner}/${repo}/media/${p}?ref=${encodeURIComponent(ref)}`; -} - -export async function ghFetch(url: string, init?: RequestInit, authToken?: string): Promise { - const headers = new Headers(init?.headers); - if (authToken) { - headers.set("Authorization", `Bearer ${authToken}`); - } - try { - return await fetch(url, { ...init, headers, redirect: authToken ? "manual" : "follow" }); - } catch { - const hostname = (() => { - try { return new URL(url).hostname; } catch { return url; } - })(); - throw unprocessable(`Could not connect to ${hostname}`); - } -}