import path from "node:path";
import { unprocessable } from "../errors.js";
import {
  assertPublicHost,
  giteaApiBase,
  giteaFetch,
  getGiteaHostProbe,
  giteaHostProbeCache,
  isPrivateOrLoopbackHost,
  resolveRawGiteaUrl,
  resolveRawGiteaUrlLegacy,
  setGiteaHostProbe,
} from "./gitea-fetch.js";

// Re-export the low-level helpers so callers can import everything Gitea
// related from this module.
export {
  assertPublicHost,
  giteaApiBase,
  giteaHostProbeCache,
  isPrivateOrLoopbackHost,
  resolveRawGiteaUrl,
  resolveRawGiteaUrlLegacy,
  setGiteaHostProbe,
  getGiteaHostProbe,
};

/** Abort the `/api/v1/version` probe after this many milliseconds. */
const PROBE_TIMEOUT_MS = 3000;

/** Page size requested from the git/trees endpoint. */
const GITEA_TREE_PAGE_LIMIT = 1000;

/** Components of a repo URL as produced by {@link parseGiteaSourceUrl}. */
export type GiteaSourceUrl = {
  hostname: string;
  owner: string;
  repo: string;
  /** Ref segment from the URL, or the placeholder "main" when absent. */
  ref: string;
  /** Directory inside the repo ("" when the URL has no path part). */
  basePath: string;
  /** Set only for `/blob/` URLs; null otherwise. */
  filePath: string | null;
  /** True when the URL itself carried a ref segment. */
  explicitRef: boolean;
};

/** Subset of the branch API response that this module reads. */
export type GiteaBranchResponse = {
  name?: string;
  commit?: { id?: string; url?: string };
};

/** Subset of the repo API response that this module reads. */
export type GiteaRepoResponse = {
  default_branch?: string;
};

/** One entry of a git/trees API response. */
export type GiteaTreeEntry = {
  path?: string;
  type?: string;
  mode?: string;
  sha?: string;
  size?: number;
  url?: string;
};

/** git/trees API response; `truncated` signals that more pages exist. */
export type GiteaTreeResponse = {
  sha?: string;
  tree?: GiteaTreeEntry[];
  truncated?: boolean;
};

/**
 * Return the trimmed string, or null when `value` is not a string or is
 * empty / whitespace-only.
 */
function asString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/** Type guard: a non-null, non-array object. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse a Gitea/Forgejo HTTPS repo URL into its components.
 * Mirrors parseGitHubSourceUrl (server/src/services/company-skills.ts:634-660).
 *
 * Accepts:
 *   https://{host}/{owner}/{repo}
 *   https://{host}/{owner}/{repo}.git
 *   https://{host}/{owner}/{repo}/tree/{ref}/{basePath...}
 *   https://{host}/{owner}/{repo}/blob/{ref}/{filePath}
 *
 * The ref is taken from a single path segment, so refs containing "/" are
 * not recognised as such.
 *
 * @param rawUrl - The URL as supplied by the user.
 * @returns The parsed components. `ref` defaults to "main" when the URL does
 *   not carry one; `explicitRef` is true only when a ref segment was present.
 * @throws The error built by `unprocessable(...)` when the URL is malformed,
 *   is not HTTPS, or lacks owner/repo. `assertPublicHost` is also invoked on
 *   the hostname and may reject it.
 */
export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
  let url: URL;
  try {
    url = new URL(rawUrl);
  } catch {
    throw unprocessable("Invalid Gitea URL");
  }

  if (url.protocol !== "https:") {
    throw unprocessable("Gitea source URL must use HTTPS");
  }

  const parts = url.pathname.split("/").filter(Boolean);
  if (parts.length < 2) {
    throw unprocessable("Invalid Gitea URL");
  }

  const owner = parts[0]!;
  const repo = parts[1]!.replace(/\.git$/i, "");
  // `repo` can become empty when the segment is literally ".git".
  if (!owner || !repo) {
    throw unprocessable("Invalid Gitea URL: owner and repo are required");
  }

  let ref = "main";
  let basePath = "";
  let filePath: string | null = null;
  let explicitRef = false;

  if (parts[2] === "tree") {
    ref = parts[3] ?? "main";
    basePath = parts.slice(4).join("/");
    // Only treat the ref as explicit when the URL actually contains one;
    // otherwise "main" is just a placeholder and the repo's default branch
    // should be looked up by resolveGiteaPinnedRef.
    explicitRef = parts[3] !== undefined;
  } else if (parts[2] === "blob") {
    ref = parts[3] ?? "main";
    filePath = parts.slice(4).join("/");
    // path.posix.dirname yields "." for a file at the repo root.
    basePath = filePath ? path.posix.dirname(filePath) : "";
    explicitRef = parts[3] !== undefined;
  }

  assertPublicHost(url.hostname);

  return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
}

/**
 * Probe a hostname to determine if it hosts a Gitea/Forgejo instance.
 * GETs `https://{host}/api/v1/version` with a short timeout. Cached for
 * the process lifetime in giteaHostProbeCache.
 *
 * Returns false without contacting the host for loopback / link-local /
 * RFC1918 literal-IP hosts, to avoid being used as an SSRF probe.
 *
 * @param hostname - Host to probe; lower-cased before use as the cache key.
 * @returns true when the endpoint answers 2xx with a JSON object that has a
 *   string `version` field; false otherwise. Both outcomes are cached, so a
 *   transient network failure is remembered as "not Gitea".
 */
export async function probeGiteaHost(hostname: string): Promise<boolean> {
  const key = hostname.toLowerCase();

  const cached = getGiteaHostProbe(key);
  if (cached !== undefined) return cached;

  if (isPrivateOrLoopbackHost(key)) {
    setGiteaHostProbe(key, false);
    return false;
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROBE_TIMEOUT_MS);
  let result = false;
  try {
    const response = await fetch(`https://${key}/api/v1/version`, {
      method: "GET",
      signal: controller.signal,
      headers: { accept: "application/json" },
    });
    if (response.ok) {
      // A non-JSON body resolves to null here instead of throwing.
      const data = (await response.json().catch(() => null)) as unknown;
      if (isPlainRecord(data) && typeof data.version === "string") {
        result = true;
      }
    }
  } catch {
    // network error, abort, parse error — all treated as "not gitea"
  } finally {
    clearTimeout(timer);
  }

  setGiteaHostProbe(key, result);
  return result;
}

/**
 * Look up the default branch of a repository.
 *
 * @param owner - Repository owner as it should appear in the API path.
 * @param repo - Repository name as it should appear in the API path.
 * @param apiBase - API base URL (see `giteaApiBase`).
 * @returns The `default_branch` reported by the API, or "main" when the
 *   field is missing or blank.
 * @throws Whatever {@link fetchGiteaJson} throws on a non-2xx response.
 */
export async function resolveGiteaDefaultBranch(
  owner: string,
  repo: string,
  apiBase: string,
): Promise<string> {
  const response = await fetchGiteaJson<GiteaRepoResponse>(
    `${apiBase}/repos/${owner}/${repo}`,
  );
  return asString(response.default_branch) ?? "main";
}

/**
 * Resolve a parsed Gitea URL into a pinned commit SHA and a tracking ref.
 * Mirrors resolveGitHubPinnedRef (server/src/services/company-skills.ts:662-676).
 *
 * @param parsed - Output of {@link parseGiteaSourceUrl}.
 * @returns `pinnedRef` is a commit SHA. `trackingRef` is the ref that was
 *   resolved (the explicit ref or the repo's default branch); it is null only
 *   when the ref is already a 40-hex SHA that was not explicit in the URL.
 * @throws The error built by `unprocessable(...)` when the branch response
 *   carries no commit id, plus anything {@link fetchGiteaJson} throws.
 */
export async function resolveGiteaPinnedRef(parsed: GiteaSourceUrl): Promise<{
  pinnedRef: string;
  trackingRef: string | null;
}> {
  // A full 40-hex SHA is already pinned; no API round-trip needed.
  if (/^[0-9a-f]{40}$/i.test(parsed.ref.trim())) {
    return {
      pinnedRef: parsed.ref,
      trackingRef: parsed.explicitRef ? parsed.ref : null,
    };
  }

  const apiBase = giteaApiBase(parsed.hostname);
  const trackingRef = parsed.explicitRef
    ? parsed.ref
    : await resolveGiteaDefaultBranch(parsed.owner, parsed.repo, apiBase);

  // Gitea's /repos/{o}/{r}/commits/{ref} endpoint only resolves SHAs — a branch
  // name returns 404. The branches endpoint accepts both branch names and tags.
  // NOTE(review): the tag claim above is carried over from the original
  // comment and is not verifiable from this file — confirm against the API.
  const branch = await fetchGiteaBranch(apiBase, parsed.owner, parsed.repo, trackingRef);
  const pinnedRef = asString(branch.commit?.id);
  if (!pinnedRef) {
    throw unprocessable(`Failed to resolve Gitea ref ${trackingRef}`);
  }
  return { pinnedRef, trackingRef };
}

/**
 * Fetch the full list of blob paths in a repo tree at a given ref.
 * Paginates with `?page=N&limit=1000` when the response is truncated.
 *
 * NOTE(review): Gitea's published API for git/trees lists `per_page` as the
 * page-size parameter — confirm that `limit` is honoured by the target hosts.
 *
 * @param apiBase - API base URL (see `giteaApiBase`).
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param ref - Ref or SHA placed verbatim into the request path.
 * @returns Paths of all entries whose `type` is "blob", in response order.
 * @throws The error built by `unprocessable(...)` when the tree is still
 *   truncated after the page cap, plus anything {@link fetchGiteaJson} throws.
 */
export async function fetchGiteaTreeBlobPaths(
  apiBase: string,
  owner: string,
  repo: string,
  ref: string,
): Promise<string[]> {
  const all: string[] = [];
  // hard cap so a misconfigured host can't make us loop forever
  const MAX_PAGES = 50;
  const baseUrl = `${apiBase}/repos/${owner}/${repo}/git/trees/${ref}?recursive=true&limit=${GITEA_TREE_PAGE_LIMIT}`;
  let stillTruncated = false;

  for (let page = 1; page <= MAX_PAGES; page += 1) {
    // The first request omits the page parameter entirely.
    const url = page === 1 ? baseUrl : `${baseUrl}&page=${page}`;
    const data = await fetchGiteaJson<GiteaTreeResponse>(url);
    const entries = Array.isArray(data.tree) ? data.tree : [];
    for (const entry of entries) {
      if (entry.type === "blob" && typeof entry.path === "string") {
        all.push(entry.path);
      }
    }
    stillTruncated = Boolean(data.truncated);
    if (!stillTruncated) break;
  }

  if (stillTruncated) {
    // Tree still truncated at the page cap — refuse rather than silently
    // import a partial skill listing, which would hide SKILL.md files.
    throw unprocessable(
      `Gitea repo tree for ${owner}/${repo}@${ref} exceeds ${MAX_PAGES * GITEA_TREE_PAGE_LIMIT} entries; refusing to import a partial listing.`,
    );
  }
  return all;
}

/**
 * Fetch a raw file from a Gitea/Forgejo repo. Tries the modern
 * /raw/branch/{ref}/{path} URL first, falling back to legacy
 * /raw/{ref}/{path} on 404.
 *
 * @returns The response body as text.
 * @throws The error built by `unprocessable(...)` when the canonical URL
 *   fails with a status other than 404, or when the legacy URL fails.
 */
export async function fetchGiteaText(
  hostname: string,
  owner: string,
  repo: string,
  ref: string,
  filePath: string,
): Promise<string> {
  const canonical = resolveRawGiteaUrl(hostname, owner, repo, ref, filePath);
  const canonicalResponse = await giteaFetch(canonical, {
    headers: { accept: "text/plain" },
  });
  if (canonicalResponse.ok) {
    return canonicalResponse.text();
  }
  // Only a 404 triggers the legacy fallback; other failures are reported as-is.
  if (canonicalResponse.status !== 404) {
    throw unprocessable(
      `Failed to fetch ${canonical}: ${canonicalResponse.status}`,
    );
  }

  const legacy = resolveRawGiteaUrlLegacy(hostname, owner, repo, ref, filePath);
  const legacyResponse = await giteaFetch(legacy, {
    headers: { accept: "text/plain" },
  });
  if (!legacyResponse.ok) {
    throw unprocessable(
      `Failed to fetch ${legacy}: ${legacyResponse.status}`,
    );
  }
  return legacyResponse.text();
}

/**
 * Fetch a branch record by name. Used for update checks to resolve
 * the latest commit SHA on the tracking branch.
 *
 * @param branch - Branch name; URI-component-encoded before being placed in
 *   the request path.
 * @throws Whatever {@link fetchGiteaJson} throws on a non-2xx response.
 */
export async function fetchGiteaBranch(
  apiBase: string,
  owner: string,
  repo: string,
  branch: string,
): Promise<GiteaBranchResponse> {
  return fetchGiteaJson<GiteaBranchResponse>(
    `${apiBase}/repos/${owner}/${repo}/branches/${encodeURIComponent(branch)}`,
  );
}

/**
 * GET `url` through `giteaFetch` and return the parsed JSON body.
 *
 * The body is cast to `T` without runtime validation, so callers should treat
 * every field as possibly missing.
 *
 * @throws The error built by `unprocessable(...)` on a non-2xx status. A body
 *   that is not valid JSON surfaces as the rejection from `response.json()`.
 */
export async function fetchGiteaJson<T>(url: string): Promise<T> {
  const response = await giteaFetch(url, {
    headers: { accept: "application/json" },
  });
  if (!response.ok) {
    throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
  }
  return (await response.json()) as T;
}