diff --git a/server/src/__tests__/gitea-skills.test.ts b/server/src/__tests__/gitea-skills.test.ts index ab999554..ee5a29af 100644 --- a/server/src/__tests__/gitea-skills.test.ts +++ b/server/src/__tests__/gitea-skills.test.ts @@ -5,9 +5,9 @@ import { fetchGiteaTreeBlobPaths, giteaApiBase, giteaHostProbeCache, + isPrivateOrLoopbackHost, parseGiteaSourceUrl, probeGiteaHost, - resolveGiteaCommitSha, resolveGiteaPinnedRef, resolveRawGiteaUrl, resolveRawGiteaUrlLegacy, @@ -109,6 +109,52 @@ describe("parseGiteaSourceUrl", () => { it("rejects URLs with fewer than 2 path segments", () => { expect(() => parseGiteaSourceUrl("https://git.example.com/acme")).toThrow(/Invalid Gitea URL/); }); + + it("rejects URLs with empty repo after .git strip", () => { + expect(() => parseGiteaSourceUrl("https://git.example.com/acme/.git")).toThrow( + /owner and repo are required/, + ); + }); + + it("rejects URLs pointing at private/loopback hosts", () => { + expect(() => parseGiteaSourceUrl("https://192.168.1.10/acme/skills")).toThrow( + /private, loopback/, + ); + expect(() => parseGiteaSourceUrl("https://localhost/acme/skills")).toThrow( + /private, loopback/, + ); + }); +}); + +describe("isPrivateOrLoopbackHost", () => { + it("flags loopback and localhost variants", () => { + expect(isPrivateOrLoopbackHost("localhost")).toBe(true); + expect(isPrivateOrLoopbackHost("127.0.0.1")).toBe(true); + expect(isPrivateOrLoopbackHost("127.99.99.99")).toBe(true); + expect(isPrivateOrLoopbackHost("::1")).toBe(true); + expect(isPrivateOrLoopbackHost("foo.localhost")).toBe(true); + }); + + it("flags RFC1918 ranges", () => { + expect(isPrivateOrLoopbackHost("10.0.0.1")).toBe(true); + expect(isPrivateOrLoopbackHost("172.16.0.1")).toBe(true); + expect(isPrivateOrLoopbackHost("172.31.255.254")).toBe(true); + expect(isPrivateOrLoopbackHost("192.168.1.1")).toBe(true); + }); + + it("flags link-local and 0.0.0.0", () => { + expect(isPrivateOrLoopbackHost("169.254.169.254")).toBe(true); + 
expect(isPrivateOrLoopbackHost("0.0.0.0")).toBe(true); + expect(isPrivateOrLoopbackHost("fe80::1")).toBe(true); + expect(isPrivateOrLoopbackHost("fd00::1")).toBe(true); + }); + + it("allows public hosts", () => { + expect(isPrivateOrLoopbackHost("git.example.com")).toBe(false); + expect(isPrivateOrLoopbackHost("gitea.com")).toBe(false); + expect(isPrivateOrLoopbackHost("172.32.0.1")).toBe(false); + expect(isPrivateOrLoopbackHost("11.0.0.1")).toBe(false); + }); }); describe("probeGiteaHost", () => { @@ -170,6 +216,15 @@ describe("probeGiteaHost", () => { expect(result).toBe(true); expect(fetchMock).not.toHaveBeenCalled(); }); + + it("short-circuits to false for private/loopback hosts without making a request", async () => { + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + expect(await probeGiteaHost("127.0.0.1")).toBe(false); + expect(await probeGiteaHost("192.168.1.1")).toBe(false); + expect(await probeGiteaHost("localhost")).toBe(false); + expect(fetchMock).not.toHaveBeenCalled(); + }); }); describe("resolveGiteaPinnedRef", () => { @@ -201,27 +256,6 @@ describe("resolveGiteaPinnedRef", () => { }); }); -describe("resolveGiteaCommitSha", () => { - it("returns the sha from a commit response when given a 40-hex ref", async () => { - const sha = "abc123abc123abc123abc123abc123abc123abcd"; - vi.stubGlobal( - "fetch", - vi.fn().mockResolvedValue(jsonResponse({ sha })), - ); - const result = await resolveGiteaCommitSha("acme", "skills", sha, giteaApiBase("git.example.com")); - expect(result).toBe(sha); - }); - - it("refuses to call the API for a non-SHA ref", async () => { - const fetchMock = vi.fn(); - vi.stubGlobal("fetch", fetchMock); - await expect( - resolveGiteaCommitSha("acme", "skills", "main", giteaApiBase("git.example.com")), - ).rejects.toMatchObject({ status: 422 }); - expect(fetchMock).not.toHaveBeenCalled(); - }); -}); - describe("fetchGiteaTreeBlobPaths", () => { it("returns blob paths from a single-page tree", async () => { 
vi.stubGlobal( @@ -263,6 +297,22 @@ describe("fetchGiteaTreeBlobPaths", () => { expect(fetchMock).toHaveBeenCalledTimes(2); expect(String(fetchMock.mock.calls[1]?.[0])).toContain("page=2"); }); + + it("throws when the page cap is hit while the tree is still truncated", async () => { + // Return truncated=true on every page so the loop hits MAX_PAGES (50). + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue( + jsonResponse({ + tree: [{ path: "page.md", type: "blob" }], + truncated: true, + }), + ), + ); + await expect( + fetchGiteaTreeBlobPaths(giteaApiBase("git.example.com"), "acme", "skills", "main"), + ).rejects.toThrow(/exceeds .* entries/); + }); }); describe("fetchGiteaText", () => { diff --git a/server/src/services/company-skills.ts b/server/src/services/company-skills.ts index 33dea1d1..67cd2bda 100644 --- a/server/src/services/company-skills.ts +++ b/server/src/services/company-skills.ts @@ -1133,106 +1133,15 @@ async function readUrlSkillImports( return segments.length >= 2 && !parsed.pathname.endsWith(".md"); } catch { return false; } })(); if (looksLikeRepoUrl) { - const repoUrl = new URL(url); - const repoHost = repoUrl.hostname.toLowerCase(); + const repoHost = new URL(url).hostname.toLowerCase(); const isGitHubDotCom = repoHost === "github.com" || repoHost === "www.github.com"; - if (isGitHubDotCom) { - const parsed = parseGitHubSourceUrl(url); - const apiBase = gitHubApiBase(parsed.hostname); - const { pinnedRef, trackingRef } = await resolveGitHubPinnedRef(parsed); - let ref = pinnedRef; - const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>( - `${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`, - ).catch(() => { - throw unprocessable(`Failed to read GitHub tree for ${url}`); - }); - const allPaths = (tree.tree ?? 
[]) - .filter((entry) => entry.type === "blob") - .map((entry) => entry.path) - .filter((entry): entry is string => typeof entry === "string"); - const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : ""; - const scopedPaths = basePrefix - ? allPaths.filter((entry) => entry.startsWith(basePrefix)) - : allPaths; - const relativePaths = scopedPaths.map((entry) => basePrefix ? entry.slice(basePrefix.length) : entry); - const filteredPaths = parsed.filePath - ? relativePaths.filter((entry) => entry === path.posix.relative(parsed.basePath || ".", parsed.filePath!)) - : relativePaths; - const skillPaths = filteredPaths.filter( - (entry) => path.posix.basename(entry).toLowerCase() === "skill.md", - ); - if (skillPaths.length === 0) { - throw unprocessable( - "No SKILL.md files were found in the provided GitHub source.", - ); - } - const skills: ImportedSkill[] = []; - for (const relativeSkillPath of skillPaths) { - const repoSkillPath = basePrefix ? `${basePrefix}${relativeSkillPath}` : relativeSkillPath; - const markdown = await fetchText(resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoSkillPath)); - const parsedMarkdown = parseFrontmatterMarkdown(markdown); - const skillDir = path.posix.dirname(relativeSkillPath); - const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, path.posix.basename(skillDir)); - const skillKey = readCanonicalSkillKey( - parsedMarkdown.frontmatter, - isPlainRecord(parsedMarkdown.frontmatter.metadata) ? parsedMarkdown.frontmatter.metadata : null, - ); - if (requestedSkillSlug && !matchesRequestedSkill(relativeSkillPath, requestedSkillSlug) && slug !== requestedSkillSlug) { - continue; - } - const metadata = { - ...(skillKey ? { skillKey } : {}), - sourceKind: "github", - ...(parsed.hostname !== "github.com" ? { hostname: parsed.hostname } : {}), - owner: parsed.owner, - repo: parsed.repo, - ref, - trackingRef, - repoSkillDir: normalizeGitHubSkillDirectory( - basePrefix ? 
`${basePrefix}${skillDir}` : skillDir, - slug, - ), - }; - const inventory = filteredPaths - .filter((entry) => entry === relativeSkillPath || entry.startsWith(`${skillDir}/`)) - .map((entry) => ({ - path: entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1), - kind: classifyInventoryKind(entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1)), - })) - .sort((left, right) => left.path.localeCompare(right.path)); - skills.push({ - key: deriveCanonicalSkillKey(companyId, { - slug, - sourceType: "github", - sourceLocator: sourceUrl, - metadata, - }), - slug, - name: asString(parsedMarkdown.frontmatter.name) ?? slug, - description: asString(parsedMarkdown.frontmatter.description), - markdown, - sourceType: "github", - sourceLocator: sourceUrl, - sourceRef: ref, - trustLevel: deriveTrustLevel(inventory), - compatibility: "compatible", - fileInventory: inventory, - metadata, - }); - } - if (skills.length === 0) { - throw unprocessable( - requestedSkillSlug - ? `Skill ${requestedSkillSlug} was not found in the provided GitHub source.` - : "No SKILL.md files were found in the provided GitHub source.", - ); - } - return { skills, warnings }; - } - - if (await probeGiteaHost(repoHost)) { + // Only probe non-github.com hosts. Unknown hosts that aren't Gitea (e.g. + // GitHub Enterprise) fall through to the GitHub path, which handles + // arbitrary hostnames via parseGitHubSourceUrl. + if (!isGitHubDotCom && (await probeGiteaHost(repoHost))) { return await readGiteaUrlSkillImports(companyId, sourceUrl, requestedSkillSlug); } + return await readGitHubUrlSkillImports(companyId, sourceUrl, requestedSkillSlug); } if (url.startsWith("http://") || url.startsWith("https://")) { @@ -1277,6 +1186,105 @@ async function readUrlSkillImports( throw unprocessable("Unsupported skill source. 
Use a local path or URL."); } +async function readGitHubUrlSkillImports( + companyId: string, + sourceUrl: string, + requestedSkillSlug: string | null = null, +): Promise<{ skills: ImportedSkill[]; warnings: string[] }> { + const warnings: string[] = []; + const parsed = parseGitHubSourceUrl(sourceUrl); + const apiBase = gitHubApiBase(parsed.hostname); + const { pinnedRef, trackingRef } = await resolveGitHubPinnedRef(parsed); + const ref = pinnedRef; + const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>( + `${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`, + ).catch(() => { + throw unprocessable(`Failed to read GitHub tree for ${sourceUrl}`); + }); + const allPaths = (tree.tree ?? []) + .filter((entry) => entry.type === "blob") + .map((entry) => entry.path) + .filter((entry): entry is string => typeof entry === "string"); + const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : ""; + const scopedPaths = basePrefix + ? allPaths.filter((entry) => entry.startsWith(basePrefix)) + : allPaths; + const relativePaths = scopedPaths.map((entry) => basePrefix ? entry.slice(basePrefix.length) : entry); + const filteredPaths = parsed.filePath + ? relativePaths.filter((entry) => entry === path.posix.relative(parsed.basePath || ".", parsed.filePath!)) + : relativePaths; + const skillPaths = filteredPaths.filter( + (entry) => path.posix.basename(entry).toLowerCase() === "skill.md", + ); + if (skillPaths.length === 0) { + throw unprocessable( + "No SKILL.md files were found in the provided GitHub source.", + ); + } + const skills: ImportedSkill[] = []; + for (const relativeSkillPath of skillPaths) { + const repoSkillPath = basePrefix ? 
`${basePrefix}${relativeSkillPath}` : relativeSkillPath; + const markdown = await fetchText(resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoSkillPath)); + const parsedMarkdown = parseFrontmatterMarkdown(markdown); + const skillDir = path.posix.dirname(relativeSkillPath); + const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, path.posix.basename(skillDir)); + const skillKey = readCanonicalSkillKey( + parsedMarkdown.frontmatter, + isPlainRecord(parsedMarkdown.frontmatter.metadata) ? parsedMarkdown.frontmatter.metadata : null, + ); + if (requestedSkillSlug && !matchesRequestedSkill(relativeSkillPath, requestedSkillSlug) && slug !== requestedSkillSlug) { + continue; + } + const metadata = { + ...(skillKey ? { skillKey } : {}), + sourceKind: "github", + ...(parsed.hostname !== "github.com" ? { hostname: parsed.hostname } : {}), + owner: parsed.owner, + repo: parsed.repo, + ref, + trackingRef, + repoSkillDir: normalizeGitHubSkillDirectory( + basePrefix ? `${basePrefix}${skillDir}` : skillDir, + slug, + ), + }; + const inventory = filteredPaths + .filter((entry) => entry === relativeSkillPath || entry.startsWith(`${skillDir}/`)) + .map((entry) => ({ + path: entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1), + kind: classifyInventoryKind(entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1)), + })) + .sort((left, right) => left.path.localeCompare(right.path)); + skills.push({ + key: deriveCanonicalSkillKey(companyId, { + slug, + sourceType: "github", + sourceLocator: sourceUrl, + metadata, + }), + slug, + name: asString(parsedMarkdown.frontmatter.name) ?? 
slug, + description: asString(parsedMarkdown.frontmatter.description), + markdown, + sourceType: "github", + sourceLocator: sourceUrl, + sourceRef: ref, + trustLevel: deriveTrustLevel(inventory), + compatibility: "compatible", + fileInventory: inventory, + metadata, + }); + } + if (skills.length === 0) { + throw unprocessable( + requestedSkillSlug + ? `Skill ${requestedSkillSlug} was not found in the provided GitHub source.` + : "No SKILL.md files were found in the provided GitHub source.", + ); + } + return { skills, warnings }; +} + async function readGiteaUrlSkillImports( companyId: string, sourceUrl: string, diff --git a/server/src/services/gitea-fetch.ts b/server/src/services/gitea-fetch.ts index fd9a6a1e..039bb44b 100644 --- a/server/src/services/gitea-fetch.ts +++ b/server/src/services/gitea-fetch.ts @@ -2,6 +2,41 @@ import { unprocessable } from "../errors.js"; const PROBE_CACHE_MAX_ENTRIES = 1024; +/** + * Reject hostnames that resolve to loopback, link-local, or RFC1918 ranges + * supplied as literal IPs. Hostnames that resolve via DNS to private IPs are + * not blocked here — this is a cheap surface-level guard against an operator + * pasting `http://192.168.1.10/...` into a skill-source field, not a full + * SSRF defence. 
/**
 * Surface-level guard that reports whether `hostname` is a loopback,
 * link-local, unspecified, or private-range host *as written*.
 *
 * Only the literal text is inspected: well-known loopback names
 * (`localhost`, `*.localhost`, `ip6-localhost`, `ip6-loopback`) and literal
 * IPv4 / IPv6 addresses. No DNS resolution is performed, so a public-looking
 * name that resolves to a private address is NOT detected here.
 *
 * IPv6 range checks are applied only to IPv6 literals (hosts containing `:`),
 * so ordinary DNS names that merely begin with "fc" or "fd" (for example
 * `fd.io`) are not misclassified as unique-local addresses.
 *
 * @param hostname Host component of a URL; IPv6 literals may be bracketed
 *   (`[::1]`) or bare (`::1`). Case and surrounding whitespace are ignored.
 * @returns `true` when the host must not be contacted as a skill source.
 */
export function isPrivateOrLoopbackHost(hostname: string): boolean {
  let host = hostname.toLowerCase().trim();
  // URL.hostname keeps the brackets around IPv6 literals; drop them so the
  // checks below see one canonical spelling.
  if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
  // A single trailing dot is the DNS root label ("localhost." === "localhost").
  if (host.length > 1 && host.endsWith(".")) host = host.slice(0, -1);

  if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") return true;
  if (host.endsWith(".localhost")) return true;

  // A colon can only appear in an IPv6 literal, never in a DNS name.
  if (host.includes(":")) return isPrivateIpv6Literal(host);

  const ipv4 = IPV4_LITERAL_RE.exec(host);
  return ipv4 !== null && isPrivateIpv4Prefix(Number(ipv4[1]), Number(ipv4[2]));
}

/** Dotted-quad IPv4 literal; captures the first two octets. */
const IPV4_LITERAL_RE = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/;
/** IPv4-mapped IPv6 literal written with a dotted-quad tail (`::ffff:10.0.0.1`). */
const IPV4_MAPPED_DOTTED_RE = /^::ffff:(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/;
/** IPv4-mapped IPv6 literal in the hex form URL parsing produces (`::ffff:7f00:1`). */
const IPV4_MAPPED_HEX_RE = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/;

/** True when the first two IPv4 octets fall in a loopback, link-local, "this network", or RFC1918 range. */
function isPrivateIpv4Prefix(a: number, b: number): boolean {
  if (a === 0 || a === 10 || a === 127) return true;
  if (a === 169 && b === 254) return true;
  if (a === 172 && b >= 16 && b <= 31) return true;
  return a === 192 && b === 168;
}

/** True for loopback, unspecified, link-local, unique-local, or IPv4-mapped-private IPv6 literals (brackets already removed). */
function isPrivateIpv6Literal(host: string): boolean {
  // Loopback and the unspecified address (the IPv6 analogue of 0.0.0.0).
  if (host === "::1" || host === "::") return true;

  const mappedDotted = IPV4_MAPPED_DOTTED_RE.exec(host);
  if (mappedDotted) return isPrivateIpv4Prefix(Number(mappedDotted[1]), Number(mappedDotted[2]));
  const mappedHex = IPV4_MAPPED_HEX_RE.exec(host);
  if (mappedHex) {
    // The first hextet of the tail carries the first two IPv4 octets.
    const high = parseInt(mappedHex[1] ?? "", 16);
    return isPrivateIpv4Prefix(high >> 8, high & 0xff);
  }

  const firstHextet = host.split(":", 1)[0] ?? "";
  if (!/^[0-9a-f]{1,4}$/.test(firstHextet)) return false;
  const prefix = parseInt(firstHextet, 16);
  if ((prefix & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  return (prefix & 0xfe00) === 0xfc00; // fc00::/7 unique local
}

/**
 * Throw a 422 (`unprocessable`) error when `hostname` is flagged by
 * {@link isPrivateOrLoopbackHost}; otherwise return normally.
 *
 * @param hostname Host component of the skill-source URL being validated.
 * @throws the error built by `unprocessable` for private, loopback, and
 *   link-local hosts.
 */
export function assertPublicHost(hostname: string): void {
  if (isPrivateOrLoopbackHost(hostname)) {
    throw unprocessable(
      `Refusing to contact ${hostname}: private, loopback, and link-local hosts are not allowed as skill sources.`,
    );
  }
}

// Process-lifetime cache of Gitea/Forgejo probe results.
// Keyed by lowercased hostname.
Positive and negative results are both cached diff --git a/server/src/services/gitea-skills.ts b/server/src/services/gitea-skills.ts index a67f58f6..c05eacb8 100644 --- a/server/src/services/gitea-skills.ts +++ b/server/src/services/gitea-skills.ts @@ -1,18 +1,22 @@ import path from "node:path"; import { unprocessable } from "../errors.js"; import { + assertPublicHost, giteaApiBase, giteaFetch, getGiteaHostProbe, giteaHostProbeCache, + isPrivateOrLoopbackHost, resolveRawGiteaUrl, resolveRawGiteaUrlLegacy, setGiteaHostProbe, } from "./gitea-fetch.js"; export { + assertPublicHost, giteaApiBase, giteaHostProbeCache, + isPrivateOrLoopbackHost, resolveRawGiteaUrl, resolveRawGiteaUrlLegacy, setGiteaHostProbe, @@ -41,10 +45,6 @@ export type GiteaRepoResponse = { default_branch?: string; }; -export type GiteaCommitResponse = { - sha?: string; -}; - export type GiteaTreeEntry = { path?: string; type?: string; @@ -95,6 +95,9 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl { } const owner = parts[0]!; const repo = parts[1]!.replace(/\.git$/i, ""); + if (!owner || !repo) { + throw unprocessable("Invalid Gitea URL: owner and repo are required"); + } let ref = "main"; let basePath = ""; let filePath: string | null = null; @@ -109,6 +112,7 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl { basePath = filePath ? path.posix.dirname(filePath) : ""; explicitRef = true; } + assertPublicHost(url.hostname); return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef }; } @@ -116,12 +120,20 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl { * Probe a hostname to determine if it hosts a Gitea/Forgejo instance. * GETs `https://{host}/api/v1/version` with a short timeout. Cached for * the process lifetime in giteaHostProbeCache. + * + * Returns false without contacting the host for loopback / link-local / + * RFC1918 literal-IP hosts, to avoid being used as an SSRF probe. 
*/ export async function probeGiteaHost(hostname: string): Promise { const key = hostname.toLowerCase(); const cached = getGiteaHostProbe(key); if (cached !== undefined) return cached; + if (isPrivateOrLoopbackHost(key)) { + setGiteaHostProbe(key, false); + return false; + } + const controller = new AbortController(); const timer = setTimeout(() => controller.abort(), PROBE_TIMEOUT_MS); let result = false; @@ -156,27 +168,6 @@ export async function resolveGiteaDefaultBranch( return asString(response.default_branch) ?? "main"; } -export async function resolveGiteaCommitSha( - owner: string, - repo: string, - ref: string, - apiBase: string, -): Promise { - if (!/^[0-9a-f]{40}$/i.test(ref.trim())) { - throw unprocessable( - `Gitea /commits endpoint only resolves SHAs; got "${ref}". Use fetchGiteaBranch for branch names.`, - ); - } - const response = await fetchGiteaJson( - `${apiBase}/repos/${owner}/${repo}/commits/${encodeURIComponent(ref)}`, - ); - const sha = asString(response.sha); - if (!sha) { - throw unprocessable(`Failed to resolve Gitea ref ${ref}`); - } - return sha; -} - /** * Resolve a parsed Gitea URL into a pinned commit SHA and a tracking ref. * Mirrors resolveGitHubPinnedRef (server/src/services/company-skills.ts:662-676). @@ -220,6 +211,7 @@ export async function fetchGiteaTreeBlobPaths( let page = 1; // hard cap so a misconfigured host can't make us loop forever const MAX_PAGES = 50; + let stillTruncated = false; for (let i = 0; i < MAX_PAGES; i += 1) { const url = page === 1 @@ -232,9 +224,17 @@ export async function fetchGiteaTreeBlobPaths( all.push(entry.path); } } - if (!data.truncated) break; + stillTruncated = Boolean(data.truncated); + if (!stillTruncated) break; page += 1; } + if (stillTruncated) { + // Tree still truncated at the page cap — refuse rather than silently + // import a partial skill listing, which would hide SKILL.md files. 
+ throw unprocessable( + `Gitea repo tree for ${owner}/${repo}@${ref} exceeds ${MAX_PAGES * GITEA_TREE_PAGE_LIMIT} entries; refusing to import a partial listing.`, + ); + } return all; }