fork: address PR #19 review findings for Gitea skill support
- Fix GitHub Enterprise regression: dispatcher now probes for Gitea only on non-github.com hosts and falls back to the GitHub path for unknown hosts, preserving GHE support that the earlier strict github.com match broke.
- Refactor readUrlSkillImports into a flat dispatcher with a sibling readGitHubUrlSkillImports helper, mirroring readGiteaUrlSkillImports.
- Add SSRF guard (isPrivateOrLoopbackHost + assertPublicHost) in gitea-fetch; short-circuit probeGiteaHost and reject parseGiteaSourceUrl for loopback / RFC1918 / link-local literal IPs.
- Throw on fetchGiteaTreeBlobPaths cap-hit instead of silently returning a partial blob listing (which would hide SKILL.md files).
- Validate non-empty repo in parseGiteaSourceUrl after .git strip.
- Remove dead resolveGiteaCommitSha + GiteaCommitResponse (unused since the branches-endpoint follow-up).
- Tests updated and extended.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,9 +5,9 @@ import {
|
||||
fetchGiteaTreeBlobPaths,
|
||||
giteaApiBase,
|
||||
giteaHostProbeCache,
|
||||
isPrivateOrLoopbackHost,
|
||||
parseGiteaSourceUrl,
|
||||
probeGiteaHost,
|
||||
resolveGiteaCommitSha,
|
||||
resolveGiteaPinnedRef,
|
||||
resolveRawGiteaUrl,
|
||||
resolveRawGiteaUrlLegacy,
|
||||
@@ -109,6 +109,52 @@ describe("parseGiteaSourceUrl", () => {
|
||||
it("rejects URLs with fewer than 2 path segments", () => {
|
||||
expect(() => parseGiteaSourceUrl("https://git.example.com/acme")).toThrow(/Invalid Gitea URL/);
|
||||
});
|
||||
|
||||
it("rejects URLs with empty repo after .git strip", () => {
|
||||
expect(() => parseGiteaSourceUrl("https://git.example.com/acme/.git")).toThrow(
|
||||
/owner and repo are required/,
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects URLs pointing at private/loopback hosts", () => {
|
||||
expect(() => parseGiteaSourceUrl("https://192.168.1.10/acme/skills")).toThrow(
|
||||
/private, loopback/,
|
||||
);
|
||||
expect(() => parseGiteaSourceUrl("https://localhost/acme/skills")).toThrow(
|
||||
/private, loopback/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isPrivateOrLoopbackHost", () => {
|
||||
it("flags loopback and localhost variants", () => {
|
||||
expect(isPrivateOrLoopbackHost("localhost")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("127.0.0.1")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("127.99.99.99")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("::1")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("foo.localhost")).toBe(true);
|
||||
});
|
||||
|
||||
it("flags RFC1918 ranges", () => {
|
||||
expect(isPrivateOrLoopbackHost("10.0.0.1")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("172.16.0.1")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("172.31.255.254")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("192.168.1.1")).toBe(true);
|
||||
});
|
||||
|
||||
it("flags link-local and 0.0.0.0", () => {
|
||||
expect(isPrivateOrLoopbackHost("169.254.169.254")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("0.0.0.0")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("fe80::1")).toBe(true);
|
||||
expect(isPrivateOrLoopbackHost("fd00::1")).toBe(true);
|
||||
});
|
||||
|
||||
it("allows public hosts", () => {
|
||||
expect(isPrivateOrLoopbackHost("git.example.com")).toBe(false);
|
||||
expect(isPrivateOrLoopbackHost("gitea.com")).toBe(false);
|
||||
expect(isPrivateOrLoopbackHost("172.32.0.1")).toBe(false);
|
||||
expect(isPrivateOrLoopbackHost("11.0.0.1")).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("probeGiteaHost", () => {
|
||||
@@ -170,6 +216,15 @@ describe("probeGiteaHost", () => {
|
||||
expect(result).toBe(true);
|
||||
expect(fetchMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("short-circuits to false for private/loopback hosts without making a request", async () => {
|
||||
const fetchMock = vi.fn();
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
expect(await probeGiteaHost("127.0.0.1")).toBe(false);
|
||||
expect(await probeGiteaHost("192.168.1.1")).toBe(false);
|
||||
expect(await probeGiteaHost("localhost")).toBe(false);
|
||||
expect(fetchMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveGiteaPinnedRef", () => {
|
||||
@@ -201,27 +256,6 @@ describe("resolveGiteaPinnedRef", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveGiteaCommitSha", () => {
|
||||
it("returns the sha from a commit response when given a 40-hex ref", async () => {
|
||||
const sha = "abc123abc123abc123abc123abc123abc123abcd";
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue(jsonResponse({ sha })),
|
||||
);
|
||||
const result = await resolveGiteaCommitSha("acme", "skills", sha, giteaApiBase("git.example.com"));
|
||||
expect(result).toBe(sha);
|
||||
});
|
||||
|
||||
it("refuses to call the API for a non-SHA ref", async () => {
|
||||
const fetchMock = vi.fn();
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
await expect(
|
||||
resolveGiteaCommitSha("acme", "skills", "main", giteaApiBase("git.example.com")),
|
||||
).rejects.toMatchObject({ status: 422 });
|
||||
expect(fetchMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("fetchGiteaTreeBlobPaths", () => {
|
||||
it("returns blob paths from a single-page tree", async () => {
|
||||
vi.stubGlobal(
|
||||
@@ -263,6 +297,22 @@ describe("fetchGiteaTreeBlobPaths", () => {
|
||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||
expect(String(fetchMock.mock.calls[1]?.[0])).toContain("page=2");
|
||||
});
|
||||
|
||||
it("throws when the page cap is hit while the tree is still truncated", async () => {
|
||||
// Return truncated=true on every page so the loop hits MAX_PAGES (50).
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
vi.fn().mockResolvedValue(
|
||||
jsonResponse({
|
||||
tree: [{ path: "page.md", type: "blob" }],
|
||||
truncated: true,
|
||||
}),
|
||||
),
|
||||
);
|
||||
await expect(
|
||||
fetchGiteaTreeBlobPaths(giteaApiBase("git.example.com"), "acme", "skills", "main"),
|
||||
).rejects.toThrow(/exceeds .* entries/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("fetchGiteaText", () => {
|
||||
|
||||
@@ -1133,18 +1133,73 @@ async function readUrlSkillImports(
|
||||
return segments.length >= 2 && !parsed.pathname.endsWith(".md");
|
||||
} catch { return false; } })();
|
||||
if (looksLikeRepoUrl) {
|
||||
const repoUrl = new URL(url);
|
||||
const repoHost = repoUrl.hostname.toLowerCase();
|
||||
const repoHost = new URL(url).hostname.toLowerCase();
|
||||
const isGitHubDotCom = repoHost === "github.com" || repoHost === "www.github.com";
|
||||
if (isGitHubDotCom) {
|
||||
const parsed = parseGitHubSourceUrl(url);
|
||||
// Only probe non-github.com hosts. Unknown hosts that aren't Gitea (e.g.
|
||||
// GitHub Enterprise) fall through to the GitHub path, which handles
|
||||
// arbitrary hostnames via parseGitHubSourceUrl.
|
||||
if (!isGitHubDotCom && (await probeGiteaHost(repoHost))) {
|
||||
return await readGiteaUrlSkillImports(companyId, sourceUrl, requestedSkillSlug);
|
||||
}
|
||||
return await readGitHubUrlSkillImports(companyId, sourceUrl, requestedSkillSlug);
|
||||
}
|
||||
|
||||
if (url.startsWith("http://") || url.startsWith("https://")) {
|
||||
const markdown = await fetchText(url);
|
||||
const parsedMarkdown = parseFrontmatterMarkdown(markdown);
|
||||
const urlObj = new URL(url);
|
||||
const fileName = path.posix.basename(urlObj.pathname);
|
||||
const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, fileName.replace(/\.md$/i, ""));
|
||||
const skillKey = readCanonicalSkillKey(
|
||||
parsedMarkdown.frontmatter,
|
||||
isPlainRecord(parsedMarkdown.frontmatter.metadata) ? parsedMarkdown.frontmatter.metadata : null,
|
||||
);
|
||||
const metadata = {
|
||||
...(skillKey ? { skillKey } : {}),
|
||||
sourceKind: "url",
|
||||
};
|
||||
const inventory: CompanySkillFileInventoryEntry[] = [{ path: "SKILL.md", kind: "skill" }];
|
||||
return {
|
||||
skills: [{
|
||||
key: deriveCanonicalSkillKey(companyId, {
|
||||
slug,
|
||||
sourceType: "url",
|
||||
sourceLocator: url,
|
||||
metadata,
|
||||
}),
|
||||
slug,
|
||||
name: asString(parsedMarkdown.frontmatter.name) ?? slug,
|
||||
description: asString(parsedMarkdown.frontmatter.description),
|
||||
markdown,
|
||||
sourceType: "url",
|
||||
sourceLocator: url,
|
||||
sourceRef: null,
|
||||
trustLevel: deriveTrustLevel(inventory),
|
||||
compatibility: "compatible",
|
||||
fileInventory: inventory,
|
||||
metadata,
|
||||
}],
|
||||
warnings,
|
||||
};
|
||||
}
|
||||
|
||||
throw unprocessable("Unsupported skill source. Use a local path or URL.");
|
||||
}
|
||||
|
||||
async function readGitHubUrlSkillImports(
|
||||
companyId: string,
|
||||
sourceUrl: string,
|
||||
requestedSkillSlug: string | null = null,
|
||||
): Promise<{ skills: ImportedSkill[]; warnings: string[] }> {
|
||||
const warnings: string[] = [];
|
||||
const parsed = parseGitHubSourceUrl(sourceUrl);
|
||||
const apiBase = gitHubApiBase(parsed.hostname);
|
||||
const { pinnedRef, trackingRef } = await resolveGitHubPinnedRef(parsed);
|
||||
let ref = pinnedRef;
|
||||
const ref = pinnedRef;
|
||||
const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>(
|
||||
`${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`,
|
||||
).catch(() => {
|
||||
throw unprocessable(`Failed to read GitHub tree for ${url}`);
|
||||
throw unprocessable(`Failed to read GitHub tree for ${sourceUrl}`);
|
||||
});
|
||||
const allPaths = (tree.tree ?? [])
|
||||
.filter((entry) => entry.type === "blob")
|
||||
@@ -1228,53 +1283,6 @@ async function readUrlSkillImports(
|
||||
);
|
||||
}
|
||||
return { skills, warnings };
|
||||
}
|
||||
|
||||
if (await probeGiteaHost(repoHost)) {
|
||||
return await readGiteaUrlSkillImports(companyId, sourceUrl, requestedSkillSlug);
|
||||
}
|
||||
}
|
||||
|
||||
if (url.startsWith("http://") || url.startsWith("https://")) {
|
||||
const markdown = await fetchText(url);
|
||||
const parsedMarkdown = parseFrontmatterMarkdown(markdown);
|
||||
const urlObj = new URL(url);
|
||||
const fileName = path.posix.basename(urlObj.pathname);
|
||||
const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, fileName.replace(/\.md$/i, ""));
|
||||
const skillKey = readCanonicalSkillKey(
|
||||
parsedMarkdown.frontmatter,
|
||||
isPlainRecord(parsedMarkdown.frontmatter.metadata) ? parsedMarkdown.frontmatter.metadata : null,
|
||||
);
|
||||
const metadata = {
|
||||
...(skillKey ? { skillKey } : {}),
|
||||
sourceKind: "url",
|
||||
};
|
||||
const inventory: CompanySkillFileInventoryEntry[] = [{ path: "SKILL.md", kind: "skill" }];
|
||||
return {
|
||||
skills: [{
|
||||
key: deriveCanonicalSkillKey(companyId, {
|
||||
slug,
|
||||
sourceType: "url",
|
||||
sourceLocator: url,
|
||||
metadata,
|
||||
}),
|
||||
slug,
|
||||
name: asString(parsedMarkdown.frontmatter.name) ?? slug,
|
||||
description: asString(parsedMarkdown.frontmatter.description),
|
||||
markdown,
|
||||
sourceType: "url",
|
||||
sourceLocator: url,
|
||||
sourceRef: null,
|
||||
trustLevel: deriveTrustLevel(inventory),
|
||||
compatibility: "compatible",
|
||||
fileInventory: inventory,
|
||||
metadata,
|
||||
}],
|
||||
warnings,
|
||||
};
|
||||
}
|
||||
|
||||
throw unprocessable("Unsupported skill source. Use a local path or URL.");
|
||||
}
|
||||
|
||||
async function readGiteaUrlSkillImports(
|
||||
|
||||
@@ -2,6 +2,41 @@ import { unprocessable } from "../errors.js";
|
||||
|
||||
const PROBE_CACHE_MAX_ENTRIES = 1024;
|
||||
|
||||
/**
 * Report whether `hostname` names a loopback, unspecified, link-local, or
 * private-range host, judged purely from its text.
 *
 * Recognised forms:
 * - `localhost`, `ip6-localhost`, `ip6-loopback`, and any `*.localhost` name;
 * - IPv4 literals in 0/8, 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16;
 * - IPv6 literals (bare or bracketed): `::1`, `::`, fe80::/10, fc00::/7, and
 *   IPv4-mapped addresses (`::ffff:a.b.c.d` or `::ffff:hhhh:hhhh`) whose
 *   embedded IPv4 address falls in one of the ranges above.
 *
 * Hostnames that resolve via DNS to private IPs are not blocked here — this
 * is a cheap surface-level guard against an operator pasting
 * `http://192.168.1.10/...` into a skill-source field, not a full SSRF
 * defence.
 *
 * @param hostname Host component of a URL; case, surrounding whitespace,
 *   IPv6 brackets, and a single trailing dot are ignored.
 * @returns `true` when the host should be treated as non-public.
 */
export function isPrivateOrLoopbackHost(hostname: string): boolean {
  let host = hostname.toLowerCase().trim();
  // Accept IPv6 literals both bare and in bracketed URL form.
  if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
  // "localhost." is the fully-qualified spelling of "localhost".
  if (host.length > 1 && host.endsWith(".")) host = host.slice(0, -1);

  if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") return true;
  if (host.endsWith(".localhost")) return true;

  // Only strings containing ":" can be IPv6 literals. Checking the fc/fd
  // prefixes on plain DNS names would wrongly flag hosts like "fdroid.org".
  if (host.includes(":")) return isPrivateIpv6Literal(host);

  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  return ipv4 !== null && isPrivateIpv4Prefix(Number(ipv4[1]), Number(ipv4[2]));
}

/** True when the first two IPv4 octets fall in a loopback/private/link-local/"this network" range. */
function isPrivateIpv4Prefix(a: number, b: number): boolean {
  if (a === 0 || a === 10 || a === 127) return true;
  if (a === 169 && b === 254) return true;
  if (a === 172 && b >= 16 && b <= 31) return true;
  return a === 192 && b === 168;
}

/** Classify a lowercased, bracket-free IPv6 literal. */
function isPrivateIpv6Literal(host: string): boolean {
  if (host === "::1" || host === "::") return true;
  // Link-local fe80::/10 covers first hextets fe80 through febf.
  if (/^fe[89ab][0-9a-f]:/.test(host)) return true;
  // Unique-local fc00::/7 covers first hextets fc00 through fdff.
  if (/^f[cd][0-9a-f]{2}:/.test(host)) return true;

  // IPv4-mapped addresses in dotted form, e.g. ::ffff:127.0.0.1.
  const dotted = /^::ffff:(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (dotted !== null) return isPrivateIpv4Prefix(Number(dotted[1]), Number(dotted[2]));

  // IPv4-mapped addresses in hex form, e.g. ::ffff:7f00:1 — the first
  // hextet after ffff carries the two leading IPv4 octets.
  const hex = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(host);
  if (hex !== null) {
    const high = Number(`0x${hex[1]}`);
    return isPrivateIpv4Prefix(high >> 8, high & 0xff);
  }
  return false;
}
|
||||
|
||||
/**
 * Throwing counterpart of `isPrivateOrLoopbackHost`.
 *
 * Returns normally when the host is not flagged; otherwise throws the error
 * built by `unprocessable`, naming the offending host in its message.
 *
 * @param hostname Host component of the skill-source URL to vet.
 */
export function assertPublicHost(hostname: string): void {
  const flagged = isPrivateOrLoopbackHost(hostname);
  if (!flagged) return;
  throw unprocessable(
    `Refusing to contact ${hostname}: private, loopback, and link-local hosts are not allowed as skill sources.`,
  );
}
|
||||
|
||||
/**
|
||||
* Process-lifetime cache of Gitea/Forgejo probe results.
|
||||
* Keyed by lowercased hostname. Positive and negative results are both cached
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
import path from "node:path";
|
||||
import { unprocessable } from "../errors.js";
|
||||
import {
|
||||
assertPublicHost,
|
||||
giteaApiBase,
|
||||
giteaFetch,
|
||||
getGiteaHostProbe,
|
||||
giteaHostProbeCache,
|
||||
isPrivateOrLoopbackHost,
|
||||
resolveRawGiteaUrl,
|
||||
resolveRawGiteaUrlLegacy,
|
||||
setGiteaHostProbe,
|
||||
} from "./gitea-fetch.js";
|
||||
|
||||
export {
|
||||
assertPublicHost,
|
||||
giteaApiBase,
|
||||
giteaHostProbeCache,
|
||||
isPrivateOrLoopbackHost,
|
||||
resolveRawGiteaUrl,
|
||||
resolveRawGiteaUrlLegacy,
|
||||
setGiteaHostProbe,
|
||||
@@ -41,10 +45,6 @@ export type GiteaRepoResponse = {
|
||||
default_branch?: string;
|
||||
};
|
||||
|
||||
export type GiteaCommitResponse = {
|
||||
sha?: string;
|
||||
};
|
||||
|
||||
export type GiteaTreeEntry = {
|
||||
path?: string;
|
||||
type?: string;
|
||||
@@ -95,6 +95,9 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
||||
}
|
||||
const owner = parts[0]!;
|
||||
const repo = parts[1]!.replace(/\.git$/i, "");
|
||||
if (!owner || !repo) {
|
||||
throw unprocessable("Invalid Gitea URL: owner and repo are required");
|
||||
}
|
||||
let ref = "main";
|
||||
let basePath = "";
|
||||
let filePath: string | null = null;
|
||||
@@ -109,6 +112,7 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
||||
basePath = filePath ? path.posix.dirname(filePath) : "";
|
||||
explicitRef = true;
|
||||
}
|
||||
assertPublicHost(url.hostname);
|
||||
return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
|
||||
}
|
||||
|
||||
@@ -116,12 +120,20 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
||||
* Probe a hostname to determine if it hosts a Gitea/Forgejo instance.
|
||||
* GETs `https://{host}/api/v1/version` with a short timeout. Cached for
|
||||
* the process lifetime in giteaHostProbeCache.
|
||||
*
|
||||
* Returns false without contacting the host for loopback / link-local /
|
||||
* RFC1918 literal-IP hosts, to avoid being used as an SSRF probe.
|
||||
*/
|
||||
export async function probeGiteaHost(hostname: string): Promise<boolean> {
|
||||
const key = hostname.toLowerCase();
|
||||
const cached = getGiteaHostProbe(key);
|
||||
if (cached !== undefined) return cached;
|
||||
|
||||
if (isPrivateOrLoopbackHost(key)) {
|
||||
setGiteaHostProbe(key, false);
|
||||
return false;
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), PROBE_TIMEOUT_MS);
|
||||
let result = false;
|
||||
@@ -156,27 +168,6 @@ export async function resolveGiteaDefaultBranch(
|
||||
return asString(response.default_branch) ?? "main";
|
||||
}
|
||||
|
||||
/**
 * Look up a commit through the Gitea `/commits/{sha}` endpoint and return the
 * SHA reported in the response.
 *
 * Only 40-character hexadecimal refs are accepted. Surrounding whitespace is
 * ignored, and the trimmed value is the one sent to the API. Any other ref is
 * rejected before a request is issued.
 *
 * @param owner Repository owner segment, interpolated into the request path as-is.
 * @param repo Repository name segment, interpolated into the request path as-is.
 * @param ref Candidate commit SHA.
 * @param apiBase Base URL of the Gitea API that the request path is appended to.
 * @returns The `sha` field of the commit response.
 * @throws The error built by `unprocessable` when `ref` is not a 40-hex SHA,
 *   or when the response carries no usable `sha`.
 */
export async function resolveGiteaCommitSha(
  owner: string,
  repo: string,
  ref: string,
  apiBase: string,
): Promise<string> {
  // Validate and send the same trimmed value, so padding that passes the
  // check can never leak into the request URL.
  const candidate = ref.trim();
  if (!/^[0-9a-f]{40}$/i.test(candidate)) {
    throw unprocessable(
      `Gitea /commits endpoint only resolves SHAs; got "${ref}". Use fetchGiteaBranch for branch names.`,
    );
  }
  const response = await fetchGiteaJson<GiteaCommitResponse>(
    `${apiBase}/repos/${owner}/${repo}/commits/${encodeURIComponent(candidate)}`,
  );
  const sha = asString(response.sha);
  if (!sha) {
    throw unprocessable(`Failed to resolve Gitea ref ${ref}`);
  }
  return sha;
}
|
||||
|
||||
/**
|
||||
* Resolve a parsed Gitea URL into a pinned commit SHA and a tracking ref.
|
||||
* Mirrors resolveGitHubPinnedRef (server/src/services/company-skills.ts:662-676).
|
||||
@@ -220,6 +211,7 @@ export async function fetchGiteaTreeBlobPaths(
|
||||
let page = 1;
|
||||
// hard cap so a misconfigured host can't make us loop forever
|
||||
const MAX_PAGES = 50;
|
||||
let stillTruncated = false;
|
||||
for (let i = 0; i < MAX_PAGES; i += 1) {
|
||||
const url =
|
||||
page === 1
|
||||
@@ -232,9 +224,17 @@ export async function fetchGiteaTreeBlobPaths(
|
||||
all.push(entry.path);
|
||||
}
|
||||
}
|
||||
if (!data.truncated) break;
|
||||
stillTruncated = Boolean(data.truncated);
|
||||
if (!stillTruncated) break;
|
||||
page += 1;
|
||||
}
|
||||
if (stillTruncated) {
|
||||
// Tree still truncated at the page cap — refuse rather than silently
|
||||
// import a partial skill listing, which would hide SKILL.md files.
|
||||
throw unprocessable(
|
||||
`Gitea repo tree for ${owner}/${repo}@${ref} exceeds ${MAX_PAGES * GITEA_TREE_PAGE_LIMIT} entries; refusing to import a partial listing.`,
|
||||
);
|
||||
}
|
||||
return all;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user