fork: address PR #19 review findings for Gitea skill support
- Fix GitHub Enterprise regression: the dispatcher now probes for Gitea only on non-github.com hosts and falls back to the GitHub path for unknown hosts, preserving the GHE support that the earlier strict github.com match broke.
- Refactor readUrlSkillImports into a flat dispatcher with a sibling readGitHubUrlSkillImports helper, mirroring readGiteaUrlSkillImports.
- Add an SSRF guard (isPrivateOrLoopbackHost + assertPublicHost) in gitea-fetch; short-circuit probeGiteaHost and make parseGiteaSourceUrl reject loopback / RFC1918 / link-local literal IPs.
- Throw on fetchGiteaTreeBlobPaths cap-hit instead of silently returning a partial blob listing (which would hide SKILL.md files).
- Validate that the repo is non-empty in parseGiteaSourceUrl after the .git strip.
- Remove dead resolveGiteaCommitSha + GiteaCommitResponse (unused since the branches-endpoint follow-up).
- Tests updated and extended.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,9 +5,9 @@ import {
|
|||||||
fetchGiteaTreeBlobPaths,
|
fetchGiteaTreeBlobPaths,
|
||||||
giteaApiBase,
|
giteaApiBase,
|
||||||
giteaHostProbeCache,
|
giteaHostProbeCache,
|
||||||
|
isPrivateOrLoopbackHost,
|
||||||
parseGiteaSourceUrl,
|
parseGiteaSourceUrl,
|
||||||
probeGiteaHost,
|
probeGiteaHost,
|
||||||
resolveGiteaCommitSha,
|
|
||||||
resolveGiteaPinnedRef,
|
resolveGiteaPinnedRef,
|
||||||
resolveRawGiteaUrl,
|
resolveRawGiteaUrl,
|
||||||
resolveRawGiteaUrlLegacy,
|
resolveRawGiteaUrlLegacy,
|
||||||
@@ -109,6 +109,52 @@ describe("parseGiteaSourceUrl", () => {
|
|||||||
it("rejects URLs with fewer than 2 path segments", () => {
|
it("rejects URLs with fewer than 2 path segments", () => {
|
||||||
expect(() => parseGiteaSourceUrl("https://git.example.com/acme")).toThrow(/Invalid Gitea URL/);
|
expect(() => parseGiteaSourceUrl("https://git.example.com/acme")).toThrow(/Invalid Gitea URL/);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("rejects URLs with empty repo after .git strip", () => {
|
||||||
|
expect(() => parseGiteaSourceUrl("https://git.example.com/acme/.git")).toThrow(
|
||||||
|
/owner and repo are required/,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects URLs pointing at private/loopback hosts", () => {
|
||||||
|
expect(() => parseGiteaSourceUrl("https://192.168.1.10/acme/skills")).toThrow(
|
||||||
|
/private, loopback/,
|
||||||
|
);
|
||||||
|
expect(() => parseGiteaSourceUrl("https://localhost/acme/skills")).toThrow(
|
||||||
|
/private, loopback/,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("isPrivateOrLoopbackHost", () => {
|
||||||
|
it("flags loopback and localhost variants", () => {
|
||||||
|
expect(isPrivateOrLoopbackHost("localhost")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("127.0.0.1")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("127.99.99.99")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("::1")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("foo.localhost")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("flags RFC1918 ranges", () => {
|
||||||
|
expect(isPrivateOrLoopbackHost("10.0.0.1")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("172.16.0.1")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("172.31.255.254")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("192.168.1.1")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("flags link-local and 0.0.0.0", () => {
|
||||||
|
expect(isPrivateOrLoopbackHost("169.254.169.254")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("0.0.0.0")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("fe80::1")).toBe(true);
|
||||||
|
expect(isPrivateOrLoopbackHost("fd00::1")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("allows public hosts", () => {
|
||||||
|
expect(isPrivateOrLoopbackHost("git.example.com")).toBe(false);
|
||||||
|
expect(isPrivateOrLoopbackHost("gitea.com")).toBe(false);
|
||||||
|
expect(isPrivateOrLoopbackHost("172.32.0.1")).toBe(false);
|
||||||
|
expect(isPrivateOrLoopbackHost("11.0.0.1")).toBe(false);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("probeGiteaHost", () => {
|
describe("probeGiteaHost", () => {
|
||||||
@@ -170,6 +216,15 @@ describe("probeGiteaHost", () => {
|
|||||||
expect(result).toBe(true);
|
expect(result).toBe(true);
|
||||||
expect(fetchMock).not.toHaveBeenCalled();
|
expect(fetchMock).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("short-circuits to false for private/loopback hosts without making a request", async () => {
|
||||||
|
const fetchMock = vi.fn();
|
||||||
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
|
expect(await probeGiteaHost("127.0.0.1")).toBe(false);
|
||||||
|
expect(await probeGiteaHost("192.168.1.1")).toBe(false);
|
||||||
|
expect(await probeGiteaHost("localhost")).toBe(false);
|
||||||
|
expect(fetchMock).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("resolveGiteaPinnedRef", () => {
|
describe("resolveGiteaPinnedRef", () => {
|
||||||
@@ -201,27 +256,6 @@ describe("resolveGiteaPinnedRef", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("resolveGiteaCommitSha", () => {
|
|
||||||
it("returns the sha from a commit response when given a 40-hex ref", async () => {
|
|
||||||
const sha = "abc123abc123abc123abc123abc123abc123abcd";
|
|
||||||
vi.stubGlobal(
|
|
||||||
"fetch",
|
|
||||||
vi.fn().mockResolvedValue(jsonResponse({ sha })),
|
|
||||||
);
|
|
||||||
const result = await resolveGiteaCommitSha("acme", "skills", sha, giteaApiBase("git.example.com"));
|
|
||||||
expect(result).toBe(sha);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("refuses to call the API for a non-SHA ref", async () => {
|
|
||||||
const fetchMock = vi.fn();
|
|
||||||
vi.stubGlobal("fetch", fetchMock);
|
|
||||||
await expect(
|
|
||||||
resolveGiteaCommitSha("acme", "skills", "main", giteaApiBase("git.example.com")),
|
|
||||||
).rejects.toMatchObject({ status: 422 });
|
|
||||||
expect(fetchMock).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("fetchGiteaTreeBlobPaths", () => {
|
describe("fetchGiteaTreeBlobPaths", () => {
|
||||||
it("returns blob paths from a single-page tree", async () => {
|
it("returns blob paths from a single-page tree", async () => {
|
||||||
vi.stubGlobal(
|
vi.stubGlobal(
|
||||||
@@ -263,6 +297,22 @@ describe("fetchGiteaTreeBlobPaths", () => {
|
|||||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||||
expect(String(fetchMock.mock.calls[1]?.[0])).toContain("page=2");
|
expect(String(fetchMock.mock.calls[1]?.[0])).toContain("page=2");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("throws when the page cap is hit while the tree is still truncated", async () => {
|
||||||
|
// Return truncated=true on every page so the loop hits MAX_PAGES (50).
|
||||||
|
vi.stubGlobal(
|
||||||
|
"fetch",
|
||||||
|
vi.fn().mockResolvedValue(
|
||||||
|
jsonResponse({
|
||||||
|
tree: [{ path: "page.md", type: "blob" }],
|
||||||
|
truncated: true,
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
await expect(
|
||||||
|
fetchGiteaTreeBlobPaths(giteaApiBase("git.example.com"), "acme", "skills", "main"),
|
||||||
|
).rejects.toThrow(/exceeds .* entries/);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("fetchGiteaText", () => {
|
describe("fetchGiteaText", () => {
|
||||||
|
|||||||
@@ -1133,106 +1133,15 @@ async function readUrlSkillImports(
|
|||||||
return segments.length >= 2 && !parsed.pathname.endsWith(".md");
|
return segments.length >= 2 && !parsed.pathname.endsWith(".md");
|
||||||
} catch { return false; } })();
|
} catch { return false; } })();
|
||||||
if (looksLikeRepoUrl) {
|
if (looksLikeRepoUrl) {
|
||||||
const repoUrl = new URL(url);
|
const repoHost = new URL(url).hostname.toLowerCase();
|
||||||
const repoHost = repoUrl.hostname.toLowerCase();
|
|
||||||
const isGitHubDotCom = repoHost === "github.com" || repoHost === "www.github.com";
|
const isGitHubDotCom = repoHost === "github.com" || repoHost === "www.github.com";
|
||||||
if (isGitHubDotCom) {
|
// Only probe non-github.com hosts. Unknown hosts that aren't Gitea (e.g.
|
||||||
const parsed = parseGitHubSourceUrl(url);
|
// GitHub Enterprise) fall through to the GitHub path, which handles
|
||||||
const apiBase = gitHubApiBase(parsed.hostname);
|
// arbitrary hostnames via parseGitHubSourceUrl.
|
||||||
const { pinnedRef, trackingRef } = await resolveGitHubPinnedRef(parsed);
|
if (!isGitHubDotCom && (await probeGiteaHost(repoHost))) {
|
||||||
let ref = pinnedRef;
|
|
||||||
const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>(
|
|
||||||
`${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`,
|
|
||||||
).catch(() => {
|
|
||||||
throw unprocessable(`Failed to read GitHub tree for ${url}`);
|
|
||||||
});
|
|
||||||
const allPaths = (tree.tree ?? [])
|
|
||||||
.filter((entry) => entry.type === "blob")
|
|
||||||
.map((entry) => entry.path)
|
|
||||||
.filter((entry): entry is string => typeof entry === "string");
|
|
||||||
const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : "";
|
|
||||||
const scopedPaths = basePrefix
|
|
||||||
? allPaths.filter((entry) => entry.startsWith(basePrefix))
|
|
||||||
: allPaths;
|
|
||||||
const relativePaths = scopedPaths.map((entry) => basePrefix ? entry.slice(basePrefix.length) : entry);
|
|
||||||
const filteredPaths = parsed.filePath
|
|
||||||
? relativePaths.filter((entry) => entry === path.posix.relative(parsed.basePath || ".", parsed.filePath!))
|
|
||||||
: relativePaths;
|
|
||||||
const skillPaths = filteredPaths.filter(
|
|
||||||
(entry) => path.posix.basename(entry).toLowerCase() === "skill.md",
|
|
||||||
);
|
|
||||||
if (skillPaths.length === 0) {
|
|
||||||
throw unprocessable(
|
|
||||||
"No SKILL.md files were found in the provided GitHub source.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
const skills: ImportedSkill[] = [];
|
|
||||||
for (const relativeSkillPath of skillPaths) {
|
|
||||||
const repoSkillPath = basePrefix ? `${basePrefix}${relativeSkillPath}` : relativeSkillPath;
|
|
||||||
const markdown = await fetchText(resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoSkillPath));
|
|
||||||
const parsedMarkdown = parseFrontmatterMarkdown(markdown);
|
|
||||||
const skillDir = path.posix.dirname(relativeSkillPath);
|
|
||||||
const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, path.posix.basename(skillDir));
|
|
||||||
const skillKey = readCanonicalSkillKey(
|
|
||||||
parsedMarkdown.frontmatter,
|
|
||||||
isPlainRecord(parsedMarkdown.frontmatter.metadata) ? parsedMarkdown.frontmatter.metadata : null,
|
|
||||||
);
|
|
||||||
if (requestedSkillSlug && !matchesRequestedSkill(relativeSkillPath, requestedSkillSlug) && slug !== requestedSkillSlug) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const metadata = {
|
|
||||||
...(skillKey ? { skillKey } : {}),
|
|
||||||
sourceKind: "github",
|
|
||||||
...(parsed.hostname !== "github.com" ? { hostname: parsed.hostname } : {}),
|
|
||||||
owner: parsed.owner,
|
|
||||||
repo: parsed.repo,
|
|
||||||
ref,
|
|
||||||
trackingRef,
|
|
||||||
repoSkillDir: normalizeGitHubSkillDirectory(
|
|
||||||
basePrefix ? `${basePrefix}${skillDir}` : skillDir,
|
|
||||||
slug,
|
|
||||||
),
|
|
||||||
};
|
|
||||||
const inventory = filteredPaths
|
|
||||||
.filter((entry) => entry === relativeSkillPath || entry.startsWith(`${skillDir}/`))
|
|
||||||
.map((entry) => ({
|
|
||||||
path: entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1),
|
|
||||||
kind: classifyInventoryKind(entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1)),
|
|
||||||
}))
|
|
||||||
.sort((left, right) => left.path.localeCompare(right.path));
|
|
||||||
skills.push({
|
|
||||||
key: deriveCanonicalSkillKey(companyId, {
|
|
||||||
slug,
|
|
||||||
sourceType: "github",
|
|
||||||
sourceLocator: sourceUrl,
|
|
||||||
metadata,
|
|
||||||
}),
|
|
||||||
slug,
|
|
||||||
name: asString(parsedMarkdown.frontmatter.name) ?? slug,
|
|
||||||
description: asString(parsedMarkdown.frontmatter.description),
|
|
||||||
markdown,
|
|
||||||
sourceType: "github",
|
|
||||||
sourceLocator: sourceUrl,
|
|
||||||
sourceRef: ref,
|
|
||||||
trustLevel: deriveTrustLevel(inventory),
|
|
||||||
compatibility: "compatible",
|
|
||||||
fileInventory: inventory,
|
|
||||||
metadata,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if (skills.length === 0) {
|
|
||||||
throw unprocessable(
|
|
||||||
requestedSkillSlug
|
|
||||||
? `Skill ${requestedSkillSlug} was not found in the provided GitHub source.`
|
|
||||||
: "No SKILL.md files were found in the provided GitHub source.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return { skills, warnings };
|
|
||||||
}
|
|
||||||
|
|
||||||
if (await probeGiteaHost(repoHost)) {
|
|
||||||
return await readGiteaUrlSkillImports(companyId, sourceUrl, requestedSkillSlug);
|
return await readGiteaUrlSkillImports(companyId, sourceUrl, requestedSkillSlug);
|
||||||
}
|
}
|
||||||
|
return await readGitHubUrlSkillImports(companyId, sourceUrl, requestedSkillSlug);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (url.startsWith("http://") || url.startsWith("https://")) {
|
if (url.startsWith("http://") || url.startsWith("https://")) {
|
||||||
@@ -1277,6 +1186,105 @@ async function readUrlSkillImports(
|
|||||||
throw unprocessable("Unsupported skill source. Use a local path or URL.");
|
throw unprocessable("Unsupported skill source. Use a local path or URL.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function readGitHubUrlSkillImports(
|
||||||
|
companyId: string,
|
||||||
|
sourceUrl: string,
|
||||||
|
requestedSkillSlug: string | null = null,
|
||||||
|
): Promise<{ skills: ImportedSkill[]; warnings: string[] }> {
|
||||||
|
const warnings: string[] = [];
|
||||||
|
const parsed = parseGitHubSourceUrl(sourceUrl);
|
||||||
|
const apiBase = gitHubApiBase(parsed.hostname);
|
||||||
|
const { pinnedRef, trackingRef } = await resolveGitHubPinnedRef(parsed);
|
||||||
|
const ref = pinnedRef;
|
||||||
|
const tree = await fetchJson<{ tree?: Array<{ path: string; type: string }> }>(
|
||||||
|
`${apiBase}/repos/${parsed.owner}/${parsed.repo}/git/trees/${ref}?recursive=1`,
|
||||||
|
).catch(() => {
|
||||||
|
throw unprocessable(`Failed to read GitHub tree for ${sourceUrl}`);
|
||||||
|
});
|
||||||
|
const allPaths = (tree.tree ?? [])
|
||||||
|
.filter((entry) => entry.type === "blob")
|
||||||
|
.map((entry) => entry.path)
|
||||||
|
.filter((entry): entry is string => typeof entry === "string");
|
||||||
|
const basePrefix = parsed.basePath ? `${parsed.basePath.replace(/^\/+|\/+$/g, "")}/` : "";
|
||||||
|
const scopedPaths = basePrefix
|
||||||
|
? allPaths.filter((entry) => entry.startsWith(basePrefix))
|
||||||
|
: allPaths;
|
||||||
|
const relativePaths = scopedPaths.map((entry) => basePrefix ? entry.slice(basePrefix.length) : entry);
|
||||||
|
const filteredPaths = parsed.filePath
|
||||||
|
? relativePaths.filter((entry) => entry === path.posix.relative(parsed.basePath || ".", parsed.filePath!))
|
||||||
|
: relativePaths;
|
||||||
|
const skillPaths = filteredPaths.filter(
|
||||||
|
(entry) => path.posix.basename(entry).toLowerCase() === "skill.md",
|
||||||
|
);
|
||||||
|
if (skillPaths.length === 0) {
|
||||||
|
throw unprocessable(
|
||||||
|
"No SKILL.md files were found in the provided GitHub source.",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const skills: ImportedSkill[] = [];
|
||||||
|
for (const relativeSkillPath of skillPaths) {
|
||||||
|
const repoSkillPath = basePrefix ? `${basePrefix}${relativeSkillPath}` : relativeSkillPath;
|
||||||
|
const markdown = await fetchText(resolveRawGitHubUrl(parsed.hostname, parsed.owner, parsed.repo, ref, repoSkillPath));
|
||||||
|
const parsedMarkdown = parseFrontmatterMarkdown(markdown);
|
||||||
|
const skillDir = path.posix.dirname(relativeSkillPath);
|
||||||
|
const slug = deriveImportedSkillSlug(parsedMarkdown.frontmatter, path.posix.basename(skillDir));
|
||||||
|
const skillKey = readCanonicalSkillKey(
|
||||||
|
parsedMarkdown.frontmatter,
|
||||||
|
isPlainRecord(parsedMarkdown.frontmatter.metadata) ? parsedMarkdown.frontmatter.metadata : null,
|
||||||
|
);
|
||||||
|
if (requestedSkillSlug && !matchesRequestedSkill(relativeSkillPath, requestedSkillSlug) && slug !== requestedSkillSlug) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const metadata = {
|
||||||
|
...(skillKey ? { skillKey } : {}),
|
||||||
|
sourceKind: "github",
|
||||||
|
...(parsed.hostname !== "github.com" ? { hostname: parsed.hostname } : {}),
|
||||||
|
owner: parsed.owner,
|
||||||
|
repo: parsed.repo,
|
||||||
|
ref,
|
||||||
|
trackingRef,
|
||||||
|
repoSkillDir: normalizeGitHubSkillDirectory(
|
||||||
|
basePrefix ? `${basePrefix}${skillDir}` : skillDir,
|
||||||
|
slug,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
const inventory = filteredPaths
|
||||||
|
.filter((entry) => entry === relativeSkillPath || entry.startsWith(`${skillDir}/`))
|
||||||
|
.map((entry) => ({
|
||||||
|
path: entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1),
|
||||||
|
kind: classifyInventoryKind(entry === relativeSkillPath ? "SKILL.md" : entry.slice(skillDir.length + 1)),
|
||||||
|
}))
|
||||||
|
.sort((left, right) => left.path.localeCompare(right.path));
|
||||||
|
skills.push({
|
||||||
|
key: deriveCanonicalSkillKey(companyId, {
|
||||||
|
slug,
|
||||||
|
sourceType: "github",
|
||||||
|
sourceLocator: sourceUrl,
|
||||||
|
metadata,
|
||||||
|
}),
|
||||||
|
slug,
|
||||||
|
name: asString(parsedMarkdown.frontmatter.name) ?? slug,
|
||||||
|
description: asString(parsedMarkdown.frontmatter.description),
|
||||||
|
markdown,
|
||||||
|
sourceType: "github",
|
||||||
|
sourceLocator: sourceUrl,
|
||||||
|
sourceRef: ref,
|
||||||
|
trustLevel: deriveTrustLevel(inventory),
|
||||||
|
compatibility: "compatible",
|
||||||
|
fileInventory: inventory,
|
||||||
|
metadata,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (skills.length === 0) {
|
||||||
|
throw unprocessable(
|
||||||
|
requestedSkillSlug
|
||||||
|
? `Skill ${requestedSkillSlug} was not found in the provided GitHub source.`
|
||||||
|
: "No SKILL.md files were found in the provided GitHub source.",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return { skills, warnings };
|
||||||
|
}
|
||||||
|
|
||||||
async function readGiteaUrlSkillImports(
|
async function readGiteaUrlSkillImports(
|
||||||
companyId: string,
|
companyId: string,
|
||||||
sourceUrl: string,
|
sourceUrl: string,
|
||||||
|
|||||||
@@ -2,6 +2,41 @@ import { unprocessable } from "../errors.js";
|
|||||||
|
|
||||||
const PROBE_CACHE_MAX_ENTRIES = 1024;
|
const PROBE_CACHE_MAX_ENTRIES = 1024;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reject hostnames that resolve to loopback, link-local, or RFC1918 ranges
|
||||||
|
* supplied as literal IPs. Hostnames that resolve via DNS to private IPs are
|
||||||
|
* not blocked here — this is a cheap surface-level guard against an operator
|
||||||
|
* pasting `http://192.168.1.10/...` into a skill-source field, not a full
|
||||||
|
* SSRF defence.
|
||||||
|
*/
|
||||||
|
export function isPrivateOrLoopbackHost(hostname: string): boolean {
|
||||||
|
const host = hostname.toLowerCase().trim();
|
||||||
|
if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") return true;
|
||||||
|
if (host.endsWith(".localhost")) return true;
|
||||||
|
if (host === "::1" || host === "[::1]") return true;
|
||||||
|
if (host.startsWith("fe80:") || host.startsWith("[fe80:")) return true;
|
||||||
|
if (host.startsWith("fc") || host.startsWith("fd") || host.startsWith("[fc") || host.startsWith("[fd")) return true;
|
||||||
|
const ipv4 = host.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
|
||||||
|
if (ipv4) {
|
||||||
|
const [a, b] = [Number(ipv4[1]), Number(ipv4[2])];
|
||||||
|
if (a === 10) return true;
|
||||||
|
if (a === 127) return true;
|
||||||
|
if (a === 0) return true;
|
||||||
|
if (a === 169 && b === 254) return true;
|
||||||
|
if (a === 172 && b >= 16 && b <= 31) return true;
|
||||||
|
if (a === 192 && b === 168) return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function assertPublicHost(hostname: string): void {
|
||||||
|
if (isPrivateOrLoopbackHost(hostname)) {
|
||||||
|
throw unprocessable(
|
||||||
|
`Refusing to contact ${hostname}: private, loopback, and link-local hosts are not allowed as skill sources.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process-lifetime cache of Gitea/Forgejo probe results.
|
* Process-lifetime cache of Gitea/Forgejo probe results.
|
||||||
* Keyed by lowercased hostname. Positive and negative results are both cached
|
* Keyed by lowercased hostname. Positive and negative results are both cached
|
||||||
|
|||||||
@@ -1,18 +1,22 @@
|
|||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import { unprocessable } from "../errors.js";
|
import { unprocessable } from "../errors.js";
|
||||||
import {
|
import {
|
||||||
|
assertPublicHost,
|
||||||
giteaApiBase,
|
giteaApiBase,
|
||||||
giteaFetch,
|
giteaFetch,
|
||||||
getGiteaHostProbe,
|
getGiteaHostProbe,
|
||||||
giteaHostProbeCache,
|
giteaHostProbeCache,
|
||||||
|
isPrivateOrLoopbackHost,
|
||||||
resolveRawGiteaUrl,
|
resolveRawGiteaUrl,
|
||||||
resolveRawGiteaUrlLegacy,
|
resolveRawGiteaUrlLegacy,
|
||||||
setGiteaHostProbe,
|
setGiteaHostProbe,
|
||||||
} from "./gitea-fetch.js";
|
} from "./gitea-fetch.js";
|
||||||
|
|
||||||
export {
|
export {
|
||||||
|
assertPublicHost,
|
||||||
giteaApiBase,
|
giteaApiBase,
|
||||||
giteaHostProbeCache,
|
giteaHostProbeCache,
|
||||||
|
isPrivateOrLoopbackHost,
|
||||||
resolveRawGiteaUrl,
|
resolveRawGiteaUrl,
|
||||||
resolveRawGiteaUrlLegacy,
|
resolveRawGiteaUrlLegacy,
|
||||||
setGiteaHostProbe,
|
setGiteaHostProbe,
|
||||||
@@ -41,10 +45,6 @@ export type GiteaRepoResponse = {
|
|||||||
default_branch?: string;
|
default_branch?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type GiteaCommitResponse = {
|
|
||||||
sha?: string;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type GiteaTreeEntry = {
|
export type GiteaTreeEntry = {
|
||||||
path?: string;
|
path?: string;
|
||||||
type?: string;
|
type?: string;
|
||||||
@@ -95,6 +95,9 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
|||||||
}
|
}
|
||||||
const owner = parts[0]!;
|
const owner = parts[0]!;
|
||||||
const repo = parts[1]!.replace(/\.git$/i, "");
|
const repo = parts[1]!.replace(/\.git$/i, "");
|
||||||
|
if (!owner || !repo) {
|
||||||
|
throw unprocessable("Invalid Gitea URL: owner and repo are required");
|
||||||
|
}
|
||||||
let ref = "main";
|
let ref = "main";
|
||||||
let basePath = "";
|
let basePath = "";
|
||||||
let filePath: string | null = null;
|
let filePath: string | null = null;
|
||||||
@@ -109,6 +112,7 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
|||||||
basePath = filePath ? path.posix.dirname(filePath) : "";
|
basePath = filePath ? path.posix.dirname(filePath) : "";
|
||||||
explicitRef = true;
|
explicitRef = true;
|
||||||
}
|
}
|
||||||
|
assertPublicHost(url.hostname);
|
||||||
return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
|
return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,12 +120,20 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
|||||||
* Probe a hostname to determine if it hosts a Gitea/Forgejo instance.
|
* Probe a hostname to determine if it hosts a Gitea/Forgejo instance.
|
||||||
* GETs `https://{host}/api/v1/version` with a short timeout. Cached for
|
* GETs `https://{host}/api/v1/version` with a short timeout. Cached for
|
||||||
* the process lifetime in giteaHostProbeCache.
|
* the process lifetime in giteaHostProbeCache.
|
||||||
|
*
|
||||||
|
* Returns false without contacting the host for loopback / link-local /
|
||||||
|
* RFC1918 literal-IP hosts, to avoid being used as an SSRF probe.
|
||||||
*/
|
*/
|
||||||
export async function probeGiteaHost(hostname: string): Promise<boolean> {
|
export async function probeGiteaHost(hostname: string): Promise<boolean> {
|
||||||
const key = hostname.toLowerCase();
|
const key = hostname.toLowerCase();
|
||||||
const cached = getGiteaHostProbe(key);
|
const cached = getGiteaHostProbe(key);
|
||||||
if (cached !== undefined) return cached;
|
if (cached !== undefined) return cached;
|
||||||
|
|
||||||
|
if (isPrivateOrLoopbackHost(key)) {
|
||||||
|
setGiteaHostProbe(key, false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timer = setTimeout(() => controller.abort(), PROBE_TIMEOUT_MS);
|
const timer = setTimeout(() => controller.abort(), PROBE_TIMEOUT_MS);
|
||||||
let result = false;
|
let result = false;
|
||||||
@@ -156,27 +168,6 @@ export async function resolveGiteaDefaultBranch(
|
|||||||
return asString(response.default_branch) ?? "main";
|
return asString(response.default_branch) ?? "main";
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function resolveGiteaCommitSha(
|
|
||||||
owner: string,
|
|
||||||
repo: string,
|
|
||||||
ref: string,
|
|
||||||
apiBase: string,
|
|
||||||
): Promise<string> {
|
|
||||||
if (!/^[0-9a-f]{40}$/i.test(ref.trim())) {
|
|
||||||
throw unprocessable(
|
|
||||||
`Gitea /commits endpoint only resolves SHAs; got "${ref}". Use fetchGiteaBranch for branch names.`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
const response = await fetchGiteaJson<GiteaCommitResponse>(
|
|
||||||
`${apiBase}/repos/${owner}/${repo}/commits/${encodeURIComponent(ref)}`,
|
|
||||||
);
|
|
||||||
const sha = asString(response.sha);
|
|
||||||
if (!sha) {
|
|
||||||
throw unprocessable(`Failed to resolve Gitea ref ${ref}`);
|
|
||||||
}
|
|
||||||
return sha;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolve a parsed Gitea URL into a pinned commit SHA and a tracking ref.
|
* Resolve a parsed Gitea URL into a pinned commit SHA and a tracking ref.
|
||||||
* Mirrors resolveGitHubPinnedRef (server/src/services/company-skills.ts:662-676).
|
* Mirrors resolveGitHubPinnedRef (server/src/services/company-skills.ts:662-676).
|
||||||
@@ -220,6 +211,7 @@ export async function fetchGiteaTreeBlobPaths(
|
|||||||
let page = 1;
|
let page = 1;
|
||||||
// hard cap so a misconfigured host can't make us loop forever
|
// hard cap so a misconfigured host can't make us loop forever
|
||||||
const MAX_PAGES = 50;
|
const MAX_PAGES = 50;
|
||||||
|
let stillTruncated = false;
|
||||||
for (let i = 0; i < MAX_PAGES; i += 1) {
|
for (let i = 0; i < MAX_PAGES; i += 1) {
|
||||||
const url =
|
const url =
|
||||||
page === 1
|
page === 1
|
||||||
@@ -232,9 +224,17 @@ export async function fetchGiteaTreeBlobPaths(
|
|||||||
all.push(entry.path);
|
all.push(entry.path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!data.truncated) break;
|
stillTruncated = Boolean(data.truncated);
|
||||||
|
if (!stillTruncated) break;
|
||||||
page += 1;
|
page += 1;
|
||||||
}
|
}
|
||||||
|
if (stillTruncated) {
|
||||||
|
// Tree still truncated at the page cap — refuse rather than silently
|
||||||
|
// import a partial skill listing, which would hide SKILL.md files.
|
||||||
|
throw unprocessable(
|
||||||
|
`Gitea repo tree for ${owner}/${repo}@${ref} exceeds ${MAX_PAGES * GITEA_TREE_PAGE_LIMIT} entries; refusing to import a partial listing.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
return all;
|
return all;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user