forked from farhoodlabs/paperclip
fork: address PR #19 review findings for Gitea skill support
- Fix GitHub Enterprise regression: dispatcher now probes for Gitea only on non-github.com hosts and falls back to the GitHub path for unknown hosts, preserving GHE support that the earlier strict github.com match broke.
- Refactor readUrlSkillImports into a flat dispatcher with a sibling readGitHubUrlSkillImports helper, mirroring readGiteaUrlSkillImports.
- Add SSRF guard (isPrivateOrLoopbackHost + assertPublicHost) in gitea-fetch; short-circuit probeGiteaHost and reject parseGiteaSourceUrl for loopback / RFC1918 / link-local literal IPs.
- Throw on fetchGiteaTreeBlobPaths cap-hit instead of silently returning a partial blob listing (would hide SKILL.md files).
- Validate non-empty repo in parseGiteaSourceUrl after .git strip.
- Remove dead resolveGiteaCommitSha + GiteaCommitResponse (unused since the branches-endpoint follow-up).
- Tests updated and extended.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,18 +1,22 @@
|
||||
import path from "node:path";
|
||||
import { unprocessable } from "../errors.js";
|
||||
import {
|
||||
assertPublicHost,
|
||||
giteaApiBase,
|
||||
giteaFetch,
|
||||
getGiteaHostProbe,
|
||||
giteaHostProbeCache,
|
||||
isPrivateOrLoopbackHost,
|
||||
resolveRawGiteaUrl,
|
||||
resolveRawGiteaUrlLegacy,
|
||||
setGiteaHostProbe,
|
||||
} from "./gitea-fetch.js";
|
||||
|
||||
export {
|
||||
assertPublicHost,
|
||||
giteaApiBase,
|
||||
giteaHostProbeCache,
|
||||
isPrivateOrLoopbackHost,
|
||||
resolveRawGiteaUrl,
|
||||
resolveRawGiteaUrlLegacy,
|
||||
setGiteaHostProbe,
|
||||
@@ -41,10 +45,6 @@ export type GiteaRepoResponse = {
|
||||
default_branch?: string;
|
||||
};
|
||||
|
||||
export type GiteaCommitResponse = {
|
||||
sha?: string;
|
||||
};
|
||||
|
||||
export type GiteaTreeEntry = {
|
||||
path?: string;
|
||||
type?: string;
|
||||
@@ -95,6 +95,9 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
||||
}
|
||||
const owner = parts[0]!;
|
||||
const repo = parts[1]!.replace(/\.git$/i, "");
|
||||
if (!owner || !repo) {
|
||||
throw unprocessable("Invalid Gitea URL: owner and repo are required");
|
||||
}
|
||||
let ref = "main";
|
||||
let basePath = "";
|
||||
let filePath: string | null = null;
|
||||
@@ -109,6 +112,7 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
||||
basePath = filePath ? path.posix.dirname(filePath) : "";
|
||||
explicitRef = true;
|
||||
}
|
||||
assertPublicHost(url.hostname);
|
||||
return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
|
||||
}
|
||||
|
||||
@@ -116,12 +120,20 @@ export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
|
||||
* Probe a hostname to determine if it hosts a Gitea/Forgejo instance.
|
||||
* GETs `https://{host}/api/v1/version` with a short timeout. Cached for
|
||||
* the process lifetime in giteaHostProbeCache.
|
||||
*
|
||||
* Returns false without contacting the host for loopback / link-local /
|
||||
* RFC1918 literal-IP hosts, to avoid being used as an SSRF probe.
|
||||
*/
|
||||
export async function probeGiteaHost(hostname: string): Promise<boolean> {
|
||||
const key = hostname.toLowerCase();
|
||||
const cached = getGiteaHostProbe(key);
|
||||
if (cached !== undefined) return cached;
|
||||
|
||||
if (isPrivateOrLoopbackHost(key)) {
|
||||
setGiteaHostProbe(key, false);
|
||||
return false;
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), PROBE_TIMEOUT_MS);
|
||||
let result = false;
|
||||
@@ -156,27 +168,6 @@ export async function resolveGiteaDefaultBranch(
|
||||
return asString(response.default_branch) ?? "main";
|
||||
}
|
||||
|
||||
export async function resolveGiteaCommitSha(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
apiBase: string,
|
||||
): Promise<string> {
|
||||
if (!/^[0-9a-f]{40}$/i.test(ref.trim())) {
|
||||
throw unprocessable(
|
||||
`Gitea /commits endpoint only resolves SHAs; got "${ref}". Use fetchGiteaBranch for branch names.`,
|
||||
);
|
||||
}
|
||||
const response = await fetchGiteaJson<GiteaCommitResponse>(
|
||||
`${apiBase}/repos/${owner}/${repo}/commits/${encodeURIComponent(ref)}`,
|
||||
);
|
||||
const sha = asString(response.sha);
|
||||
if (!sha) {
|
||||
throw unprocessable(`Failed to resolve Gitea ref ${ref}`);
|
||||
}
|
||||
return sha;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a parsed Gitea URL into a pinned commit SHA and a tracking ref.
|
||||
* Mirrors resolveGitHubPinnedRef (server/src/services/company-skills.ts:662-676).
|
||||
@@ -220,6 +211,7 @@ export async function fetchGiteaTreeBlobPaths(
|
||||
let page = 1;
|
||||
// hard cap so a misconfigured host can't make us loop forever
|
||||
const MAX_PAGES = 50;
|
||||
let stillTruncated = false;
|
||||
for (let i = 0; i < MAX_PAGES; i += 1) {
|
||||
const url =
|
||||
page === 1
|
||||
@@ -232,9 +224,17 @@ export async function fetchGiteaTreeBlobPaths(
|
||||
all.push(entry.path);
|
||||
}
|
||||
}
|
||||
if (!data.truncated) break;
|
||||
stillTruncated = Boolean(data.truncated);
|
||||
if (!stillTruncated) break;
|
||||
page += 1;
|
||||
}
|
||||
if (stillTruncated) {
|
||||
// Tree still truncated at the page cap — refuse rather than silently
|
||||
// import a partial skill listing, which would hide SKILL.md files.
|
||||
throw unprocessable(
|
||||
`Gitea repo tree for ${owner}/${repo}@${ref} exceeds ${MAX_PAGES * GITEA_TREE_PAGE_LIMIT} entries; refusing to import a partial listing.`,
|
||||
);
|
||||
}
|
||||
return all;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user