forked from farhoodlabs/paperclip
33ab4f8cdd
- Fix GitHub Enterprise regression: dispatcher now probes for Gitea only on non-github.com hosts and falls back to the GitHub path for unknown hosts, preserving GHE support that the earlier strict github.com match broke. - Refactor readUrlSkillImports into a flat dispatcher with a sibling readGitHubUrlSkillImports helper, mirroring readGiteaUrlSkillImports. - Add SSRF guard (isPrivateOrLoopbackHost + assertPublicHost) in gitea-fetch; short-circuit probeGiteaHost and reject parseGiteaSourceUrl for loopback / RFC1918 / link-local literal IPs. - Throw on fetchGiteaTreeBlobPaths cap-hit instead of silently returning a partial blob listing (would hide SKILL.md files). - Validate non-empty repo in parseGiteaSourceUrl after .git strip. - Remove dead resolveGiteaCommitSha + GiteaCommitResponse (unused since the branches-endpoint follow-up). - Tests updated and extended. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
301 lines
8.7 KiB
TypeScript
301 lines
8.7 KiB
TypeScript
import path from "node:path";
|
|
import { unprocessable } from "../errors.js";
|
|
import {
|
|
assertPublicHost,
|
|
giteaApiBase,
|
|
giteaFetch,
|
|
getGiteaHostProbe,
|
|
giteaHostProbeCache,
|
|
isPrivateOrLoopbackHost,
|
|
resolveRawGiteaUrl,
|
|
resolveRawGiteaUrlLegacy,
|
|
setGiteaHostProbe,
|
|
} from "./gitea-fetch.js";
|
|
|
|
// Re-export of helpers imported above from ./gitea-fetch.js, under their
// original names (giteaFetch itself is imported but not re-exported).
export {
  assertPublicHost,
  getGiteaHostProbe,
  giteaApiBase,
  giteaHostProbeCache,
  isPrivateOrLoopbackHost,
  resolveRawGiteaUrl,
  resolveRawGiteaUrlLegacy,
  setGiteaHostProbe,
};
|
|
|
|
/** Milliseconds probeGiteaHost waits before aborting its version request. */
const PROBE_TIMEOUT_MS = 3 * 1000;

/** Value sent as the `limit` query parameter when listing a repo tree. */
const GITEA_TREE_PAGE_LIMIT = 1000;
|
|
|
|
/**
 * Components of a Gitea/Forgejo repository URL, as produced by
 * parseGiteaSourceUrl.
 */
export type GiteaSourceUrl = {
  /** `URL.hostname` of the source URL (host name only, no port). */
  hostname: string;
  /** First path segment of the URL. */
  owner: string;
  /** Second path segment, with a trailing `.git` removed. */
  repo: string;
  /** Ref segment following `tree`/`blob`, or the `"main"` placeholder. */
  ref: string;
  /** Directory inside the repo to import from; `""` when none was given. */
  basePath: string;
  /** Path following `blob/{ref}/`; stays `null` for non-blob URLs. */
  filePath: string | null;
  /**
   * When false, `ref` is only the `"main"` placeholder and was not taken
   * from the URL; resolveGiteaPinnedRef then looks up the default branch.
   */
  explicitRef: boolean;
};
|
|
|
|
/**
 * Fields read from the JSON returned by the
 * `/repos/{owner}/{repo}/branches/{branch}` endpoint (see fetchGiteaBranch).
 * Everything is optional because the payload is cast, not validated.
 */
export type GiteaBranchResponse = {
  name?: string;
  commit?: {
    /** Used by resolveGiteaPinnedRef as the commit SHA to pin. */
    id?: string;
    url?: string;
  };
};
|
|
|
|
/**
 * Fields read from the JSON returned by the `/repos/{owner}/{repo}` endpoint.
 * Optional because the payload is cast, not validated.
 */
export type GiteaRepoResponse = {
  /** Read by resolveGiteaDefaultBranch; it falls back to "main" when absent. */
  default_branch?: string;
};
|
|
|
|
/**
 * One element of the `tree` array in a git-trees response. Only `path` and
 * `type` are read in this module; the remaining fields are declared but
 * unused here.
 */
export type GiteaTreeEntry = {
  /** Collected by fetchGiteaTreeBlobPaths when `type` is "blob". */
  path?: string;
  /** Entry kind; fetchGiteaTreeBlobPaths keeps only "blob" entries. */
  type?: string;
  mode?: string;
  sha?: string;
  size?: number;
  url?: string;
};
|
|
|
|
/**
 * Fields read from the JSON returned by the
 * `/repos/{owner}/{repo}/git/trees/{ref}` endpoint.
 * Optional because the payload is cast, not validated.
 */
export type GiteaTreeResponse = {
  sha?: string;
  /** Entries of the current page; a non-array value is treated as empty. */
  tree?: GiteaTreeEntry[];
  /** Truthy makes fetchGiteaTreeBlobPaths request the next page. */
  truncated?: boolean;
};
|
|
|
|
/**
 * Normalise an untyped value to a trimmed, non-empty string.
 *
 * @param value Anything, typically a field from an unvalidated JSON payload.
 * @returns The trimmed string, or `null` when `value` is not a string or is
 *   empty after trimming.
 */
function asString(value: unknown): string | null {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return null;
}
|
|
|
|
/**
 * Type guard: true for any non-null, non-array object.
 *
 * @param value Value to test.
 * @returns Whether `value` can be indexed as a `Record<string, unknown>`.
 */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
|
|
/**
 * Parse a Gitea/Forgejo HTTPS repo URL into its components.
 * Accepts:
 *   https://{host}/{owner}/{repo}
 *   https://{host}/{owner}/{repo}.git
 *   https://{host}/{owner}/{repo}/tree/{ref}/{basePath...}
 *   https://{host}/{owner}/{repo}/blob/{ref}/{filePath}
 *
 * `explicitRef` is only set when the URL actually carries a ref segment. A
 * bare `.../tree` or `.../blob` URL keeps the `"main"` placeholder with
 * `explicitRef: false`, so the caller resolves the repo's real default
 * branch instead of pinning a guessed "main".
 *
 * @param rawUrl URL as supplied by the user.
 * @returns The parsed components. Path segments are taken from
 *   `URL.pathname` as-is (no percent-decoding). For a blob URL `basePath` is
 *   `path.posix.dirname(filePath)`, which is "." for a file at the repo root.
 * @throws The `unprocessable` error when the URL cannot be parsed, is not
 *   HTTPS, or lacks an owner/repo; also whatever `assertPublicHost` throws
 *   for the host name.
 */
export function parseGiteaSourceUrl(rawUrl: string): GiteaSourceUrl {
  let url: URL;
  try {
    url = new URL(rawUrl);
  } catch {
    throw unprocessable("Invalid Gitea URL");
  }
  if (url.protocol !== "https:") {
    throw unprocessable("Gitea source URL must use HTTPS");
  }

  const segments = url.pathname.split("/").filter(Boolean);
  if (segments.length < 2) {
    throw unprocessable("Invalid Gitea URL");
  }
  const owner = segments[0]!;
  // A repo segment of just ".git" becomes empty after the strip and is rejected below.
  const repo = segments[1]!.replace(/\.git$/i, "");
  if (!owner || !repo) {
    throw unprocessable("Invalid Gitea URL: owner and repo are required");
  }

  let ref = "main";
  let basePath = "";
  let filePath: string | null = null;
  let explicitRef = false;

  const kind = segments[2];
  if (kind === "tree" || kind === "blob") {
    const refSegment = segments[3];
    ref = refSegment ?? "main";
    // Only claim an explicit ref when the URL really contained one.
    explicitRef = refSegment !== undefined;
    const remainder = segments.slice(4).join("/");
    if (kind === "tree") {
      basePath = remainder;
    } else {
      filePath = remainder;
      basePath = remainder ? path.posix.dirname(remainder) : "";
    }
  }

  // NOTE(review): only url.hostname is kept, so an explicit non-default port
  // in rawUrl is silently dropped — confirm that is intended.
  assertPublicHost(url.hostname);
  return { hostname: url.hostname, owner, repo, ref, basePath, filePath, explicitRef };
}
|
|
|
|
/**
 * Decide whether a host serves a Gitea/Forgejo API.
 *
 * Issues `GET https://{host}/api/v1/version`, aborted after PROBE_TIMEOUT_MS,
 * and answers true only for an OK response whose JSON body is an object with
 * a string `version`. Every outcome — including failures — is stored via
 * setGiteaHostProbe under the lower-cased host name and reused on later calls.
 *
 * Hosts flagged by isPrivateOrLoopbackHost are answered (and cached) as false
 * without any network request, so this function cannot be used as an SSRF
 * probe against them.
 *
 * @param hostname Host name to probe; compared case-insensitively.
 * @returns Whether the host looked like Gitea/Forgejo.
 */
export async function probeGiteaHost(hostname: string): Promise<boolean> {
  const host = hostname.toLowerCase();

  const remembered = getGiteaHostProbe(host);
  if (remembered !== undefined) return remembered;

  // Record the verdict and hand it back in one step.
  const remember = (verdict: boolean): boolean => {
    setGiteaHostProbe(host, verdict);
    return verdict;
  };

  if (isPrivateOrLoopbackHost(host)) {
    return remember(false);
  }

  const aborter = new AbortController();
  const timeoutHandle = setTimeout(() => aborter.abort(), PROBE_TIMEOUT_MS);
  let looksLikeGitea = false;
  try {
    const reply = await fetch(`https://${host}/api/v1/version`, {
      method: "GET",
      signal: aborter.signal,
      headers: { accept: "application/json" },
    });
    if (reply.ok) {
      // An unparsable body counts as "no version field".
      const payload: unknown = await reply.json().catch(() => null);
      looksLikeGitea = isPlainRecord(payload) && typeof payload.version === "string";
    }
  } catch {
    // network error, abort, parse error — all treated as "not gitea"
    // NOTE(review): this transient failure is cached like a definitive answer.
  } finally {
    clearTimeout(timeoutHandle);
  }

  return remember(looksLikeGitea);
}
|
|
|
|
/**
 * Look up a repository's default branch.
 *
 * @param owner Repository owner, interpolated into the URL as-is.
 * @param repo Repository name, interpolated into the URL as-is.
 * @param apiBase API root to prefix the `/repos/...` path with.
 * @returns The trimmed `default_branch` from the repo record, or "main" when
 *   that field is missing, not a string, or blank.
 * @throws Whatever fetchGiteaJson throws for a failed request.
 */
export async function resolveGiteaDefaultBranch(
  owner: string,
  repo: string,
  apiBase: string,
): Promise<string> {
  const repoEndpoint = `${apiBase}/repos/${owner}/${repo}`;
  const { default_branch: defaultBranch } =
    await fetchGiteaJson<GiteaRepoResponse>(repoEndpoint);
  return asString(defaultBranch) ?? "main";
}
|
|
|
|
/**
 * Turn a parsed Gitea URL into a commit SHA to pin plus the ref to track.
 *
 * A ref that already is a 40-character hex SHA is returned unchanged with no
 * network access. Otherwise the tracking ref is the URL's ref when
 * `explicitRef` is set, else the repo's default branch, and the pinned SHA is
 * that branch record's `commit.id`.
 *
 * @param parsed Output of parseGiteaSourceUrl.
 * @returns `pinnedRef` (commit SHA) and `trackingRef` (`null` only for a SHA
 *   ref that was not explicit).
 * @throws The `unprocessable` error when the branch record has no usable
 *   commit id; also whatever the underlying fetch helpers throw.
 */
export async function resolveGiteaPinnedRef(parsed: GiteaSourceUrl): Promise<{
  pinnedRef: string;
  trackingRef: string | null;
}> {
  const { ref, explicitRef, hostname, owner, repo } = parsed;

  const isFullSha = /^[0-9a-f]{40}$/i.test(ref.trim());
  if (isFullSha) {
    return { pinnedRef: ref, trackingRef: explicitRef ? ref : null };
  }

  const apiBase = giteaApiBase(hostname);

  let trackingRef: string;
  if (explicitRef) {
    trackingRef = ref;
  } else {
    trackingRef = await resolveGiteaDefaultBranch(owner, repo, apiBase);
  }

  // The SHA is read from the branches endpoint; per the original author's
  // note, /repos/{o}/{r}/commits/{ref} answers 404 for a branch name.
  const branchRecord = await fetchGiteaBranch(apiBase, owner, repo, trackingRef);
  const pinnedRef = asString(branchRecord.commit?.id);
  if (pinnedRef === null) {
    throw unprocessable(`Failed to resolve Gitea ref ${trackingRef}`);
  }
  return { pinnedRef, trackingRef };
}
|
|
|
|
/**
 * List the paths of all blobs in a repo tree at the given ref.
 *
 * Requests the recursive tree with `limit=1000`; while a response reports
 * `truncated`, the next page is requested with `&page=N`.
 *
 * @param apiBase API root to prefix the `/repos/...` path with.
 * @param owner Repository owner, interpolated into the URL as-is.
 * @param repo Repository name, interpolated into the URL as-is.
 * @param ref Tree-ish to list, interpolated into the URL as-is.
 * @returns Blob paths in the order the pages returned them.
 * @throws The `unprocessable` error when the listing is still truncated after
 *   the page cap; also whatever fetchGiteaJson throws.
 */
export async function fetchGiteaTreeBlobPaths(
  apiBase: string,
  owner: string,
  repo: string,
  ref: string,
): Promise<string[]> {
  // hard cap so a misconfigured host can't make us loop forever
  const MAX_PAGES = 50;
  const firstPageUrl = `${apiBase}/repos/${owner}/${repo}/git/trees/${ref}?recursive=true&limit=${GITEA_TREE_PAGE_LIMIT}`;
  const blobPaths: string[] = [];

  for (let page = 1; page <= MAX_PAGES; page += 1) {
    // The first request carries no page parameter; later ones append it.
    const pageUrl = page === 1 ? firstPageUrl : `${firstPageUrl}&page=${page}`;
    const { tree, truncated } = await fetchGiteaJson<GiteaTreeResponse>(pageUrl);

    for (const entry of Array.isArray(tree) ? tree : []) {
      if (entry.type !== "blob" || typeof entry.path !== "string") continue;
      blobPaths.push(entry.path);
    }

    if (!truncated) {
      return blobPaths;
    }
  }

  // Tree still truncated at the page cap — refuse rather than silently
  // import a partial skill listing, which would hide SKILL.md files.
  throw unprocessable(
    `Gitea repo tree for ${owner}/${repo}@${ref} exceeds ${MAX_PAGES * GITEA_TREE_PAGE_LIMIT} entries; refusing to import a partial listing.`,
  );
}
|
|
|
|
/**
 * Download one file from a Gitea/Forgejo repo as text.
 *
 * The URL built by resolveRawGiteaUrl is tried first. Only when that answers
 * 404 is the URL from resolveRawGiteaUrlLegacy tried; any other non-OK status
 * fails immediately.
 *
 * @param hostname Host serving the repository.
 * @param owner Repository owner.
 * @param repo Repository name.
 * @param ref Branch, tag or commit to read from.
 * @param filePath Path of the file inside the repository.
 * @returns The response body as text.
 * @throws The `unprocessable` error naming the URL and HTTP status when the
 *   file cannot be fetched.
 */
export async function fetchGiteaText(
  hostname: string,
  owner: string,
  repo: string,
  ref: string,
  filePath: string,
): Promise<string> {
  // Each request gets its own init object, as in a plain inline call.
  const requestText = (target: string) =>
    giteaFetch(target, { headers: { accept: "text/plain" } });

  const canonical = resolveRawGiteaUrl(hostname, owner, repo, ref, filePath);
  const canonicalResponse = await requestText(canonical);
  if (!canonicalResponse.ok) {
    if (canonicalResponse.status !== 404) {
      throw unprocessable(`Failed to fetch ${canonical}: ${canonicalResponse.status}`);
    }
  } else {
    return canonicalResponse.text();
  }

  // 404 on the canonical form: retry with the legacy URL layout.
  const legacy = resolveRawGiteaUrlLegacy(hostname, owner, repo, ref, filePath);
  const legacyResponse = await requestText(legacy);
  if (legacyResponse.ok) {
    return legacyResponse.text();
  }
  throw unprocessable(`Failed to fetch ${legacy}: ${legacyResponse.status}`);
}
|
|
|
|
/**
 * Fetch the record of a single branch.
 *
 * @param apiBase API root to prefix the `/repos/...` path with.
 * @param owner Repository owner, interpolated into the URL as-is.
 * @param repo Repository name, interpolated into the URL as-is.
 * @param branch Branch name; percent-encoded before being placed in the URL.
 * @returns The parsed JSON body, typed as GiteaBranchResponse (not validated).
 * @throws Whatever fetchGiteaJson throws for a failed request.
 */
export async function fetchGiteaBranch(
  apiBase: string,
  owner: string,
  repo: string,
  branch: string,
): Promise<GiteaBranchResponse> {
  const branchSegment = encodeURIComponent(branch);
  const endpoint = `${apiBase}/repos/${owner}/${repo}/branches/${branchSegment}`;
  return fetchGiteaJson<GiteaBranchResponse>(endpoint);
}
|
|
|
|
/**
 * GET a URL through giteaFetch and return its JSON body.
 *
 * The body is cast to `T` without validation, so callers must treat every
 * field as possibly missing.
 *
 * @param url Absolute URL to request.
 * @returns The parsed JSON body, typed as `T`.
 * @throws The `unprocessable` error when the response status is not OK, or
 *   when the body cannot be read as JSON (for example an HTML page served by
 *   a host that is not actually a Gitea API).
 */
export async function fetchGiteaJson<T>(url: string): Promise<T> {
  const response = await giteaFetch(url, {
    headers: { accept: "application/json" },
  });
  if (!response.ok) {
    throw unprocessable(`Failed to fetch ${url}: ${response.status}`);
  }

  let payload: unknown;
  try {
    payload = await response.json();
  } catch {
    // Report a bad body the same way as every other remote failure here,
    // rather than letting a raw SyntaxError escape to the caller.
    throw unprocessable(`Failed to read JSON response from ${url}`);
  }
  return payload as T;
}
|