Compare commits

..

10 Commits

Author SHA1 Message Date
privilegedescalation-engineer 17a9aa165a fix test: properly mock pod selector calls to resolve immediately
The withTimeout test was failing because:
1. The mock made ALL ApiProxy.request calls hang, but the implementation
   has 4 sequential requests (1 CRD + 3 pod selectors) each wrapped in
   their own withTimeout
2. Using advanceTimersByTimeAsync with hanging promises causes act() to
   hang because flushPromises() waits for pending promises

Fix:
- Use mockReturnValueOnce for the CRD call (hanging) and
  mockResolvedValueOnce for each pod selector call (resolves immediately)
- Use synchronous advanceTimersByTime() instead of async version
- Simplified test flow: check loading=true initially, advance timers,
  then verify crdAvailable=false and loading=false

Fixes PRI-1040
2026-03-25 09:03:03 +00:00
privilegedescalation-engineer 3e306b70f8 Merge remote changes and resolve conflict - keep QA-requested fix with never-resolving promise 2026-03-25 07:42:29 +00:00
privilegedescalation-engineer 3aa9c15e80 fix test: use never-resolving promise and fake timers for withTimeout
The previous mock used mockRejectedValue which immediately rejects,
so Promise.race resolved before withTimeout's setTimeout fired.
Now we use new Promise(() => {}) to simulate a hanging request
and advance timers to properly exercise the 2s timeout logic.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 07:41:47 +00:00
privilegedescalation-engineer 957cf144a7 fix: reapply formatting after rebase
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 07:21:22 +00:00
privilegedescalation-engineer 52b1429ba0 fix: reformat withTimeout call and add unit test for timeout behavior
- Reformat withTimeout call to single line (prettier)
- Add unit test for CRD timeout behavior (crdAvailable=false when API fails)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 07:20:31 +00:00
Gandalf the Greybeard 66575982af fix: add request timeout wrapper to prevent E2E test hang
Add withTimeout() helper that wraps ApiProxy.request calls with a 2s timeout.
This prevents the plugin from hanging indefinitely when CRD requests fail
or network issues occur in the E2E environment.

Root cause: ApiProxy.request to non-existent CRDs would hang forever,
causing the Loading Intel GPU data... progressbar to never resolve.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 07:20:19 +00:00
privilegedescalation-engineer 66932958b1 fix: reformat withTimeout call and add unit test for timeout behavior
- Reformat withTimeout call to single line (prettier)
- Add unit test for CRD timeout behavior (crdAvailable=false when API fails)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 07:18:19 +00:00
privilegedescalation-ceo[bot] 0d5f65176b ci: re-trigger workflows after Actions approval setting change 2026-03-25 07:06:07 +00:00
Gandalf the Greybeard 5670c008e1 fix: add request timeout wrapper to prevent E2E test hang
Add withTimeout() helper that wraps ApiProxy.request calls with a 2s timeout.
This prevents the plugin from hanging indefinitely when CRD requests fail
or network issues occur in the E2E environment.

Root cause: ApiProxy.request to non-existent CRDs would hang forever,
causing the Loading Intel GPU data... progressbar to never resolve.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 05:57:15 +00:00
privilegedescalation-engineer f9325772bd fix(e2e): use specific regex for nodes page heading
The /node/i regex was too broad and matched both the page heading
'Intel GPU — Nodes' and the empty state 'No GPU Nodes Found',
causing a strict mode violation in Playwright.

Use /intel gpu.*nodes/i to match only the actual page heading,
which contains 'Intel GPU' before 'Nodes'.
2026-03-25 01:55:02 +00:00
4 changed files with 42 additions and 68 deletions
-64
View File
@@ -1,64 +0,0 @@
# Workflow Recovery
#
# Runs every five minutes (or on manual dispatch), looks for workflow runs that
# are in the `action_required` state and asks GitHub to re-run each of them.
name: Workflow Recovery

on:
  schedule:
    - cron: '*/5 * * * *'
  workflow_dispatch:

jobs:
  recover-stuck-runs:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      # Only mint an App token when the App is configured; the next step falls
      # back to the default github.token otherwise.
      - name: Generate GitHub App token
        id: app-token
        if: vars.RELEASE_APP_ID != ''
        uses: actions/create-github-app-token@v3
        with:
          app-id: ${{ vars.RELEASE_APP_ID }}
          private-key: ${{ secrets.RELEASE_APP_PRIVATE_KEY }}
          owner: privilegedescalation

      - name: Detect and re-run stuck action_required runs
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
        run: |
          echo "Checking for action_required runs in privilegedescalation org..."

          # A failed listing request degrades to an empty result instead of
          # failing the step.
          # NOTE(review): confirm that an org-level `/orgs/{org}/actions/runs`
          # endpoint exists; the documented listing endpoint appears to be
          # per-repository. If it does not exist this always yields zero runs.
          RUNS=$(curl -sf -H "Authorization: Bearer $GH_TOKEN" \
            -H "Accept: application/vnd.github+json" \
            "https://api.github.com/orgs/privilegedescalation/actions/runs?status=action_required&per_page=50" \
            || echo '{"workflow_runs": []}')

          COUNT=$(echo "$RUNS" | jq '.workflow_runs | length')
          echo "Found $COUNT action_required runs"

          if [ "$COUNT" = "0" ] || [ "$COUNT" = "null" ]; then
            echo "No stuck runs found. Exiting."
            exit 0
          fi

          # One compact JSON document per line, so `read -r` sees one run per
          # iteration.
          echo "$RUNS" | jq -r '.workflow_runs[] | @json' | while read -r run; do
            RUN_ID=$(echo "$run" | jq -r '.id')
            WORKFLOW_NAME=$(echo "$run" | jq -r '.name')
            REPO=$(echo "$run" | jq -r '.repository.full_name')
            BRANCH=$(echo "$run" | jq -r '.head_branch')
            CREATED_AT=$(echo "$run" | jq -r '.created_at')

            echo "Found stuck run: $WORKFLOW_NAME (#$RUN_ID) on $REPO branch $BRANCH"
            echo "Created at: $CREATED_AT"
            echo "Re-running..."

            # Deliberately no `-f` here: with `-f` an HTTP error makes curl exit
            # non-zero, which aborts the loop under the default `bash -e` shell
            # before the failure branch below can report it. The response body
            # is discarded and only the status code is captured; a transport
            # failure is reported as 000.
            HTTP_CODE=$(curl -s -o /dev/null -w '%{http_code}' -X POST \
              -H "Authorization: Bearer $GH_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              "https://api.github.com/repos/$REPO/actions/runs/$RUN_ID/rerun") || HTTP_CODE="000"

            if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "204" ]; then
              echo "Successfully re-ran $WORKFLOW_NAME (#$RUN_ID)"
            else
              echo "Failed to re-run $WORKFLOW_NAME (#$RUN_ID): $HTTP_CODE"
            fi
          done
+1 -1
View File
@@ -66,7 +66,7 @@ test.describe('Intel GPU plugin smoke tests', () => {
});
await page.goto('/c/main/intel-gpu/nodes');
await expect(page.getByRole('heading', { name: /node/i })).toBeVisible({ timeout: 15_000 });
await expect(page.getByRole('heading', { name: /intel gpu.*nodes/i })).toBeVisible({ timeout: 15_000 });
await page.goto('/c/main/intel-gpu/pods');
await expect(page.getByRole('heading', { name: /pod/i })).toBeVisible({ timeout: 15_000 });
+23
View File
@@ -151,4 +151,27 @@ describe('IntelGpuDataProvider', () => {
expect(callCountAfter).toBeGreaterThan(callCountBefore);
});
});
it('treats a hanging CRD request as unavailable after 2s timeout', async () => {
  vi.useFakeTimers();
  try {
    const nodeWrapper = { jsonData: {} };
    vi.mocked(K8s.ResourceClasses.Node.useList).mockReturnValue([[nodeWrapper], null] as any);
    vi.mocked(K8s.ResourceClasses.Pod.useList).mockReturnValue([[nodeWrapper], null] as any);

    // The first request (the CRD lookup) never settles, so only the timeout can
    // end it; the three follow-up pod-selector requests resolve immediately.
    vi.mocked(ApiProxy.request)
      .mockReturnValueOnce(new Promise(() => {}))
      .mockResolvedValueOnce({ items: [] })
      .mockResolvedValueOnce({ items: [] })
      .mockResolvedValueOnce({ items: [] });

    const { result } = renderHook(() => useIntelGpuContext(), { wrapper: Wrapper });

    expect(result.current.loading).toBe(true);

    // Fire the 2s timeout synchronously, then let React flush the resulting
    // state updates.
    vi.advanceTimersByTime(2000);
    await act(async () => {});

    expect(result.current.crdAvailable).toBe(false);
    expect(result.current.loading).toBe(false);
  } finally {
    // Restore real timers even when an assertion above throws, so fake timers
    // never leak into tests that run after this one.
    vi.useRealTimers();
  }
});
});
+18 -3
View File
@@ -69,6 +69,18 @@ export function useIntelGpuContext(): IntelGpuContextValue {
// Helpers
// ---------------------------------------------------------------------------
/** Timeout, in milliseconds, passed to `withTimeout` for each wrapped API request. */
const DEFAULT_REQUEST_TIMEOUT_MS = 2_000;

/**
 * Wraps a promise with a timeout, rejecting if it doesn't settle within `ms`.
 *
 * The timer is cancelled as soon as `promise` settles, so a request that
 * finishes in time leaves no pending timer behind.
 *
 * @param promise - The operation to bound in time.
 * @param ms - Maximum time to wait, in milliseconds.
 * @returns A promise that settles like `promise` when it wins the race, or
 *   rejects with `Error("Request timed out after <ms>ms")` otherwise.
 */
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    const timer = setTimeout(() => reject(new Error(`Request timed out after ${ms}ms`)), ms);
    promise.then(
      value => {
        clearTimeout(timer);
        resolve(value);
      },
      (error: unknown) => {
        clearTimeout(timer);
        reject(error);
      }
    );
  });
}
/** Extract raw Kubernetes JSON from Headlamp KubeObject wrappers. */
const extractJsonData = (items: unknown[]): unknown[] =>
items.map(item =>
@@ -108,8 +120,11 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
try {
// GpuDevicePlugin CRDs — graceful degradation if CRD not installed
try {
const pluginList = await ApiProxy.request(
`/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
const pluginList = await withTimeout(
ApiProxy.request(
`/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
),
DEFAULT_REQUEST_TIMEOUT_MS
);
if (!cancelled && isKubeList(pluginList)) {
setCrdAvailable(true);
@@ -139,7 +154,7 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
for (const url of pluginPodSelectors) {
try {
const list = await ApiProxy.request(url);
const list = await withTimeout(ApiProxy.request(url), DEFAULT_REQUEST_TIMEOUT_MS);
if (!cancelled && isKubeList(list)) {
const gpuPluginPods = filterIntelGpuPluginPods(list.items);
foundPluginPods.push(...gpuPluginPods);