Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a3629127b4 |
@@ -16,5 +16,3 @@ jobs:
|
||||
dual-approval:
|
||||
uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{ github.event.pull_request.number }}
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
name: Workflow Recovery
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '*/5 * * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
recover-stuck-runs:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
if: vars.RELEASE_APP_ID != ''
|
||||
uses: actions/create-github-app-token@v3
|
||||
with:
|
||||
app-id: ${{ vars.RELEASE_APP_ID }}
|
||||
private-key: ${{ secrets.RELEASE_APP_PRIVATE_KEY }}
|
||||
owner: privilegedescalation
|
||||
|
||||
- name: Detect and re-run stuck action_required runs
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
|
||||
run: |
|
||||
echo "Checking for action_required runs in privilegedescalation org..."
|
||||
|
||||
RUNS=$(curl -sf -H "Authorization: Bearer $GH_TOKEN" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/orgs/privilegedescalation/actions/runs?status=action_required&per_page=50" \
|
||||
|| echo '{"workflow_runs": []}')
|
||||
|
||||
COUNT=$(echo "$RUNS" | jq '.workflow_runs | length')
|
||||
echo "Found $COUNT action_required runs"
|
||||
|
||||
if [ "$COUNT" = "0" ] || [ "$COUNT" = "null" ]; then
|
||||
echo "No stuck runs found. Exiting."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "$RUNS" | jq -r '.workflow_runs[] | @json' | while read -r run; do
|
||||
RUN_ID=$(echo "$run" | jq -r '.id')
|
||||
WORKFLOW_NAME=$(echo "$run" | jq -r '.name')
|
||||
REPO=$(echo "$run" | jq -r '.repository.full_name')
|
||||
BRANCH=$(echo "$run" | jq -r '.head_branch')
|
||||
CREATED_AT=$(echo "$run" | jq -r '.created_at')
|
||||
|
||||
echo "Found stuck run: $WORKFLOW_NAME (#$RUN_ID) on $REPO branch $BRANCH"
|
||||
echo "Created at: $CREATED_AT"
|
||||
echo "Re-running..."
|
||||
|
||||
RESP=$(curl -sf -X POST \
|
||||
-H "Authorization: Bearer $GH_TOKEN" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/$REPO/actions/runs/$RUN_ID/rerun" \
|
||||
-w "\n%{http_code}")
|
||||
|
||||
HTTP_CODE=$(echo "$RESP" | tail -1)
|
||||
if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "204" ]; then
|
||||
echo "Successfully re-ran $WORKFLOW_NAME (#$RUN_ID)"
|
||||
else
|
||||
echo "Failed to re-run $WORKFLOW_NAME (#$RUN_ID): $HTTP_CODE"
|
||||
fi
|
||||
done
|
||||
+3
-3
@@ -1,4 +1,4 @@
|
||||
version: "1.1.0"
|
||||
version: "1.0.0"
|
||||
name: headlamp-intel-gpu
|
||||
displayName: Intel GPU
|
||||
description: >-
|
||||
@@ -99,7 +99,7 @@ screenshots:
|
||||
url: https://raw.githubusercontent.com/privilegedescalation/headlamp-intel-gpu-plugin/main/docs/screenshots/03-metrics.svg
|
||||
|
||||
annotations:
|
||||
headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v1.1.0/intel-gpu-1.1.0.tar.gz"
|
||||
headlamp/plugin/archive-checksum: sha256:e212381f38c331383604b06f6552997fcba5c8b42a3bd828e3b43ed3e5028448
|
||||
headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v1.0.0/intel-gpu-1.0.0.tar.gz"
|
||||
headlamp/plugin/archive-checksum: sha256:93d6c531e7c12440c9625138f0645fc0c3521b574d0089492759699b324943f0
|
||||
headlamp/plugin/version-compat: ">=0.20.0"
|
||||
headlamp/plugin/distro-compat: "in-cluster,web,app"
|
||||
|
||||
@@ -19,14 +19,14 @@ test.describe('Intel GPU plugin smoke tests', () => {
|
||||
|
||||
// Should navigate to the overview route
|
||||
await expect(page).toHaveURL(/\/intel-gpu$/);
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Overview/i })).toBeVisible();
|
||||
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible();
|
||||
});
|
||||
|
||||
test('overview page renders GPU device list or empty state', async ({ page }) => {
|
||||
await page.goto('/c/main/intel-gpu');
|
||||
|
||||
// Overview heading should be present
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Overview/i })).toBeVisible({
|
||||
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
|
||||
@@ -43,7 +43,7 @@ test.describe('Intel GPU plugin smoke tests', () => {
|
||||
test('device plugins page renders or shows empty state', async ({ page }) => {
|
||||
await page.goto('/c/main/intel-gpu/device-plugins');
|
||||
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Device Plugins/i })).toBeVisible({
|
||||
await expect(page.getByRole('heading', { name: /device plugin/i })).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
|
||||
@@ -61,18 +61,18 @@ test.describe('Intel GPU plugin smoke tests', () => {
|
||||
// not after clicking the parent entry from the overview. Test route
|
||||
// accessibility via direct navigation — each route must render its heading.
|
||||
await page.goto('/c/main/intel-gpu');
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Overview/i })).toBeVisible({
|
||||
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
|
||||
await page.goto('/c/main/intel-gpu/nodes');
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Nodes/i })).toBeVisible({ timeout: 15_000 });
|
||||
await expect(page.getByRole('heading', { name: /node/i })).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
await page.goto('/c/main/intel-gpu/pods');
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Pods/i })).toBeVisible({ timeout: 15_000 });
|
||||
await expect(page.getByRole('heading', { name: /pod/i })).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
await page.goto('/c/main/intel-gpu/metrics');
|
||||
await expect(page.getByRole('heading', { name: /Intel GPU — Metrics/i })).toBeVisible({ timeout: 15_000 });
|
||||
await expect(page.getByRole('heading', { name: /metric/i })).toBeVisible({ timeout: 15_000 });
|
||||
});
|
||||
|
||||
test('plugin settings page shows intel-gpu plugin entry', async ({ page }) => {
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "intel-gpu",
|
||||
"version": "1.1.0",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "intel-gpu",
|
||||
"version": "1.1.0",
|
||||
"version": "1.0.0",
|
||||
"license": "Apache-2.0",
|
||||
"devDependencies": {
|
||||
"@kinvolk/headlamp-plugin": "^0.13.0",
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "intel-gpu",
|
||||
"version": "1.1.0",
|
||||
"version": "1.0.0",
|
||||
"description": "Headlamp plugin for Intel GPU device plugin visibility and monitoring",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
||||
@@ -151,27 +151,4 @@ describe('IntelGpuDataProvider', () => {
|
||||
expect(callCountAfter).toBeGreaterThan(callCountBefore);
|
||||
});
|
||||
});
|
||||
|
||||
it('treats a hanging CRD request as unavailable after 2s timeout', async () => {
|
||||
vi.useFakeTimers();
|
||||
const nodeWrapper = { jsonData: {} };
|
||||
vi.mocked(K8s.ResourceClasses.Node.useList).mockReturnValue([[nodeWrapper], null] as any);
|
||||
vi.mocked(K8s.ResourceClasses.Pod.useList).mockReturnValue([[nodeWrapper], null] as any);
|
||||
vi.mocked(ApiProxy.request)
|
||||
.mockReturnValueOnce(new Promise(() => {}))
|
||||
.mockResolvedValueOnce({ items: [] })
|
||||
.mockResolvedValueOnce({ items: [] })
|
||||
.mockResolvedValueOnce({ items: [] });
|
||||
|
||||
const { result } = renderHook(() => useIntelGpuContext(), { wrapper: Wrapper });
|
||||
|
||||
expect(result.current.loading).toBe(true);
|
||||
|
||||
vi.advanceTimersByTime(2000);
|
||||
await act(async () => {});
|
||||
expect(result.current.crdAvailable).toBe(false);
|
||||
expect(result.current.loading).toBe(false);
|
||||
|
||||
vi.useRealTimers();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -69,18 +69,6 @@ export function useIntelGpuContext(): IntelGpuContextValue {
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DEFAULT_REQUEST_TIMEOUT_MS = 2_000;
|
||||
|
||||
/** Wraps a promise with a timeout, rejecting if it doesn't settle within ms. */
|
||||
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
|
||||
return Promise.race([
|
||||
promise,
|
||||
new Promise<T>((_, reject) =>
|
||||
setTimeout(() => reject(new Error(`Request timed out after ${ms}ms`)), ms)
|
||||
),
|
||||
]);
|
||||
}
|
||||
|
||||
/** Extract raw Kubernetes JSON from Headlamp KubeObject wrappers. */
|
||||
const extractJsonData = (items: unknown[]): unknown[] =>
|
||||
items.map(item =>
|
||||
@@ -120,11 +108,8 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
|
||||
try {
|
||||
// GpuDevicePlugin CRDs — graceful degradation if CRD not installed
|
||||
try {
|
||||
const pluginList = await withTimeout(
|
||||
ApiProxy.request(
|
||||
`/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
|
||||
),
|
||||
DEFAULT_REQUEST_TIMEOUT_MS
|
||||
const pluginList = await ApiProxy.request(
|
||||
`/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
|
||||
);
|
||||
if (!cancelled && isKubeList(pluginList)) {
|
||||
setCrdAvailable(true);
|
||||
@@ -154,7 +139,7 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
|
||||
|
||||
for (const url of pluginPodSelectors) {
|
||||
try {
|
||||
const list = await withTimeout(ApiProxy.request(url), DEFAULT_REQUEST_TIMEOUT_MS);
|
||||
const list = await ApiProxy.request(url);
|
||||
if (!cancelled && isKubeList(list)) {
|
||||
const gpuPluginPods = filterIntelGpuPluginPods(list.items);
|
||||
foundPluginPods.push(...gpuPluginPods);
|
||||
|
||||
@@ -106,13 +106,11 @@ describe('MetricsPage', () => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it('shows loader when ctxLoading=true but heading is visible immediately', () => {
|
||||
it('shows loader when ctxLoading=true', () => {
|
||||
vi.mocked(useIntelGpuContext).mockReturnValue(makeContext({ loading: true }));
|
||||
// fetchGpuMetrics should never be called in loading state
|
||||
vi.mocked(fetchGpuMetrics).mockResolvedValue(null);
|
||||
render(<MetricsPage />);
|
||||
// Heading renders immediately, loader appears below it while waiting for context
|
||||
expect(screen.getByText('Intel GPU — Metrics')).toBeInTheDocument();
|
||||
expect(screen.getByTestId('loader')).toHaveTextContent('Loading Intel GPU data...');
|
||||
});
|
||||
|
||||
|
||||
@@ -230,6 +230,10 @@ export default function MetricsPage() {
|
||||
};
|
||||
}, [ctxLoading, fetchSeq]);
|
||||
|
||||
if (ctxLoading) {
|
||||
return <Loader title="Loading Intel GPU data..." />;
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<div
|
||||
@@ -243,7 +247,7 @@ export default function MetricsPage() {
|
||||
<SectionHeader title="Intel GPU — Metrics" />
|
||||
<button
|
||||
onClick={() => void doFetch()}
|
||||
disabled={fetching || ctxLoading}
|
||||
disabled={fetching}
|
||||
aria-label="Refresh metrics"
|
||||
style={{
|
||||
padding: '6px 16px',
|
||||
@@ -251,18 +255,15 @@ export default function MetricsPage() {
|
||||
color: 'var(--mui-palette-primary-main, #0071c5)',
|
||||
border: '1px solid var(--mui-palette-primary-main, #0071c5)',
|
||||
borderRadius: '4px',
|
||||
cursor: fetching || ctxLoading ? 'not-allowed' : 'pointer',
|
||||
cursor: 'pointer',
|
||||
fontSize: '13px',
|
||||
fontWeight: 500,
|
||||
opacity: fetching || ctxLoading ? 0.6 : 1,
|
||||
}}
|
||||
>
|
||||
{fetching ? 'Refreshing…' : 'Refresh'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{ctxLoading && <Loader title="Loading Intel GPU data..." />}
|
||||
|
||||
<MetricRequirements />
|
||||
|
||||
{fetching && !metrics && <Loader title="Querying Prometheus for GPU metrics..." />}
|
||||
|
||||
Reference in New Issue
Block a user