Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2d9c447467 | |||
| 191d2edc55 | |||
| c7920b5b8e | |||
| c99e235caa | |||
| 85c839bc19 | |||
| 00c29e36dd | |||
| 823e590513 | |||
| 3cc0094842 | |||
| 161d817e6c | |||
| 375f43265d | |||
| b81f25ad74 |
@@ -16,3 +16,5 @@ jobs:
|
||||
dual-approval:
|
||||
uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{ github.event.pull_request.number }}
|
||||
|
||||
@@ -10,94 +10,13 @@ on:
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Only one E2E run at a time: the shared E2E_RELEASE (headlamp-e2e) in
|
||||
# privilegedescalation-dev cannot be shared across concurrent runs.
|
||||
# cancel-in-progress: false (queue, don't cancel) — cancelling in-flight
|
||||
# runs may skip the if: always() teardown, leaving dangling cluster resources.
|
||||
concurrency:
|
||||
group: e2e-${{ github.repository }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
E2E_NAMESPACE: privilegedescalation-dev
|
||||
E2E_RELEASE: headlamp-e2e
|
||||
# Pin to a known-good Headlamp version. Using :latest is risky because
|
||||
# the tag can change between CI runs, causing flaky failures when a newer
|
||||
# image is pulled on some nodes but not others (IfNotPresent pull policy).
|
||||
# Update this when Headlamp is upgraded in production (kube-system).
|
||||
HEADLAMP_VERSION: v0.40.1
|
||||
|
||||
jobs:
|
||||
e2e:
|
||||
runs-on: runners-privilegedescalation
|
||||
timeout-minutes: 15
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: '22'
|
||||
cache: 'npm'
|
||||
|
||||
- name: Setup kubectl
|
||||
uses: azure/setup-kubectl@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build plugin
|
||||
run: npx @kinvolk/headlamp-plugin build
|
||||
|
||||
- name: Deploy E2E Headlamp instance
|
||||
run: scripts/deploy-e2e-headlamp.sh
|
||||
|
||||
- name: Load E2E environment
|
||||
run: |
|
||||
if [ -f .env.e2e ]; then
|
||||
cat .env.e2e >> "$GITHUB_ENV"
|
||||
else
|
||||
echo "::error::deploy-e2e-headlamp.sh did not produce .env.e2e"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Playwright browsers
|
||||
run: npx playwright install --with-deps chromium
|
||||
|
||||
- name: Run E2E tests
|
||||
run: npm run e2e
|
||||
env:
|
||||
HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
|
||||
HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}
|
||||
|
||||
- name: Collect deployment diagnostics on failure
|
||||
if: failure()
|
||||
run: |
|
||||
echo "=== Pod state ==="
|
||||
kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
|
||||
echo "=== Pod describe ==="
|
||||
kubectl describe pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
|
||||
echo "=== Recent namespace events ==="
|
||||
kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
|
||||
|
||||
- name: Teardown E2E instance
|
||||
if: always()
|
||||
run: scripts/teardown-e2e-headlamp.sh
|
||||
|
||||
- name: Upload Playwright report
|
||||
uses: actions/upload-artifact@v7
|
||||
if: failure()
|
||||
with:
|
||||
name: playwright-report
|
||||
path: playwright-report/
|
||||
retention-days: 7
|
||||
|
||||
- name: Upload test results
|
||||
uses: actions/upload-artifact@v7
|
||||
if: failure()
|
||||
with:
|
||||
name: test-results
|
||||
path: test-results/
|
||||
retention-days: 7
|
||||
uses: privilegedescalation/.github/.github/workflows/plugin-e2e.yaml@main
|
||||
with:
|
||||
node-version: "22"
|
||||
headlamp-version: v0.40.1
|
||||
|
||||
+3
-3
@@ -1,4 +1,4 @@
|
||||
version: "1.0.0"
|
||||
version: "1.1.0"
|
||||
name: headlamp-intel-gpu
|
||||
displayName: Intel GPU
|
||||
description: >-
|
||||
@@ -99,7 +99,7 @@ screenshots:
|
||||
url: https://raw.githubusercontent.com/privilegedescalation/headlamp-intel-gpu-plugin/main/docs/screenshots/03-metrics.svg
|
||||
|
||||
annotations:
|
||||
headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v1.0.0/intel-gpu-1.0.0.tar.gz"
|
||||
headlamp/plugin/archive-checksum: sha256:93d6c531e7c12440c9625138f0645fc0c3521b574d0089492759699b324943f0
|
||||
headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v1.1.0/intel-gpu-1.1.0.tar.gz"
|
||||
headlamp/plugin/archive-checksum: sha256:e212381f38c331383604b06f6552997fcba5c8b42a3bd828e3b43ed3e5028448
|
||||
headlamp/plugin/version-compat: ">=0.20.0"
|
||||
headlamp/plugin/distro-compat: "in-cluster,web,app"
|
||||
|
||||
+21
-13
@@ -19,16 +19,18 @@ test.describe('Intel GPU plugin smoke tests', () => {
|
||||
|
||||
// Should navigate to the overview route
|
||||
await expect(page).toHaveURL(/\/intel-gpu$/);
|
||||
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible();
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Overview' })
|
||||
).toBeVisible();
|
||||
});
|
||||
|
||||
test('overview page renders GPU device list or empty state', async ({ page }) => {
|
||||
await page.goto('/c/main/intel-gpu');
|
||||
|
||||
// Overview heading should be present
|
||||
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Overview' })
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
// Either a populated table/list or an empty-state indicator must be visible
|
||||
const hasTable = await page.locator('table').first().isVisible().catch(() => false);
|
||||
@@ -43,9 +45,9 @@ test.describe('Intel GPU plugin smoke tests', () => {
|
||||
test('device plugins page renders or shows empty state', async ({ page }) => {
|
||||
await page.goto('/c/main/intel-gpu/device-plugins');
|
||||
|
||||
await expect(page.getByRole('heading', { name: /device plugin/i })).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Device Plugins' })
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
const hasTable = await page.locator('table').first().isVisible().catch(() => false);
|
||||
const hasEmptyState = await page
|
||||
@@ -61,18 +63,24 @@ test.describe('Intel GPU plugin smoke tests', () => {
|
||||
// not after clicking the parent entry from the overview. Test route
|
||||
// accessibility via direct navigation — each route must render its heading.
|
||||
await page.goto('/c/main/intel-gpu');
|
||||
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Overview' })
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
await page.goto('/c/main/intel-gpu/nodes');
|
||||
await expect(page.getByRole('heading', { name: /node/i })).toBeVisible({ timeout: 15_000 });
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Nodes' })
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
await page.goto('/c/main/intel-gpu/pods');
|
||||
await expect(page.getByRole('heading', { name: /pod/i })).toBeVisible({ timeout: 15_000 });
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Pods' })
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
await page.goto('/c/main/intel-gpu/metrics');
|
||||
await expect(page.getByRole('heading', { name: /metric/i })).toBeVisible({ timeout: 15_000 });
|
||||
await expect(
|
||||
page.locator('main').getByRole('heading', { name: 'Intel GPU — Metrics' })
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
});
|
||||
|
||||
test('plugin settings page shows intel-gpu plugin entry', async ({ page }) => {
|
||||
|
||||
Generated
+5
-5
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "intel-gpu",
|
||||
"version": "1.0.0",
|
||||
"version": "1.1.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "intel-gpu",
|
||||
"version": "1.0.0",
|
||||
"version": "1.1.0",
|
||||
"license": "Apache-2.0",
|
||||
"devDependencies": {
|
||||
"@kinvolk/headlamp-plugin": "^0.13.0",
|
||||
@@ -11600,9 +11600,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/lodash": {
|
||||
"version": "4.17.23",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
|
||||
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
|
||||
"version": "4.18.1",
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz",
|
||||
"integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
||||
+3
-2
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "intel-gpu",
|
||||
"version": "1.0.0",
|
||||
"version": "1.1.0",
|
||||
"description": "Headlamp plugin for Intel GPU device plugin visibility and monitoring",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -44,6 +44,7 @@
|
||||
},
|
||||
"overrides": {
|
||||
"tar": "^7.5.11",
|
||||
"undici": "^7.24.3"
|
||||
"undici": "^7.24.3",
|
||||
"lodash": ">=4.18.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# a ConfigMap volume mount. No custom Docker images — the plugin is built
|
||||
# in CI and injected as a ConfigMap.
|
||||
#
|
||||
# E2E resources are deployed to the `privilegedescalation-dev` namespace. Nothing
|
||||
# E2E resources are deployed to the `headlamp-dev` namespace. Nothing
|
||||
# persists beyond the test run — teardown cleans up all created resources.
|
||||
#
|
||||
# Prerequisites:
|
||||
@@ -14,7 +14,7 @@
|
||||
# - RBAC applied: kubectl apply -f deployment/e2e-ci-runner-rbac.yaml
|
||||
#
|
||||
# Environment:
|
||||
# E2E_NAMESPACE — namespace for E2E Headlamp (default: privilegedescalation-dev)
|
||||
# E2E_NAMESPACE — namespace for E2E Headlamp (default: headlamp-dev)
|
||||
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
|
||||
# HEADLAMP_VERSION — Headlamp image tag (default: latest)
|
||||
set -euo pipefail
|
||||
@@ -22,7 +22,7 @@ set -euo pipefail
|
||||
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
DIST_DIR="$REPO_ROOT/dist"
|
||||
|
||||
E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}"
|
||||
E2E_NAMESPACE="${E2E_NAMESPACE:-headlamp-dev}"
|
||||
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
|
||||
HEADLAMP_VERSION="${HEADLAMP_VERSION:-latest}"
|
||||
|
||||
@@ -59,11 +59,21 @@ kubectl create configmap headlamp-intel-gpu-plugin \
|
||||
--from-file=package.json="$REPO_ROOT/package.json"
|
||||
|
||||
# --- Tear down any existing E2E deployment for a clean start ---
|
||||
# Deleting the Deployment forces a fresh pod (new ReplicaSet) regardless of
|
||||
# whether the pod spec changed. We do NOT delete the ServiceAccount — keeping
|
||||
# it avoids a token-race condition where kubelet tries to mount a volume using a
|
||||
# token that has been deleted but the new one isn't ready yet.
|
||||
# The Service is NOT deleted — leaving it in place avoids an
|
||||
# Endpoints UID race (FailedToUpdateEndpoint) that causes DNS resolution
|
||||
# failures. kubectl apply below upserts the Service in-place, and the new
|
||||
# pod's IP is added to the existing Endpoints automatically.
|
||||
echo ""
|
||||
echo "Removing any existing E2E deployment (clean-start)..."
|
||||
kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
|
||||
kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
|
||||
kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
|
||||
# ServiceAccount is kept — create it idempotently so the first run works too
|
||||
kubectl create serviceaccount "${E2E_RELEASE}" \
|
||||
-n "$E2E_NAMESPACE" \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
# --- Deploy Headlamp via kubectl apply ---
|
||||
echo ""
|
||||
|
||||
@@ -4,13 +4,13 @@
|
||||
# Tears down the dedicated E2E Headlamp instance deployed by deploy-e2e-headlamp.sh.
|
||||
#
|
||||
# Environment:
|
||||
# E2E_NAMESPACE — namespace to clean up (default: privilegedescalation-dev)
|
||||
# E2E_NAMESPACE — namespace to clean up (default: headlamp-dev)
|
||||
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
|
||||
set -euo pipefail
|
||||
|
||||
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
|
||||
E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}"
|
||||
E2E_NAMESPACE="${E2E_NAMESPACE:-headlamp-dev}"
|
||||
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
|
||||
|
||||
echo "=== E2E Headlamp Teardown ==="
|
||||
|
||||
@@ -151,4 +151,27 @@ describe('IntelGpuDataProvider', () => {
|
||||
expect(callCountAfter).toBeGreaterThan(callCountBefore);
|
||||
});
|
||||
});
|
||||
|
||||
// Graceful-degradation check: the first mocked ApiProxy.request never settles,
// so the provider must give up on it once the 2s timeout elapses and report
// the CRD as unavailable instead of staying in the loading state forever.
it('treats a hanging CRD request as unavailable after 2s timeout', async () => {
  vi.useFakeTimers();
  try {
    const nodeWrapper = { jsonData: {} };
    vi.mocked(K8s.ResourceClasses.Node.useList).mockReturnValue([[nodeWrapper], null] as any);
    vi.mocked(K8s.ResourceClasses.Pod.useList).mockReturnValue([[nodeWrapper], null] as any);
    vi.mocked(ApiProxy.request)
      // First request hangs: a promise that never resolves or rejects.
      .mockReturnValueOnce(new Promise(() => {}))
      // Subsequent requests resolve with empty lists.
      .mockResolvedValueOnce({ items: [] })
      .mockResolvedValueOnce({ items: [] })
      .mockResolvedValueOnce({ items: [] });

    const { result } = renderHook(() => useIntelGpuContext(), { wrapper: Wrapper });

    expect(result.current.loading).toBe(true);

    // Fire the 2s timeout, then let the pending promise callbacks flush.
    vi.advanceTimersByTime(2000);
    await act(async () => {});
    expect(result.current.crdAvailable).toBe(false);
    expect(result.current.loading).toBe(false);
  } finally {
    // Restore real timers even when an assertion above throws; otherwise the
    // fake timers stay installed and can break unrelated tests that run later.
    vi.useRealTimers();
  }
});
|
||||
});
|
||||
|
||||
@@ -69,6 +69,18 @@ export function useIntelGpuContext(): IntelGpuContextValue {
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DEFAULT_REQUEST_TIMEOUT_MS = 2_000;
|
||||
|
||||
/**
 * Wraps a promise with a timeout, rejecting if it doesn't settle within ms.
 *
 * @param promise - The operation to wait for.
 * @param ms - Maximum time to wait, in milliseconds.
 * @returns A promise that settles like `promise` when it finishes in time, or
 *   rejects with `Error("Request timed out after <ms>ms")` otherwise.
 */
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const timeout = new Promise<T>((_, reject) => {
    timer = setTimeout(() => reject(new Error(`Request timed out after ${ms}ms`)), ms);
  });

  // Cancel the timer as soon as the wrapped promise settles so that requests
  // which finish in time do not leave a live timer (and a pending rejection)
  // behind for the remainder of `ms`. This is attached as a side branch so the
  // returned race settles exactly as before; handling both outcomes here also
  // keeps the side branch from surfacing as an unhandled rejection.
  const clearTimer = () => clearTimeout(timer);
  Promise.resolve(promise).then(clearTimer, clearTimer);

  return Promise.race([promise, timeout]);
}
|
||||
|
||||
/** Extract raw Kubernetes JSON from Headlamp KubeObject wrappers. */
|
||||
const extractJsonData = (items: unknown[]): unknown[] =>
|
||||
items.map(item =>
|
||||
@@ -108,8 +120,11 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
|
||||
try {
|
||||
// GpuDevicePlugin CRDs — graceful degradation if CRD not installed
|
||||
try {
|
||||
const pluginList = await ApiProxy.request(
|
||||
`/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
|
||||
const pluginList = await withTimeout(
|
||||
ApiProxy.request(
|
||||
`/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
|
||||
),
|
||||
DEFAULT_REQUEST_TIMEOUT_MS
|
||||
);
|
||||
if (!cancelled && isKubeList(pluginList)) {
|
||||
setCrdAvailable(true);
|
||||
@@ -139,7 +154,7 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
|
||||
|
||||
for (const url of pluginPodSelectors) {
|
||||
try {
|
||||
const list = await ApiProxy.request(url);
|
||||
const list = await withTimeout(ApiProxy.request(url), DEFAULT_REQUEST_TIMEOUT_MS);
|
||||
if (!cancelled && isKubeList(list)) {
|
||||
const gpuPluginPods = filterIntelGpuPluginPods(list.items);
|
||||
foundPluginPods.push(...gpuPluginPods);
|
||||
|
||||
@@ -106,11 +106,13 @@ describe('MetricsPage', () => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it('shows loader when ctxLoading=true', () => {
|
||||
it('shows loader when ctxLoading=true but heading is visible immediately', () => {
|
||||
vi.mocked(useIntelGpuContext).mockReturnValue(makeContext({ loading: true }));
|
||||
// fetchGpuMetrics should never be called in loading state
|
||||
vi.mocked(fetchGpuMetrics).mockResolvedValue(null);
|
||||
render(<MetricsPage />);
|
||||
// Heading renders immediately, loader appears below it while waiting for context
|
||||
expect(screen.getByText('Intel GPU — Metrics')).toBeInTheDocument();
|
||||
expect(screen.getByTestId('loader')).toHaveTextContent('Loading Intel GPU data...');
|
||||
});
|
||||
|
||||
|
||||
@@ -230,10 +230,6 @@ export default function MetricsPage() {
|
||||
};
|
||||
}, [ctxLoading, fetchSeq]);
|
||||
|
||||
if (ctxLoading) {
|
||||
return <Loader title="Loading Intel GPU data..." />;
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<div
|
||||
@@ -247,7 +243,7 @@ export default function MetricsPage() {
|
||||
<SectionHeader title="Intel GPU — Metrics" />
|
||||
<button
|
||||
onClick={() => void doFetch()}
|
||||
disabled={fetching}
|
||||
disabled={fetching || ctxLoading}
|
||||
aria-label="Refresh metrics"
|
||||
style={{
|
||||
padding: '6px 16px',
|
||||
@@ -255,15 +251,18 @@ export default function MetricsPage() {
|
||||
color: 'var(--mui-palette-primary-main, #0071c5)',
|
||||
border: '1px solid var(--mui-palette-primary-main, #0071c5)',
|
||||
borderRadius: '4px',
|
||||
cursor: 'pointer',
|
||||
cursor: fetching || ctxLoading ? 'not-allowed' : 'pointer',
|
||||
fontSize: '13px',
|
||||
fontWeight: 500,
|
||||
opacity: fetching || ctxLoading ? 0.6 : 1,
|
||||
}}
|
||||
>
|
||||
{fetching ? 'Refreshing…' : 'Refresh'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{ctxLoading && <Loader title="Loading Intel GPU data..." />}
|
||||
|
||||
<MetricRequirements />
|
||||
|
||||
{fetching && !metrics && <Loader title="Querying Prometheus for GPU metrics..." />}
|
||||
|
||||
Reference in New Issue
Block a user