From 3aff1b7891878ac59cc7ad2617f74dec2a0ce8ac Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 11 Jul 2025 16:01:54 +0200
Subject: [PATCH 01/38] test(e2e): introduces e2e test skeleton and single-node
 cpu model

Enables end-to-end (aka fvt) pytests for LLMInferenceService. This change
includes:

- flexible, parameterized-based test skeleton that allows deploying
  models by simply combining LLMInferenceServiceConfig refs
- can be mixed with partial spec (as another parameterized test)
- simple happy path test as a basis for further cases
- supports pytest parallelism
- gh-action: support for `metallb` addon for minikube

> [!IMPORTANT]
> See test/e2e/llmisvc/README.md for test design ideas.
> Refer to pkg/controller/llmisvc/DEV.md for an updated description of how to
> run the tests.

Fixes [RHOAIENG-30183](https://issues.redhat.com/browse/RHOAIENG-30183)

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>

; Conflicts:
;	config/default/kustomization.yaml

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/actions/kserve-dep-setup/action.yml   |   6 +-
 .github/actions/minikube-setup/action.yml     |  29 +-
 .github/workflows/e2e-test-llmisvc.yml        | 172 +++++++++++
 .../test/clusterresources/kustomization.yaml  |   1 +
 pkg/controller/llmisvc/DEV.md                 |  97 +++---
 test/e2e/llmisvc/README.md                    |  28 ++
 test/e2e/llmisvc/__init__.py                  |   0
 test/e2e/llmisvc/conftest.py                  |  26 ++
 test/e2e/llmisvc/pytest.ini                   |  14 +
 test/e2e/llmisvc/test_configs.py              | 179 +++++++++++
 .../e2e/llmisvc/test_llm_inference_service.py | 289 ++++++++++++++++++
 test/e2e/pytest.ini                           |   1 +
 test/scripts/gh-actions/run-e2e-tests.sh      |   1 +
 13 files changed, 804 insertions(+), 39 deletions(-)
 create mode 100644 .github/workflows/e2e-test-llmisvc.yml
 create mode 100644 test/e2e/llmisvc/README.md
 create mode 100644 test/e2e/llmisvc/__init__.py
 create mode 100644 test/e2e/llmisvc/conftest.py
 create mode 100644 test/e2e/llmisvc/pytest.ini
 create mode 100644 test/e2e/llmisvc/test_configs.py
 create mode 100644 test/e2e/llmisvc/test_llm_inference_service.py

diff --git a/.github/actions/kserve-dep-setup/action.yml b/.github/actions/kserve-dep-setup/action.yml
index de7f22dc411..3cf3da3958d 100644
--- a/.github/actions/kserve-dep-setup/action.yml
+++ b/.github/actions/kserve-dep-setup/action.yml
@@ -13,6 +13,10 @@ inputs:
     description: 'Enable KEDA for autoscaling'
     required: false
     default: 'false'
+  enable-lws:
+    description: 'Enable Leader Worker Set'
+    required: false
+    default: 'false'
 runs:
   using: "composite"
   steps:
@@ -26,7 +30,7 @@ runs:
           ./test/scripts/gh-actions/setup-kourier.sh
         else
           echo "Selected network layer ${{ inputs.network-layer }}"
-          ./test/scripts/gh-actions/setup-deps.sh ${{ inputs.deployment-mode }} "${{ inputs.network-layer }}" "${{ inputs.enable-keda }}"
+          ./test/scripts/gh-actions/setup-deps.sh ${{ inputs.deployment-mode }} "${{ inputs.network-layer }}" "${{ inputs.enable-keda }}" "${{ inputs.enable-lws }}"
         fi
 
     - name: Update test overlays
diff --git a/.github/actions/minikube-setup/action.yml b/.github/actions/minikube-setup/action.yml
index f05e9678793..f38da79b2fa 100644
--- a/.github/actions/minikube-setup/action.yml
+++ b/.github/actions/minikube-setup/action.yml
@@ -14,6 +14,10 @@ inputs:
     description: 'Additional arguments to pass to minikube start'
     required: false
     default: ''
+  addons:
+    description: 'Choose optional addons to install. Valid options: metallb, ingress, gcp-auth, registry ...'
+    required: false
+    default: ''
 
 runs:
   using: "composite"
@@ -29,11 +33,34 @@ runs:
         minikube-version: '1.35.0'
         kubernetes-version: 'v1.30.7'
         driver: ${{ inputs.driver }}
+        addons: ${{ inputs.addons }}
         wait: 'all'
         cpus: 'max'
         memory: 'max'
         start-args: --wait-timeout=6m0s --nodes=${{ inputs.nodes }} ${{ inputs.start-args }}
-
+    - name: Configure MetalLB for Minikube
+      if: ${{ contains(inputs.addons, 'metallb') }}
+      shell: bash
+      run: |
+        IP=$(minikube ip)
+        PREFIX=${IP%.*}
+        START=${PREFIX}.200
+        END=${PREFIX}.235
+        
+        kubectl apply -f - <<EOF
+        apiVersion: v1
+        kind: ConfigMap
+        metadata:
+          namespace: metallb-system
+          name: config
+        data:
+          config: |
+            address-pools:
+            - name: default
+              protocol: layer2
+              addresses:
+              - ${START}-${END}
+        EOF
     - name: Check Kubernetes pods
       shell: bash
       run: kubectl get pods -n kube-system
diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
new file mode 100644
index 00000000000..09ec0603d40
--- /dev/null
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -0,0 +1,172 @@
+name: LLMInferenceService E2E Tests
+
+on:
+  pull_request:
+    branches: [master, release*, feature-llmd-* ]
+    paths:
+      - "**"
+      - "!.github/**"
+      - "!docs/**"
+      - "!**.md"
+      - ".github/workflows/e2e-test-llmisvc.yml"
+  workflow_dispatch:
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  DOCKER_IMAGES_PATH: "/mnt/docker-images"
+  DOCKER_REPO: "kserve"
+  # artifact prefixes for bulk download
+  PREDICTOR_ARTIFACT_PREFIX: "pred"
+  EXPLAINER_ARTIFACT_PREFIX: "exp"
+  TRANSFORMER_ARTIFACT_PREFIX: "trans"
+  GRAPH_ARTIFACT_PREFIX: "graph"
+  BASE_ARTIFACT_PREFIX: "base"
+  # Controller images
+  CONTROLLER_IMG: "kserve-controller"
+  LOCALMODEL_CONTROLLER_IMG: "kserve-localmodel-controller"
+  LOCALMODEL_AGENT_IMG: "kserve-localmodelnode-agent"
+  STORAGE_INIT_IMG: "storage-initializer"
+  AGENT_IMG: "agent"
+  ROUTER_IMG: "router"
+  # Predictor runtime server images
+  SKLEARN_IMG: "sklearnserver"
+  XGB_IMG: "xgbserver"
+  LGB_IMG: "lgbserver"
+  PMML_IMG: "pmmlserver"
+  PADDLE_IMG: "paddleserver"
+  CUSTOM_MODEL_GRPC_IMG: "custom-model-grpc"
+  CUSTOM_MODEL_GRPC_IMG_TAG: "kserve/custom-model-grpc:${{ github.sha }}"
+  HUGGINGFACE_IMG: "huggingfaceserver"
+  # Explainer images
+  ART_IMG: "art-explainer"
+  # Transformer images
+  IMAGE_TRANSFORMER_IMG: "image-transformer"
+  IMAGE_TRANSFORMER_IMG_TAG: "kserve/image-transformer:${{ github.sha }}"
+  CUSTOM_TRANSFORMER_GRPC_IMG: "custom-image-transformer-grpc"
+  # Graph images
+  SUCCESS_200_ISVC_IMG: "success-200-isvc"
+  ERROR_404_ISVC_IMG: "error-404-isvc"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test-llmisvc:
+    runs-on: ubuntu-22.04
+    needs: [ kserve-image-build ]
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v4
+
+      - name: Free-up disk space
+        uses: ./.github/actions/free-up-disk-space
+
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+
+      - name: Setup Minikube
+        uses: ./.github/actions/minikube-setup
+        with:
+          addons: "metallb"
+
+      - name: KServe dependency setup
+        uses: ./.github/actions/kserve-dep-setup
+        with:
+          network-layer: 'istio-gatewayapi-ext'
+          enable-lws: 'true'
+
+      - name: Download base images
+        uses: ./.github/actions/base-download
+
+      - name: Install Poetry and version plugin
+        run: ./test/scripts/gh-actions/setup-poetry.sh
+
+      - name: Install KServe
+        run: |
+          ./test/scripts/gh-actions/setup-kserve.sh "raw" "istio-gatewayapi-ext"
+
+      - name: Run E2E tests
+        timeout-minutes: 30
+        run: |
+          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice" "1" "istio-gatewayapi-ext"
+
+      - name: Check system status
+        if: always()
+        run: |
+          ./test/scripts/gh-actions/status-check.sh
+
+  kserve-image-build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v4
+
+      - name: Free-up disk space
+        uses: ./.github/actions/free-up-disk-space
+
+      - name: Setup Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build KServe images
+        run: |
+          sudo mkdir -p ${DOCKER_IMAGES_PATH}
+          sudo chown -R $USER ${DOCKER_IMAGES_PATH}
+          ./test/scripts/gh-actions/build-images.sh
+          docker image ls
+          sudo ls -lh ${DOCKER_IMAGES_PATH}
+
+      - name: Upload controller image
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.BASE_ARTIFACT_PREFIX }}-${{ env.CONTROLLER_IMG }}-${{ github.sha }}
+          path: ${{ env.DOCKER_IMAGES_PATH }}/${{ env.CONTROLLER_IMG }}-${{ github.sha }}
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload localmodel controller image
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.BASE_ARTIFACT_PREFIX }}-${{ env.LOCALMODEL_CONTROLLER_IMG }}-${{ github.sha }}
+          path: ${{ env.DOCKER_IMAGES_PATH }}/${{ env.LOCALMODEL_CONTROLLER_IMG }}-${{ github.sha }}
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload localmodel agent image
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.BASE_ARTIFACT_PREFIX }}-${{ env.LOCALMODEL_AGENT_IMG }}-${{ github.sha }}
+          path: ${{ env.DOCKER_IMAGES_PATH }}/${{ env.LOCALMODEL_AGENT_IMG }}-${{ github.sha }}
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload agent image
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.BASE_ARTIFACT_PREFIX }}-${{ env.AGENT_IMG }}-${{ github.sha }}
+          path: ${{ env.DOCKER_IMAGES_PATH }}/${{ env.AGENT_IMG }}-${{ github.sha }}
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload storage initializer image
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.BASE_ARTIFACT_PREFIX }}-${{ env.STORAGE_INIT_IMG }}-${{ github.sha }}
+          path: ${{ env.DOCKER_IMAGES_PATH }}/${{ env.STORAGE_INIT_IMG }}-${{ github.sha }}
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload router image
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.BASE_ARTIFACT_PREFIX }}-${{ env.ROUTER_IMG }}-${{ github.sha }}
+          path: ${{ env.DOCKER_IMAGES_PATH }}/${{ env.ROUTER_IMG }}-${{ github.sha }}
+          compression-level: 0
+          if-no-files-found: error
diff --git a/config/overlays/test/clusterresources/kustomization.yaml b/config/overlays/test/clusterresources/kustomization.yaml
index ab1e56e51f6..520ae05cd67 100644
--- a/config/overlays/test/clusterresources/kustomization.yaml
+++ b/config/overlays/test/clusterresources/kustomization.yaml
@@ -4,6 +4,7 @@ kind: Kustomization
 resources:
 - ../../../runtimes
 - ../../../storagecontainers
+- ../../../llmisvc
 
 
 images:
diff --git a/pkg/controller/llmisvc/DEV.md b/pkg/controller/llmisvc/DEV.md
index 14f34cfe21b..b0d893e46e9 100644
--- a/pkg/controller/llmisvc/DEV.md
+++ b/pkg/controller/llmisvc/DEV.md
@@ -12,27 +12,34 @@ kind create cluster -n "kserve-llm-d"
 
 go install sigs.k8s.io/cloud-provider-kind@latest
 
-cloud-provider-kind> /dev/null 2>&1 &
+cloud-provider-kind > /dev/null 2>&1 &
 ```
 
 ##### Using `minikube`
 
 ```shell
-minikube start --cpus='12' --memory='16G'
+minikube start --cpus='12' --memory='16G' --kubernetes-version=v1.33.1
 minikube addons enable metallb
 
-# You need to configure metallb with an IP range. This depends on the minikube network.
-# You can find your current minikube ip with:
-# $ minikube ip
-#   192.168.39.118
-#
-# With the previous sample output, you would configure metallb with a range not including
-# the minikube IP (change only the last entry). E.g:
-minikube addons configure metallb
-# Minikube will ask two prompts. Notice the configured range 192.168.39.200-192.168.39.235 is
-# not including minikube IP:
-# -- Enter Load Balancer Start IP: 192.168.39.200
-# -- Enter Load Balancer End IP: 192.168.39.235
+IP=$(minikube ip)
+PREFIX=${IP%.*}
+START=${PREFIX}.200
+END=${PREFIX}.235
+
+kubectl apply -f - <<EOF
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: metallb-system
+  name: config
+data:
+  config: |
+    address-pools:
+    - name: default
+      protocol: layer2
+      addresses:
+      - ${START}-${END}
+EOF
 ```
 
 #### Install KServe (dev) in the created cluster
@@ -41,40 +48,56 @@ minikube addons configure metallb
 make deploy-dev-llm -e KO_DOCKER_REPO=<YOUR_REPO>
 ```
 
-#### Creating simple CPU model
+#### Validation
+
+##### pytest
+
+Set up pytest
+```shell
+cd python/kserve 
+python -m venv .venv && source .venv/bin/activate
+pip install -e .
+pip install pytest pytest-asyncio requests portforward Jinja2 pytest-xdist
+cd -
+```
+
+Run the test
+
+```shell
+./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice(type='cpu')" 1 "istio-gatewayapi-ext"
+
+Starting E2E functional tests ...
+No parallelism requested for pytest. Will use default value of 1
+pytest -m 'llminferenceservice(type='cpu')' --ignore=qpext --log-cli-level=INFO -n 1 --dist worksteal --network-layer istio-gatewayapi-ext
+===================================================================================== test session starts =====================================================================================
+platform linux -- Python 3.12.11, pytest-8.4.1, pluggy-1.6.0
+rootdir: /home/bartek/code/redhat/model-serving/kserve/kserve-test/test/e2e
+configfile: pytest.ini
+plugins: anyio-4.9.0, xdist-3.8.0, asyncio-1.1.0
+asyncio: mode=Mode.STRICT, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
+1 worker [1 item]s / 1 error
+scheduling tests via WorkStealingScheduling
+
+llmisvc/test_llm_inference_service.py::test_llm_inference_service[managed-single-cpu-fb-opt-125m]
+[gw0] [100%] PASSED llmisvc/test_llm_inference_service.py::test_llm_inference_service[managed-single-cpu-fb-opt-125m]
+```
+> [!NOTE] 
+> Ignore the "ERROR collecting graph/test_inference_graph.py" message in the output above; it comes from a different test module and still needs to be fixed.
+
+##### Manual
+
+Create LLMInferenceService, e.g.:
 
 ```shell
 NS=llm-test
 kubectl create namespace ${NS} || true
 
-kubectl apply -f - <<EOF
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: kserve-ingress-gateway
-  namespace: kserve
-spec:
-  gatewayClassName: istio
-  listeners:
-   - name: http
-     port: 80
-     protocol: HTTP
-     allowedRoutes:
-       namespaces:
-         from: All
-  infrastructure:
-    labels:
-      serving.kserve.io/gateway: kserve-ingress-gateway
-EOF
-
 LLM_ISVC=docs/samples/llmisvc/opt-125m/llm-inference-service-facebook-opt-125m-cpu.yaml
 LLM_ISVC_NAME=$(cat $LLM_ISVC | yq .metadata.name)
 
 kubectl apply -n ${NS} -f ${LLM_ISVC}
 ```
 
-#### Validation
-
 ```shell
 LB_IP=$(kubectl get svc/kserve-ingress-gateway-istio -n kserve -o=jsonpath='{.status.loadBalancer.ingress[0].ip}')
 
diff --git a/test/e2e/llmisvc/README.md b/test/e2e/llmisvc/README.md
new file mode 100644
index 00000000000..2d2fdefb5d8
--- /dev/null
+++ b/test/e2e/llmisvc/README.md
@@ -0,0 +1,28 @@
+# LLM Inference Service E2E Tests
+
+## Configuration Composition Pattern
+
+Tests combine config fragments from different categories to create complete scenarios:
+```python
+["router-managed", "workload-single-cpu", "model-fb-opt-125m"]
+```
+
+The `llm_config_factory` fixture automatically creates/cleans up `LLMInferenceServiceConfig` objects.
+
+## Markers
+
+- `@pytest.mark.llminferenceservice(type="cpu")` - Resource type for selective test execution
+- Use `pytest -m "llminferenceservice(type='cpu')"` to run specific resource tests
+
+## Config Naming Convention
+
+Use prefixed categories that get composed together:
+
+- **`workload-*`**: Container specs and resources (e.g., `workload-single-cpu`, `workload-multi-node-gpu`)
+- **`model-*`**: Model sources (e.g., `model-fb-opt-125m`, `model-gpt2`) 
+- **`router-*`**: Routing configs (e.g., `router-managed`, `router-with-scheduler`)
+
+## Adding New Configs
+
+1. Add to `LLMINFERENCESERVICE_CONFIGS` in `test_configs.py`
+2. Follow `category-descriptor` naming (prefix automatically stripped from test IDs) 
\ No newline at end of file
diff --git a/test/e2e/llmisvc/__init__.py b/test/e2e/llmisvc/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/test/e2e/llmisvc/conftest.py b/test/e2e/llmisvc/conftest.py
new file mode 100644
index 00000000000..8722251fc6f
--- /dev/null
+++ b/test/e2e/llmisvc/conftest.py
@@ -0,0 +1,26 @@
+# Copyright 2025 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+
+@pytest.fixture
+def config_names(request):
+    return request.param
+
+
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers", "llminferenceservice: mark test as an LLM inference service test"
+    )
diff --git a/test/e2e/llmisvc/pytest.ini b/test/e2e/llmisvc/pytest.ini
new file mode 100644
index 00000000000..c1a0cbaa961
--- /dev/null
+++ b/test/e2e/llmisvc/pytest.ini
@@ -0,0 +1,14 @@
+[pytest]
+testpaths = .
+python_files = test_*.py
+python_functions = test_*
+python_classes = Test*
+addopts = 
+    -v
+    --tb=short
+    --strict-markers
+    --disable-warnings
+markers =
+    llminferenceservice: LLM inference service tests
+    asyncio: AsyncIO tests
+asyncio_mode = auto 
\ No newline at end of file
diff --git a/test/e2e/llmisvc/test_configs.py b/test/e2e/llmisvc/test_configs.py
new file mode 100644
index 00000000000..87e6da0a89a
--- /dev/null
+++ b/test/e2e/llmisvc/test_configs.py
@@ -0,0 +1,179 @@
+# Copyright 2025 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pytest
+from kubernetes import client
+from kubernetes.client.rest import ApiException
+from kserve import KServeClient, constants
+
+KSERVE_PLURAL_LLMINFERENCESERVICECONFIG = "llminferenceserviceconfigs"
+KSERVE_TEST_NAMESPACE = "kserve-ci-e2e-test"
+
+LLMINFERENCESERVICE_CONFIGS = {
+    "workload-single-cpu": {
+        "template": {
+            "containers": [
+                {
+                    "name": "main",
+                    "image": "quay.io/pierdipi/vllm-cpu:latest",
+                    "env": [{"name": "VLLM_LOGGING_LEVEL", "value": "DEBUG"}],
+                    "resources": {
+                        "limits": {"cpu": "1", "memory": "10Gi"},
+                        "requests": {"cpu": "100m", "memory": "8Gi"},
+                    },
+                    "livenessProbe": {
+                        "initialDelaySeconds": 30,
+                        "periodSeconds": 30,
+                        "timeoutSeconds": 30,
+                        "failureThreshold": 5,
+                    },
+                }
+            ]
+        },
+    },
+    "model-fb-opt-125m": {
+        "model": {"uri": "hf://facebook/opt-125m", "name": "facebook/opt-125m"},
+    },
+    "router-managed": {
+        "router": {"scheduler": {}, "route": {}, "gateway": {}},
+    },
+    "router-with-scheduler": {
+        "router": {
+            "scheduler": {"pool": {}, "template": {}},
+            "route": {},
+            "gateway": {},
+        },
+    },
+}
+
+
+def generate_test_id(config_names):
+    """Build a test ID from config names with the leading category dropped."""
+    # partition("-") splits at the first "-" only; when a name has no "-" the
+    # separator comes back empty and the name is kept unchanged.
+    stripped = []
+    for name in config_names:
+        _category, sep, descriptor = name.partition("-")
+        stripped.append(descriptor if sep else name)
+    return "-".join(stripped)
+
+@pytest.fixture(scope="session")
+def llm_config_factory():
+    """Factory for creating/cleaning LLMInferenceServiceConfig once per session."""
+    created = []  # (name, namespace) pairs created here; deleted at teardown
+    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
+
+    def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
+        for name in names:
+            if name not in LLMINFERENCESERVICE_CONFIGS:
+                raise ValueError(f"Unknown config name: {name}")
+
+            spec = LLMINFERENCESERVICE_CONFIGS[name]
+            # Skip configs that already exist; only a "not found" lookup falls through.
+            try:
+                get_llmisvc_config(kserve_client, name, namespace)
+                continue
+            except Exception as e:
+                is_404_api = isinstance(e, ApiException) and getattr(e, "status", None) == 404
+                is_404_runtime = isinstance(e, RuntimeError) and "not found" in str(e).lower()
+                if not (is_404_api or is_404_runtime):
+                    raise
+
+            body = {
+                "apiVersion": "serving.kserve.io/v1alpha1",
+                "kind": "LLMInferenceServiceConfig",
+                "metadata": {"name": name, "namespace": namespace},
+                "spec": spec,
+            }
+            # A 409 / "already exists" answer from create is treated as success.
+            try:
+                create_llmisvc_config(kserve_client, body, namespace)
+                created.append((name, namespace))
+            except Exception as e:
+                if isinstance(e, ApiException) and getattr(e, "status", None) == 409:
+                    continue
+                if isinstance(e, RuntimeError) and "already exists" in str(e).lower():
+                    continue
+                # otherwise, real error
+                raise
+
+        return names
+
+    yield _create_configs
+
+    # teardown: best-effort cleanup; failures are reported but never raised
+    for name, namespace in created:
+        try:
+            delete_llmisvc_config(kserve_client, name, namespace)
+        except Exception as e:
+            print(f"Warning: Failed to cleanup config {name} in {namespace}: {e}")
+
+
+def create_llmisvc_config(kserve_client, llm_config, namespace=None):
+    version = llm_config["apiVersion"].split("/")[1]
+
+    if namespace is None:
+        namespace = llm_config.get("metadata", {}).get("namespace", "default")
+
+    try:
+        outputs = kserve_client.api_instance.create_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICECONFIG,
+            llm_config,
+        )
+        return outputs
+    except client.rest.ApiException as e:
+        raise RuntimeError(
+            f"Exception when calling CustomObjectsApi->"
+            f"create_namespaced_custom_object for LLMInferenceServiceConfig: {e}"
+        ) from e
+
+
+def delete_llmisvc_config(
+    kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
+):
+    try:
+        return kserve_client.api_instance.delete_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICECONFIG,
+            name,
+        )
+    except client.rest.ApiException as e:
+        raise RuntimeError(
+            f"Exception when calling CustomObjectsApi->"
+            f"delete_namespaced_custom_object for LLMInferenceServiceConfig: {e}"
+        ) from e
+
+
+def get_llmisvc_config(
+    kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
+):
+    try:
+        return kserve_client.api_instance.get_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICECONFIG,
+            name,
+        )
+    except client.rest.ApiException as e:
+        raise RuntimeError(
+            f"Exception when calling CustomObjectsApi->"
+            f"get_namespaced_custom_object for LLMInferenceServiceConfig: {e}"
+        ) from e
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
new file mode 100644
index 00000000000..9a3ddd2f06d
--- /dev/null
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -0,0 +1,289 @@
+# Copyright 2025 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import time
+
+import pytest
+import requests
+from kubernetes import client
+from kserve import KServeClient, V1alpha1LLMInferenceService, constants
+
+from .test_configs import (
+    LLMINFERENCESERVICE_CONFIGS,
+    generate_test_id,
+    llm_config_factory,
+    KSERVE_TEST_NAMESPACE,
+)
+
+KSERVE_PLURAL_LLMINFERENCESERVICE = "llminferenceservices"
+
+
+@pytest.mark.llminferenceservice(type="cpu")
+@pytest.mark.asyncio(scope="session")
+@pytest.mark.parametrize(
+    "config_names",
+    [
+        ["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+    ],
+    indirect=True,
+    ids=generate_test_id,
+)
+async def test_llm_inference_service(request, llm_config_factory, config_names):
+    created_config_names = llm_config_factory(config_names)
+    service_name = generate_service_name(request.node.name, config_names)
+
+    llm_isvc = V1alpha1LLMInferenceService(
+        api_version="serving.kserve.io/v1alpha1",
+        kind="LLMInferenceService",
+        metadata=client.V1ObjectMeta(
+            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+        ),
+        spec={
+            "replicas": 1,
+            "baseRefs": [{"name": config_name} for config_name in created_config_names],
+        },
+    )
+
+    kserve_client = KServeClient(
+        config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
+    )
+
+    try:
+        create_llmisvc(kserve_client, llm_isvc)
+        wait_for_model_response(
+            kserve_client,
+            service_name,
+            KSERVE_TEST_NAMESPACE,
+            model_name=get_model_name_from_configs(config_names),
+        )
+    except Exception as e:
+        print(f"ERROR: Failed to call llm inference service {service_name}: {e}")
+        collect_diagnostics(service_name, KSERVE_TEST_NAMESPACE)
+        raise
+    finally:
+        try:
+            delete_llmisvc(kserve_client, service_name, KSERVE_TEST_NAMESPACE)
+        except Exception as e:
+            print(f"Warning: Failed to cleanup service {service_name}: {e}")
+
+
+def create_llmisvc(kserve_client, llm_isvc, namespace=None):
+    from kserve.utils import utils
+
+    version = llm_isvc.api_version.split("/")[1]
+
+    if namespace is None:
+        namespace = utils.get_isvc_namespace(llm_isvc)
+
+    try:
+        outputs = kserve_client.api_instance.create_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICE,
+            llm_isvc,
+        )
+        return outputs
+    except client.rest.ApiException as e:
+        raise RuntimeError(
+            f"Exception when calling CustomObjectsApi->"
+            f"create_namespaced_custom_object for LLMInferenceService: {e}"
+        ) from e
+
+
+def delete_llmisvc(
+    kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
+):
+    try:
+        return kserve_client.api_instance.delete_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICE,
+            name,
+        )
+    except client.rest.ApiException as e:
+        raise RuntimeError(
+            f"Exception when calling CustomObjectsApi->"
+            f"delete_namespaced_custom_object for LLMInferenceService: {e}"
+        ) from e
+
+
+def get_llmisvc(
+    kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
+):
+    try:
+        return kserve_client.api_instance.get_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICE,
+            name,
+        )
+    except client.rest.ApiException as e:
+        raise RuntimeError(
+            f"Exception when calling CustomObjectsApi->"
+            f"get_namespaced_custom_object for LLMInferenceService: {e}"
+        ) from e
+
+
+def wait_for_model_response(
+    kserve_client,
+    name,
+    namespace,
+    timeout_seconds=600,
+    version=constants.KSERVE_V1ALPHA1_VERSION,
+    model_name=None,
+):
+    if model_name is None:
+        model_name = "default-model"
+
+    service_url = None
+
+    def assert_model_responds():
+        nonlocal service_url
+
+        try:
+            service_url = get_llm_service_url(kserve_client, name, namespace, version)
+        except Exception as e:
+            raise AssertionError(f"Failed to get service URL: {e}") from e
+
+        completion_url = f"{service_url}/v1/completions"
+        test_payload = {"model": model_name, "prompt": "test", "max_tokens": 1}
+
+        try:
+            response = requests.post(
+                completion_url,
+                headers={"Content-Type": "application/json"},
+                json=test_payload,
+                timeout=30,
+            )
+        except Exception as e:
+            raise AssertionError(f"Failed to call model: {e}") from e
+
+        assert (
+            response.status_code == 200
+        ), f"Service returned {response.status_code}: {response.text}"
+        return service_url
+
+    return wait_for(assert_model_responds, timeout=timeout_seconds, interval=10.0)
+
+
+def get_llm_service_url(
+    kserve_client, service_name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
+):
+    try:
+        llm_isvc = get_llmisvc(kserve_client, service_name, namespace, version)
+
+        if "status" not in llm_isvc:
+            raise ValueError(f"No status found in LLM inference service {service_name}")
+
+        status = llm_isvc["status"]
+
+        if "url" in status and status["url"]:
+            return status["url"]
+
+        if (
+            "addresses" in status
+            and status["addresses"]
+            and len(status["addresses"]) > 0
+        ):
+            first_address = status["addresses"][0]
+            if "url" in first_address:
+                return first_address["url"]
+
+        raise ValueError(f"No URL found in LLM inference service {service_name} status")
+
+    except Exception as e:
+        raise ValueError(
+            f"Failed to get URL for LLM inference service {service_name}: {e}"
+        ) from e
+
+
+def wait_for(assertion_fn, timeout: float = 5.0, interval: float = 0.1):
+    deadline = time.monotonic() + timeout  # monotonic: unaffected by clock changes
+    while True:  # retry until assertion_fn passes or the deadline is reached
+        try:
+            return assertion_fn()
+        except AssertionError:  # other exception types propagate immediately
+            if time.monotonic() >= deadline:
+                raise  # timed out: re-raise the most recent assertion failure
+            time.sleep(interval)
+
+
+def get_model_name_from_configs(config_names):
+    """Extract model name from model config."""
+    for config_name in config_names:
+        if config_name.startswith("model-"):
+            config = LLMINFERENCESERVICE_CONFIGS[config_name]
+            if "model" in config and "name" in config["model"]:
+                return config["model"]["name"]
+    return "default-model"
+
+
+def generate_service_name(test_name, config_names):
+    base_name = test_name.split("[")[0]  # Remove everything after [
+    base_name = base_name.replace("test_", "").replace("_", "-")
+    config_suffix = "-".join(sorted(config_names))
+    service_name = f"{base_name}-{config_suffix}"
+    service_name = service_name.lower()
+    service_name = service_name[:63].rstrip("-")
+    return service_name
+
+
+def collect_diagnostics(service_name, namespace):
+    try:
+        kserve_client = KServeClient(
+            config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
+        )
+
+        print(f"\n{'='*60}")
+        print(f"DIAGNOSTIC INFORMATION FOR {service_name} in {namespace}")
+        print(f"{'='*60}")
+
+        print("\n--- LLM Inference Service ---")
+        try:
+            llm_isvc = get_llmisvc(kserve_client, service_name, namespace)
+            print(json.dumps(llm_isvc, indent=2, default=str))
+        except Exception as e:
+            print(f"Failed to get LLM inference service: {e}")
+
+        print("\n--- Events ---")
+        try:
+            core_v1 = client.CoreV1Api()
+            events = core_v1.list_namespaced_event(
+                namespace=namespace,
+                field_selector=f"involvedObject.name={service_name}",
+            )
+            if events.items:
+                sorted_events = sorted(
+                    events.items,
+                    key=lambda x: x.last_timestamp or x.first_timestamp,
+                    reverse=True,
+                )
+                for event in sorted_events[:5]:
+                    timestamp = event.last_timestamp or event.first_timestamp
+                    print(f"  {event.type}: {event.reason} - {event.message}")
+                    print(f"    Time: {timestamp}")
+            else:
+                print("  No events found")
+        except Exception as e:
+            print(f"Failed to list events: {e}")
+
+        print(f"\n{'='*60}")
+
+    except Exception as e:
+        print(f"Failed to collect diagnostics: {e}")
diff --git a/test/e2e/pytest.ini b/test/e2e/pytest.ini
index 3407bbfbc27..9aba5ff4e3e 100644
--- a/test/e2e/pytest.ini
+++ b/test/e2e/pytest.ini
@@ -17,4 +17,5 @@ markers =
     path_based_routing: e2e tests for path based routing
     llm: e2e tests for huggingface runtime
     vllm: e2e tests for huggingface runtime with vllm-openvino backend
+    llminferenceservice: e2e tests for llm inference service controller
     modelcache: e2e tests for model caching
\ No newline at end of file
diff --git a/test/scripts/gh-actions/run-e2e-tests.sh b/test/scripts/gh-actions/run-e2e-tests.sh
index 98d6af06f6e..ee8794c8d23 100755
--- a/test/scripts/gh-actions/run-e2e-tests.sh
+++ b/test/scripts/gh-actions/run-e2e-tests.sh
@@ -35,6 +35,7 @@ pushd test/e2e >/dev/null
     echo "Skipping explainer tests for raw deployment with ingress"
     pytest -m "$MARKER" --ignore=qpext --log-cli-level=INFO -n $PARALLELISM --dist worksteal --network-layer $NETWORK_LAYER --ignore=explainer/
   else
+    echo "pytest -m '$MARKER' --ignore=qpext --log-cli-level=INFO -n $PARALLELISM --dist worksteal --network-layer $NETWORK_LAYER"
     pytest -m "$MARKER" --ignore=qpext --log-cli-level=INFO -n $PARALLELISM --dist worksteal --network-layer $NETWORK_LAYER
   fi
 popd

From d3b53d27c572effec46870888776baa7011b80f0 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Thu, 24 Jul 2025 17:01:55 +0200
Subject: [PATCH 02/38] fix: apply crds first and wait for them to be ready

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 Makefile                                                 | 2 ++
 config/overlays/test/clusterresources/kustomization.yaml | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bc670f76435..5aa8fca8d9c 100644
--- a/Makefile
+++ b/Makefile
@@ -253,6 +253,8 @@ deploy-dev-llm:
 	./hack/deploy_dev_llm.sh
 
 deploy-ci: manifests
+	kubectl apply --server-side=true -k config/crd || true
+	kubectl wait --for=condition=established --timeout=60s crd/llminferenceserviceconfigs.serving.kserve.io
 	kubectl apply --server-side=true -k config/overlays/test
 	# TODO: Add runtimes as part of default deployment
 	kubectl wait --for=condition=ready pod -l control-plane=kserve-controller-manager -n kserve --timeout=300s
diff --git a/config/overlays/test/clusterresources/kustomization.yaml b/config/overlays/test/clusterresources/kustomization.yaml
index 520ae05cd67..ab1e56e51f6 100644
--- a/config/overlays/test/clusterresources/kustomization.yaml
+++ b/config/overlays/test/clusterresources/kustomization.yaml
@@ -4,7 +4,6 @@ kind: Kustomization
 resources:
 - ../../../runtimes
 - ../../../storagecontainers
-- ../../../llmisvc
 
 
 images:

From c7e76bd196eac7ad157dd3de7d59a2906215aa9c Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Thu, 24 Jul 2025 18:58:16 +0200
Subject: [PATCH 03/38] chore: limit gh-action tests to cpu

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/workflows/e2e-test-llmisvc.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
index 09ec0603d40..576a5aae993 100644
--- a/.github/workflows/e2e-test-llmisvc.yml
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -96,7 +96,8 @@ jobs:
       - name: Run E2E tests
         timeout-minutes: 30
         run: |
-          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice" "1" "istio-gatewayapi-ext"
+          # Run only CPU tests for now
+          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice(type='cpu')" 2 "istio-gatewayapi-ext"
 
       - name: Check system status
         if: always()

From d2adcf3817ef50b62ec363191e73234bec3b025c Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Thu, 24 Jul 2025 20:28:06 +0200
Subject: [PATCH 04/38] fix: single worker job

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/workflows/e2e-test-llmisvc.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
index 576a5aae993..2249a6bde5c 100644
--- a/.github/workflows/e2e-test-llmisvc.yml
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -97,7 +97,7 @@ jobs:
         timeout-minutes: 30
         run: |
           # Run only CPU tests for now
-          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice(type='cpu')" 2 "istio-gatewayapi-ext"
+          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice(type='cpu')" 1 "istio-gatewayapi-ext"
 
       - name: Check system status
         if: always()

From 5e0e87492f109c8af71b465725893e9fd4b14f99 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 09:36:15 +0200
Subject: [PATCH 05/38] feat: introduces cluster capability markers

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/workflows/e2e-test-llmisvc.yml        |  5 ++-
 pkg/controller/llmisvc/DEV.md                 | 19 ++++++++-
 test/e2e/llmisvc/README.md                    | 41 ++++++++++++++++---
 test/e2e/llmisvc/conftest.py                  | 22 ++++++++++
 test/e2e/llmisvc/pytest.ini                   |  6 ++-
 test/e2e/llmisvc/test_configs.py              | 17 +++-----
 .../e2e/llmisvc/test_llm_inference_service.py | 13 +++---
 test/e2e/pytest.ini                           |  6 ++-
 8 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
index 2249a6bde5c..65a82d93279 100644
--- a/.github/workflows/e2e-test-llmisvc.yml
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -96,8 +96,9 @@ jobs:
       - name: Run E2E tests
         timeout-minutes: 30
         run: |
-          # Run only CPU tests for now
-          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice(type='cpu')" 1 "istio-gatewayapi-ext"
+          # Run only CPU tests for now using pytest markers
+          # Available GPU vendors: amd, nvidia, intel
+          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and cpu" 1 "istio-gatewayapi-ext"
 
       - name: Check system status
         if: always()
diff --git a/pkg/controller/llmisvc/DEV.md b/pkg/controller/llmisvc/DEV.md
index b0d893e46e9..722faf660cf 100644
--- a/pkg/controller/llmisvc/DEV.md
+++ b/pkg/controller/llmisvc/DEV.md
@@ -64,11 +64,26 @@ cd -
 Run the test
 
 ```shell
-./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice(type='cpu')" 1 "istio-gatewayapi-ext"
+# Use pytest markers for filtering
+
+# Run only CPU tests
+./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and cluster_cpu" 1 "istio-gatewayapi-ext"
+
+# Run only NVIDIA GPU tests
+./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and cluster_nvidia" 1 "istio-gatewayapi-ext"
+
+# Run all GPU tests (any vendor: amd, nvidia, intel)
+./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and (cluster_amd or cluster_nvidia or cluster_intel)" 1 "istio-gatewayapi-ext"
+
+# Run CPU and AMD GPU tests only
+./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and (cluster_cpu or cluster_amd)" 1 "istio-gatewayapi-ext"
+
+# Run all LLM inference service tests
+./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice" 1 "istio-gatewayapi-ext"
 
 Starting E2E functional tests ...
 No parallelism requested for pytest. Will use default value of 1
-pytest -m 'llminferenceservice(type='cpu')' --ignore=qpext --log-cli-level=INFO -n 1 --dist worksteal --network-layer istio-gatewayapi-ext
+pytest -m 'llminferenceservice and cluster_cpu' --ignore=qpext --log-cli-level=INFO -n 1 --dist worksteal --network-layer istio-gatewayapi-ext
 ===================================================================================== test session starts =====================================================================================
 platform linux -- Python 3.12.11, pytest-8.4.1, pluggy-1.6.0
 rootdir: /home/bartek/code/redhat/model-serving/kserve/kserve-test/test/e2e
diff --git a/test/e2e/llmisvc/README.md b/test/e2e/llmisvc/README.md
index 2d2fdefb5d8..ba0efc76ca2 100644
--- a/test/e2e/llmisvc/README.md
+++ b/test/e2e/llmisvc/README.md
@@ -4,15 +4,38 @@
 
 Tests combine config fragments from different categories to create complete scenarios:
 ```python
-["router-managed", "workload-single-cpu", "model-fb-opt-125m"]
+pytest.param(["router-managed", "workload-single-cpu", "model-fb-opt-125m"], marks=pytest.mark.cluster_cpu)
 ```
 
 The `llm_config_factory` fixture automatically creates/cleans up `LLMInferenceServiceConfig` objects.
 
-## Markers
+## Test Filtering
 
-- `@pytest.mark.llminferenceservice(type="cpu")` - Resource type for selective test execution
-- Use `pytest -m "llminferenceservice and cpu"` to run specific resource tests
+Tests are marked with both general and cluster-specific capability markers:
+
+- `@pytest.mark.llminferenceservice` - All LLM inference service tests
+- `@pytest.mark.cluster_cpu` - CPU-only tests  
+- `@pytest.mark.cluster_amd` - AMD GPU tests
+- `@pytest.mark.cluster_nvidia` - NVIDIA GPU tests
+- `@pytest.mark.cluster_intel` - Intel GPU tests
+
+Examples:
+```bash
+# Run all LLM inference service tests
+pytest -m "llminferenceservice" test/e2e/llmisvc/
+
+# Run only CPU tests
+pytest -m "llminferenceservice and cluster_cpu" test/e2e/llmisvc/
+
+# Run only NVIDIA GPU tests
+pytest -m "llminferenceservice and cluster_nvidia" test/e2e/llmisvc/
+
+# Run all GPU tests (any vendor)
+pytest -m "llminferenceservice and (cluster_amd or cluster_nvidia or cluster_intel)" test/e2e/llmisvc/
+
+# Run CPU and AMD GPU tests only
+pytest -m "llminferenceservice and (cluster_cpu or cluster_amd)" test/e2e/llmisvc/
+```
 
 ## Config Naming Convention
 
@@ -22,7 +45,15 @@ Use prefixed categories that get composed together:
 - **`model-*`**: Model sources (e.g., `model-fb-opt-125m`, `model-gpt2`) 
 - **`router-*`**: Routing configs (e.g., `router-managed`, `router-with-scheduler`)
 
+Test IDs are generated by combining the cluster capability from pytest marks with all config names:
+- Test ID format: `{cluster_capability}-{config1}-{config2}-{config3}`
+- Example: `cluster_cpu-router-managed-workload-single-cpu-model-fb-opt-125m`
+
 ## Adding New Configs
 
 1. Add to `LLMINFERENCESERVICE_CONFIGS` in `test_configs.py`
-2. Follow `category-descriptor` naming (prefix automatically stripped from test IDs) 
\ No newline at end of file
+2. Follow `category-descriptor` naming (prefix automatically stripped from test IDs)
+3. Add new cluster capability test cases using `pytest.param` with appropriate marks:
+   ```python
+   pytest.param(["router-managed", "workload-nvidia-a100-gpu", "model-llama-70b"], marks=pytest.mark.cluster_qualcomm),
+   ``` 
\ No newline at end of file
diff --git a/test/e2e/llmisvc/conftest.py b/test/e2e/llmisvc/conftest.py
index 8722251fc6f..cdbad281060 100644
--- a/test/e2e/llmisvc/conftest.py
+++ b/test/e2e/llmisvc/conftest.py
@@ -19,6 +19,28 @@
 def config_names(request):
     return request.param
 
+def pytest_collection_modifyitems(config, items):
+    for item in items:
+        # only touch parameterized tests
+        if not hasattr(item, "callspec"):
+            continue
+
+        # if there's no [...] suffix (i.e. not parametrized), skip
+        if "[" not in item.nodeid:
+            continue
+        base, rest = item.nodeid.split("[", 1)
+        rest = rest.rstrip("]")
+
+        cluster_marks = [
+            m.name
+            for m in item.iter_markers()
+            if m.name.startswith("cluster_")
+        ]
+        if not cluster_marks:
+            continue
+
+        new_id = "-".join(cluster_marks + [rest])
+        item._nodeid = f"{base}[{new_id}]"
 
 def pytest_configure(config):
     config.addinivalue_line(
diff --git a/test/e2e/llmisvc/pytest.ini b/test/e2e/llmisvc/pytest.ini
index c1a0cbaa961..3c88282f9bc 100644
--- a/test/e2e/llmisvc/pytest.ini
+++ b/test/e2e/llmisvc/pytest.ini
@@ -1,4 +1,4 @@
-[tool:pytest]
+[pytest]
 testpaths = .
 python_files = test_*.py
 python_functions = test_*
@@ -10,5 +10,9 @@ addopts =
     --disable-warnings
 markers =
     llminferenceservice: LLM inference service tests
+    cluster.cpu: CPU tests
+    cluster.amd: AMD tests
+    cluster.intel: Intel tests
+    cluster.nvidia: NVIDIA tests
     asyncio: AsyncIO tests
 asyncio_mode = auto 
\ No newline at end of file
diff --git a/test/e2e/llmisvc/test_configs.py b/test/e2e/llmisvc/test_configs.py
index 87e6da0a89a..259ed056e78 100644
--- a/test/e2e/llmisvc/test_configs.py
+++ b/test/e2e/llmisvc/test_configs.py
@@ -58,17 +58,6 @@
     },
 }
 
-
-def generate_test_id(config_names):
-    """Generate a test ID from config names by removing prefixes."""
-    parts = []
-    for config in config_names:
-        if "-" in config:
-            parts.append(config.split("-", 1)[1])  # Remove first prefix-
-        else:
-            parts.append(config)
-    return "-".join(parts)
-
 @pytest.fixture(scope="session")
 def llm_config_factory():
     """Factory for creating/cleaning LLMInferenceServiceConfig once per session."""
@@ -120,6 +109,12 @@ def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
         except Exception:
             pass
 
+def generate_test_id(config_names):
+    """Generate a test ID from config names by removing prefixes."""
+    parts = []
+    for config in config_names:
+        parts.append(config)
+    return "-".join(parts)
 
 def create_llmisvc_config(kserve_client, llm_config, namespace=None):
     version = llm_config["apiVersion"].split("/")[1]
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 9a3ddd2f06d..64f32a54bf0 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -31,18 +31,19 @@
 KSERVE_PLURAL_LLMINFERENCESERVICE = "llminferenceservices"
 
 
-@pytest.mark.llminferenceservice(type="cpu")
+@pytest.mark.llminferenceservice
 @pytest.mark.asyncio(scope="session")
 @pytest.mark.parametrize(
     "config_names",
     [
-        ["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+        pytest.param(["router-managed", "workload-single-cpu", "model-fb-opt-125m"], marks=pytest.mark.cluster_cpu),
+        pytest.param(["router-managed", "workload-amd-gpu", "model-fb-opt-125m"], marks=pytest.mark.cluster_amd),
     ],
-    indirect=True,
+    indirect=["config_names"],
     ids=generate_test_id,
 )
 async def test_llm_inference_service(request, llm_config_factory, config_names):
-    created_config_names = llm_config_factory(config_names)
+    created_service_configs = llm_config_factory(config_names)
     service_name = generate_service_name(request.node.name, config_names)
 
     llm_isvc = V1alpha1LLMInferenceService(
@@ -52,8 +53,8 @@ async def test_llm_inference_service(request, llm_config_factory, config_names):
             name=service_name, namespace=KSERVE_TEST_NAMESPACE
         ),
         spec={
-            "replicas": 1,
-            "baseRefs": [{"name": config_name} for config_name in created_config_names],
+
+            "baseRefs": [{"name": config_name} for config_name in created_service_configs],
         },
     )
 
diff --git a/test/e2e/pytest.ini b/test/e2e/pytest.ini
index 9aba5ff4e3e..92c33eb8a26 100644
--- a/test/e2e/pytest.ini
+++ b/test/e2e/pytest.ini
@@ -18,4 +18,8 @@ markers =
     llm: e2e tests for huggingface runtime
     vllm: e2e tests for huggingface runtime with vllm-openvino backend
     llminferenceservice: e2e tests for llm inference service controller
-    modelcache: e2e tests for model caching
\ No newline at end of file
+    modelcache: e2e tests for model caching
+    cluster_cpu: test targeting cluster with CPU
+    cluster_amd: test targeting cluster with AMD
+    cluster_intel: test targeting cluster with Intel
+    cluster_nvidia: test targeting cluster with NVIDIA
\ No newline at end of file

From 2752ce177d747863eaa72a1e0a1227ba0c01fedc Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Thu, 24 Jul 2025 18:43:45 +0200
Subject: [PATCH 06/38] chore: excludes tests from flake8

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .flake8 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.flake8 b/.flake8
index 83642179983..1037518bd0e 100644
--- a/.flake8
+++ b/.flake8
@@ -16,3 +16,4 @@ exclude =
   python/*_pb2.py
   docs/**/*.py
   python/kserve/kserve/protocol/rest/openai/types/openapi.py
+  test/e2e/llmisvc/**.py

From 20e58693f053eb2f30274269c54a3b94ac06106c Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Thu, 24 Jul 2025 18:58:57 +0200
Subject: [PATCH 07/38] chore: precommit fixes

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>

; Conflicts:
;	test/e2e/llmisvc/test_configs.py

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/test_configs.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/e2e/llmisvc/test_configs.py b/test/e2e/llmisvc/test_configs.py
index 259ed056e78..8b55eba58ed 100644
--- a/test/e2e/llmisvc/test_configs.py
+++ b/test/e2e/llmisvc/test_configs.py
@@ -75,16 +75,20 @@ def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
                 get_llmisvc_config(client, name, namespace)
                 continue
             except Exception as e:
-                is_404_api = isinstance(e, ApiException) and getattr(e, "status", None) == 404
-                is_404_runtime = isinstance(e, RuntimeError) and "not found" in str(e).lower()
+                is_404_api = (
+                    isinstance(e, ApiException) and getattr(e, "status", None) == 404
+                )
+                is_404_runtime = (
+                    isinstance(e, RuntimeError) and "not found" in str(e).lower()
+                )
                 if not (is_404_api or is_404_runtime):
                     raise
 
             body = {
                 "apiVersion": "serving.kserve.io/v1alpha1",
-                "kind":       "LLMInferenceServiceConfig",
-                "metadata":   {"name": name, "namespace": namespace},
-                "spec":       spec,
+                "kind": "LLMInferenceServiceConfig",
+                "metadata": {"name": name, "namespace": namespace},
+                "spec": spec,
             }
 
             try:

From 5aceb3bda68540213ceb2aca5bfb1ada6861c8b7 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 09:46:05 +0200
Subject: [PATCH 08/38] chore: no need for local pytest.ini as it's ignored
 anyway

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/pytest.ini | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 test/e2e/llmisvc/pytest.ini

diff --git a/test/e2e/llmisvc/pytest.ini b/test/e2e/llmisvc/pytest.ini
deleted file mode 100644
index 3c88282f9bc..00000000000
--- a/test/e2e/llmisvc/pytest.ini
+++ /dev/null
@@ -1,18 +0,0 @@
-[pytest]
-testpaths = .
-python_files = test_*.py
-python_functions = test_*
-python_classes = Test*
-addopts = 
-    -v
-    --tb=short
-    --strict-markers
-    --disable-warnings
-markers =
-    llminferenceservice: LLM inference service tests
-    cluster.cpu: CPU tests
-    cluster.amd: AMD tests
-    cluster.intel: Intel tests
-    cluster.nvidia: NVIDIA tests
-    asyncio: AsyncIO tests
-asyncio_mode = auto 
\ No newline at end of file

From 5299d05b418099af930c4acb4acc5f3b7436a756 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 09:52:15 +0200
Subject: [PATCH 09/38] lint: adds possibility to ignore unused warnings

When test factories (fixtures) are imported from another package, flake8
flags the imports as unused or redefined (F401, F811). These warnings are
false positives.

#upstream

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .flake8                                        | 3 +--
 test/e2e/llmisvc/test_llm_inference_service.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.flake8 b/.flake8
index 1037518bd0e..84fed34e69c 100644
--- a/.flake8
+++ b/.flake8
@@ -1,6 +1,6 @@
 [flake8]
 max-line-length = 120
-extend-ignore = E203,E501,E701, B017
+extend-ignore = E203,E501,E701,B017,F401,F811
 exclude =
   .venv,
   venv,
@@ -16,4 +16,3 @@ exclude =
   python/*_pb2.py
   docs/**/*.py
   python/kserve/kserve/protocol/rest/openai/types/openapi.py
-  test/e2e/llmisvc/**.py
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 64f32a54bf0..d9182bdda8f 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -24,7 +24,7 @@
 from .test_configs import (
     LLMINFERENCESERVICE_CONFIGS,
     generate_test_id,
-    llm_config_factory,
+    llm_config_factory,  # noqa: F401,F811
     KSERVE_TEST_NAMESPACE,
 )
 

From 2b3a4c60c20b46a08e2a4268843f6a2d33bbdc84 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 09:52:24 +0200
Subject: [PATCH 10/38] precommit fixes

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/conftest.py                   |  6 +++---
 test/e2e/llmisvc/test_configs.py               |  3 +++
 test/e2e/llmisvc/test_llm_inference_service.py | 15 +++++++++++----
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/test/e2e/llmisvc/conftest.py b/test/e2e/llmisvc/conftest.py
index cdbad281060..a22fdb37b55 100644
--- a/test/e2e/llmisvc/conftest.py
+++ b/test/e2e/llmisvc/conftest.py
@@ -19,6 +19,7 @@
 def config_names(request):
     return request.param
 
+
 def pytest_collection_modifyitems(config, items):
     for item in items:
         # only touch parameterized tests
@@ -32,9 +33,7 @@ def pytest_collection_modifyitems(config, items):
         rest = rest.rstrip("]")
 
         cluster_marks = [
-            m.name
-            for m in item.iter_markers()
-            if m.name.startswith("cluster_")
+            m.name for m in item.iter_markers() if m.name.startswith("cluster_")
         ]
         if not cluster_marks:
             continue
@@ -42,6 +41,7 @@ def pytest_collection_modifyitems(config, items):
         new_id = "-".join(cluster_marks + [rest])
         item._nodeid = f"{base}[{new_id}]"
 
+
 def pytest_configure(config):
     config.addinivalue_line(
         "markers", "llminferenceservice: mark test as an LLM inference service test"
diff --git a/test/e2e/llmisvc/test_configs.py b/test/e2e/llmisvc/test_configs.py
index 8b55eba58ed..774fb94047e 100644
--- a/test/e2e/llmisvc/test_configs.py
+++ b/test/e2e/llmisvc/test_configs.py
@@ -58,6 +58,7 @@
     },
 }
 
+
 @pytest.fixture(scope="session")
 def llm_config_factory():
     """Factory for creating/cleaning LLMInferenceServiceConfig once per session."""
@@ -113,6 +114,7 @@ def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
         except Exception:
             pass
 
+
 def generate_test_id(config_names):
     """Generate a test ID from config names by removing prefixes."""
     parts = []
@@ -120,6 +122,7 @@ def generate_test_id(config_names):
         parts.append(config)
     return "-".join(parts)
 
+
 def create_llmisvc_config(kserve_client, llm_config, namespace=None):
     version = llm_config["apiVersion"].split("/")[1]
 
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index d9182bdda8f..8dcb292062c 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -36,8 +36,14 @@
 @pytest.mark.parametrize(
     "config_names",
     [
-        pytest.param(["router-managed", "workload-single-cpu", "model-fb-opt-125m"], marks=pytest.mark.cluster_cpu),
-        pytest.param(["router-managed", "workload-amd-gpu", "model-fb-opt-125m"], marks=pytest.mark.cluster_amd),
+        pytest.param(
+            ["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+            marks=pytest.mark.cluster_cpu,
+        ),
+        pytest.param(
+            ["router-managed", "workload-amd-gpu", "model-fb-opt-125m"],
+            marks=pytest.mark.cluster_amd,
+        ),
     ],
     indirect=["config_names"],
     ids=generate_test_id,
@@ -53,8 +59,9 @@ async def test_llm_inference_service(request, llm_config_factory, config_names):
             name=service_name, namespace=KSERVE_TEST_NAMESPACE
         ),
         spec={
-
-            "baseRefs": [{"name": config_name} for config_name in created_service_configs],
+            "baseRefs": [
+                {"name": config_name} for config_name in created_service_configs
+            ],
         },
     )
 

From 7b6f752b73d705ddcb6a9983c148906146601691 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 10:00:20 +0200
Subject: [PATCH 11/38] Fail early on CRDs

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5aa8fca8d9c..4b3efdc226d 100644
--- a/Makefile
+++ b/Makefile
@@ -253,7 +253,7 @@ deploy-dev-llm:
 	./hack/deploy_dev_llm.sh
 
 deploy-ci: manifests
-	kubectl apply --server-side=true -k config/crd || true
+	kubectl apply --server-side=true -k config/crd
 	kubectl wait --for=condition=established --timeout=60s crd/llminferenceserviceconfigs.serving.kserve.io
 	kubectl apply --server-side=true -k config/overlays/test
 	# TODO: Add runtimes as part of default deployment

From 1b0747198de8e042df0c82d61c725ea7341b6f7c Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 10:03:51 +0200
Subject: [PATCH 12/38] chore: fixes marker in test/e2e/llmisvc/README.md
 example

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/llmisvc/README.md b/test/e2e/llmisvc/README.md
index ba0efc76ca2..d9c7f8ebddb 100644
--- a/test/e2e/llmisvc/README.md
+++ b/test/e2e/llmisvc/README.md
@@ -55,5 +55,5 @@ Test IDs are generated by combining the cluster capability from pytest marks wit
 2. Follow `category-descriptor` naming (prefix automatically stripped from test IDs)
 3. Add new cluster capability test cases using `pytest.param` with appropriate marks:
    ```python
-   pytest.param(["router-managed", "workload-nvidia-a100-gpu", "model-llama-70b"], marks=pytest.mark.cluster_qualcomm),
+   pytest.param(["router-managed", "workload-nvidia-a100-gpu", "model-llama-70b"], marks=pytest.mark.cluster_nvidia),
    ``` 
\ No newline at end of file

From 1121cd42f782bf299a0956d850dc1fba67acefd9 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 19:54:22 +0200
Subject: [PATCH 13/38] chore: simplifies test fixtures

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/conftest.py                  |   9 +-
 .../llmisvc/{test_configs.py => fixtures.py}  |  72 ++++++--
 .../e2e/llmisvc/test_llm_inference_service.py | 172 ++++++++----------
 3 files changed, 133 insertions(+), 120 deletions(-)
 rename test/e2e/llmisvc/{test_configs.py => fixtures.py} (74%)

diff --git a/test/e2e/llmisvc/conftest.py b/test/e2e/llmisvc/conftest.py
index a22fdb37b55..ad950aedf93 100644
--- a/test/e2e/llmisvc/conftest.py
+++ b/test/e2e/llmisvc/conftest.py
@@ -12,14 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pytest
-
-
-@pytest.fixture
-def config_names(request):
-    return request.param
-
 
+# This hook is used to ensure that the test names are unique and to ensure that
+# the test names are consistent with the cluster marks.
 def pytest_collection_modifyitems(config, items):
     for item in items:
         # only touch parameterized tests
diff --git a/test/e2e/llmisvc/test_configs.py b/test/e2e/llmisvc/fixtures.py
similarity index 74%
rename from test/e2e/llmisvc/test_configs.py
rename to test/e2e/llmisvc/fixtures.py
index 774fb94047e..c8f0a3f8886 100644
--- a/test/e2e/llmisvc/test_configs.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import uuid
 import os
 import pytest
+from typing import List
 from kubernetes import client
 from kubernetes.client.rest import ApiException
-from kserve import KServeClient, constants
+from kserve import KServeClient, constants, V1alpha1LLMInferenceService
 
 KSERVE_PLURAL_LLMINFERENCESERVICECONFIG = "llminferenceserviceconfigs"
 KSERVE_TEST_NAMESPACE = "kserve-ci-e2e-test"
@@ -51,7 +53,7 @@
     },
     "router-with-scheduler": {
         "router": {
-            "scheduler": {"pool": {}, "template": {}},
+            "scheduler": {},
             "route": {},
             "gateway": {},
         },
@@ -59,14 +61,36 @@
 }
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="function")
+def test_case(request):
+    tc = request.param
+    
+    service_name = generate_service_name(request.node.name, tc.base_refs)
+    tc.model_name = get_model_name_from_configs(tc.base_refs)
+
+    tc.llm_service = V1alpha1LLMInferenceService(
+        api_version="serving.kserve.io/v1alpha1",
+        kind="LLMInferenceService",
+        metadata=client.V1ObjectMeta(
+            name=service_name, namespace=KSERVE_TEST_NAMESPACE
+        ),
+        spec={
+            "baseRefs": [
+                {"name": base_ref} for base_ref in tc.base_refs
+            ],
+        },
+    )
+    
+    return tc
+
+@pytest.fixture(scope="session", autouse=True)
 def llm_config_factory():
     """Factory for creating/cleaning LLMInferenceServiceConfig once per session."""
     created = []
     client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
 
-    def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
-        for name in names:
+    def _create_configs(namespace=KSERVE_TEST_NAMESPACE):
+        for name in LLMINFERENCESERVICE_CONFIGS:
             if name not in LLMINFERENCESERVICE_CONFIGS:
                 raise ValueError(f"Unknown config name: {name}")
 
@@ -103,9 +127,8 @@ def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
                 # otherwise, real error
                 raise
 
-        return names
 
-    yield _create_configs
+    yield _create_configs()
 
     # teardown: best‑effort cleanup
     for name, namespace in created:
@@ -114,13 +137,34 @@ def _create_configs(names, namespace=KSERVE_TEST_NAMESPACE):
         except Exception:
             pass
 
-
-def generate_test_id(config_names):
-    """Generate a test ID from config names by removing prefixes."""
-    parts = []
-    for config in config_names:
-        parts.append(config)
-    return "-".join(parts)
+def get_model_name_from_configs(config_names):
+    """Extract model name from model config."""
+    for config_name in config_names:
+        if config_name.startswith("model-"):
+            config = LLMINFERENCESERVICE_CONFIGS[config_name]
+            if "model" in config and "name" in config["model"]:
+                return config["model"]["name"]
+    return "default-model"
+
+def generate_service_name(test_name: str, base_refs: List[str]) -> str:
+    base_name = test_name.split("[", 1)[0]
+    base_name = base_name.replace("test_", "")
+    base_name = base_name.replace("_", "-")
+    config_suffix = "-".join(sorted(base_refs))
+    test_case = f"{base_name}-{config_suffix}".lower()
+
+    uid = uuid.uuid4().hex[:8]
+
+    max_total = 63
+    sep = "-"
+    max_test_case = max_total - len(sep) - len(uid)
+    test_case = test_case[:max_test_case].rstrip(sep)
+
+    return f"{test_case}{sep}{uid}"
+
+def generate_test_id(test_case) -> str:
+    """Generate a test ID from base refs."""
+    return "-".join(test_case.base_refs)
 
 
 def create_llmisvc_config(kserve_client, llm_config, namespace=None):
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 8dcb292062c..b60d09531d9 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -15,92 +15,98 @@
 import json
 import os
 import time
+from dataclasses import dataclass
+from typing import Any, Callable, List, Optional
 
 import pytest
 import requests
 from kubernetes import client
 from kserve import KServeClient, V1alpha1LLMInferenceService, constants
 
-from .test_configs import (
+from .fixtures import (
     LLMINFERENCESERVICE_CONFIGS,
     generate_test_id,
     llm_config_factory,  # noqa: F401,F811
+    test_case,  # noqa: F401,F811
     KSERVE_TEST_NAMESPACE,
 )
 
 KSERVE_PLURAL_LLMINFERENCESERVICE = "llminferenceservices"
 
 
+def assert_200(response: requests.Response) -> None:
+    """Default response assertion that checks for 200 status code."""
+    assert (
+        response.status_code == 200
+    ), f"Service returned {response.status_code}: {response.text}"
+
+
+@dataclass
+class Case:
+    """Test case configuration for LLM inference service tests."""
+    base_refs: List[str]
+    prompt: str = "Boston is a"
+    max_tokens: int = 1
+    response_assertion: Callable[[requests.Response], None] = assert_200
+    llm_service: V1alpha1LLMInferenceService = None # Generated by llm_service_factory
+    model_name: str = "default/model"
+
+
 @pytest.mark.llminferenceservice
-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 @pytest.mark.parametrize(
-    "config_names",
+    "test_case",
     [
         pytest.param(
-            ["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+            Case(["router-managed", "workload-single-cpu", "model-fb-opt-125m"]),
             marks=pytest.mark.cluster_cpu,
         ),
         pytest.param(
-            ["router-managed", "workload-amd-gpu", "model-fb-opt-125m"],
-            marks=pytest.mark.cluster_amd,
+            Case(
+                base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+                prompt="What is the capital of France?",
+                response_assertion=lambda response: (
+                    response.status_code == 200 
+                    and response.json().get("choices") is not None
+                    and len(response.json().get("choices", [])) > 0
+                ),
+            ),
+            marks=pytest.mark.cluster_cpu,
         ),
     ],
-    indirect=["config_names"],
+    indirect=["test_case"],
     ids=generate_test_id,
 )
-async def test_llm_inference_service(request, llm_config_factory, config_names):
-    created_service_configs = llm_config_factory(config_names)
-    service_name = generate_service_name(request.node.name, config_names)
-
-    llm_isvc = V1alpha1LLMInferenceService(
-        api_version="serving.kserve.io/v1alpha1",
-        kind="LLMInferenceService",
-        metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
-        ),
-        spec={
-            "baseRefs": [
-                {"name": config_name} for config_name in created_service_configs
-            ],
-        },
-    )
-
+def test_llm_inference_service(test_case: Case):
+    
     kserve_client = KServeClient(
         config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
     )
 
+    service_name = test_case.llm_service.metadata.name
+    
     try:
-        create_llmisvc(kserve_client, llm_isvc)
-        wait_for_model_response(
-            kserve_client,
-            service_name,
-            KSERVE_TEST_NAMESPACE,
-            model_name=get_model_name_from_configs(config_names),
-        )
+        create_llmisvc(kserve_client, test_case.llm_service)
+        wait_for_model_response(kserve_client, test_case)
     except Exception as e:
         print(f"ERROR: Failed to call llm inference service {service_name}: {e}")
-        collect_diagnostics(service_name, KSERVE_TEST_NAMESPACE)
+        collect_diagnostics(kserve_client, test_case.llm_service)
         raise
     finally:
         try:
-            delete_llmisvc(kserve_client, service_name, KSERVE_TEST_NAMESPACE)
+            delete_llmisvc(kserve_client, test_case.llm_service)
         except Exception as e:
             print(f"Warning: Failed to cleanup service {service_name}: {e}")
 
 
-def create_llmisvc(kserve_client, llm_isvc, namespace=None):
+def create_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
     from kserve.utils import utils
 
-    version = llm_isvc.api_version.split("/")[1]
-
-    if namespace is None:
-        namespace = utils.get_isvc_namespace(llm_isvc)
-
     try:
         outputs = kserve_client.api_instance.create_namespaced_custom_object(
             constants.KSERVE_GROUP,
-            version,
-            namespace,
+            llm_isvc.api_version.split("/")[1],
+            llm_isvc.metadata.namespace,
             KSERVE_PLURAL_LLMINFERENCESERVICE,
             llm_isvc,
         )
@@ -112,16 +118,14 @@ def create_llmisvc(kserve_client, llm_isvc, namespace=None):
         ) from e
 
 
-def delete_llmisvc(
-    kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
-):
+def delete_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
     try:
         return kserve_client.api_instance.delete_namespaced_custom_object(
             constants.KSERVE_GROUP,
-            version,
-            namespace,
+            llm_isvc.api_version.split("/")[1],
+            llm_isvc.metadata.namespace,
             KSERVE_PLURAL_LLMINFERENCESERVICE,
-            name,
+            llm_isvc.metadata.name,
         )
     except client.rest.ApiException as e:
         raise RuntimeError(
@@ -130,9 +134,7 @@ def delete_llmisvc(
         ) from e
 
 
-def get_llmisvc(
-    kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
-):
+def get_llmisvc(kserve_client: KServeClient, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION):
     try:
         return kserve_client.api_instance.get_namespaced_custom_object(
             constants.KSERVE_GROUP,
@@ -149,55 +151,48 @@ def get_llmisvc(
 
 
 def wait_for_model_response(
-    kserve_client,
-    name,
-    namespace,
-    timeout_seconds=600,
-    version=constants.KSERVE_V1ALPHA1_VERSION,
-    model_name=None,
-):
-    if model_name is None:
-        model_name = "default-model"
-
+    kserve_client: KServeClient,
+    test_case: Case,
+    timeout_seconds: int = 300, # TODO Make it configurable in Case
+) -> str:
+    
     service_url = None
 
     def assert_model_responds():
         nonlocal service_url
 
         try:
-            service_url = get_llm_service_url(kserve_client, name, namespace, version)
+            service_url = get_llm_service_url(kserve_client, test_case.llm_service)
         except Exception as e:
             raise AssertionError(f"Failed to get service URL: {e}") from e
 
         completion_url = f"{service_url}/v1/completions"
-        test_payload = {"model": model_name, "prompt": "test", "max_tokens": 1}
-
+        test_payload = {"model": test_case.model_name, "prompt": test_case.prompt, "max_tokens": test_case.max_tokens}
+        print(f"Calling LLM service at {completion_url} with payload {test_payload}")
         try:
             response = requests.post(
                 completion_url,
                 headers={"Content-Type": "application/json"},
                 json=test_payload,
-                timeout=30,
+                timeout=30, # TODO Make it configurable
             )
         except Exception as e:
             raise AssertionError(f"Failed to call model: {e}") from e
 
-        assert (
-            response.status_code == 200
-        ), f"Service returned {response.status_code}: {response.text}"
+        test_case.response_assertion(response)
         return service_url
 
     return wait_for(assert_model_responds, timeout=timeout_seconds, interval=10.0)
 
 
-def get_llm_service_url(
-    kserve_client, service_name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
-):
+def get_llm_service_url(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
+    service_name = llm_isvc.metadata.name
+    
     try:
-        llm_isvc = get_llmisvc(kserve_client, service_name, namespace, version)
+        llm_isvc = get_llmisvc(kserve_client, llm_isvc.metadata.name, llm_isvc.metadata.namespace, llm_isvc.api_version.split("/")[1])
 
         if "status" not in llm_isvc:
-            raise ValueError(f"No status found in LLM inference service {service_name}")
+            raise ValueError(f"No status found in LLM inference service {service_name} status: {llm_isvc}")
 
         status = llm_isvc["status"]
 
@@ -221,7 +216,8 @@ def get_llm_service_url(
         ) from e
 
 
-def wait_for(assertion_fn, timeout: float = 5.0, interval: float = 0.1):
+def wait_for(assertion_fn: Callable[[], Any], timeout: float = 5.0, interval: float = 0.1) -> Any:
+    """Wait for assertion function to succeed within timeout."""
     deadline = time.time() + timeout
     while True:
         try:
@@ -231,33 +227,11 @@ def wait_for(assertion_fn, timeout: float = 5.0, interval: float = 0.1):
                 raise
             time.sleep(interval)
 
-
-def get_model_name_from_configs(config_names):
-    """Extract model name from model config."""
-    for config_name in config_names:
-        if config_name.startswith("model-"):
-            config = LLMINFERENCESERVICE_CONFIGS[config_name]
-            if "model" in config and "name" in config["model"]:
-                return config["model"]["name"]
-    return "default-model"
-
-
-def generate_service_name(test_name, config_names):
-    base_name = test_name.split("[")[0]  # Remove everything after [
-    base_name = base_name.replace("test_", "").replace("_", "-")
-    config_suffix = "-".join(sorted(config_names))
-    service_name = f"{base_name}-{config_suffix}"
-    service_name = service_name.lower()
-    service_name = service_name[:63].rstrip("-")
-    return service_name
-
-
-def collect_diagnostics(service_name, namespace):
+def collect_diagnostics(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
     try:
-        kserve_client = KServeClient(
-            config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
-        )
-
+        
+        service_name = llm_isvc.metadata.name
+        namespace = llm_isvc.metadata.namespace
         print(f"\n{'='*60}")
         print(f"DIAGNOSTIC INFORMATION FOR {service_name} in {namespace}")
         print(f"{'='*60}")

From 12664771c68deccfcb3c62fc7f58212768458fdd Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 20:26:40 +0200
Subject: [PATCH 14/38] fix: adjusts gh action to run on cpu cluster

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/workflows/e2e-test-llmisvc.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
index 65a82d93279..d7a6c43fc2e 100644
--- a/.github/workflows/e2e-test-llmisvc.yml
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -96,9 +96,9 @@ jobs:
       - name: Run E2E tests
         timeout-minutes: 30
         run: |
-          # Run only CPU tests for now using pytest markers
+          # Run only CPU tests for now using pytest markers (cluster_)
           # Available GPU vendors: amd, nvidia, intel
-          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and cpu" 1 "istio-gatewayapi-ext"
+          ./test/scripts/gh-actions/run-e2e-tests.sh "llminferenceservice and cluster_cpu" 2 "istio-gatewayapi-ext"
 
       - name: Check system status
         if: always()

From f270cfa8e23ab66ab8df887b266fa3b726547701 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Fri, 25 Jul 2025 20:44:07 +0200
Subject: [PATCH 15/38] feat: adds logging decorator

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/__init__.py                              |  0
 test/e2e/llmisvc/logging.py                   | 45 +++++++++++++++++++
 .../e2e/llmisvc/test_llm_inference_service.py | 35 ++++++++-------
 3 files changed, 64 insertions(+), 16 deletions(-)
 create mode 100644 test/__init__.py
 create mode 100644 test/e2e/llmisvc/logging.py

diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/test/e2e/llmisvc/logging.py b/test/e2e/llmisvc/logging.py
new file mode 100644
index 00000000000..7da44364fa2
--- /dev/null
+++ b/test/e2e/llmisvc/logging.py
@@ -0,0 +1,45 @@
+# Copyright 2025 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import logging
+import time
+from datetime import datetime
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+def log_execution(func):
+    """Decorator to log function start/end with timestamps and duration."""
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        func_name = func.__name__
+        
+        timestamp_start = datetime.now().isoformat()
+        logger.info(f"[{func_name}] [{timestamp_start}] start")
+        start_time = time.time()
+        
+        try:
+            result = func(*args, **kwargs)
+            duration = time.time() - start_time
+            timestamp_end = datetime.now().isoformat()
+            logger.info(f"[{func_name}] [{timestamp_end}] end - SUCCESS in {duration:.3f}s")
+            return result
+        except Exception as e:
+            duration = time.time() - start_time
+            timestamp_end = datetime.now().isoformat()
+            logger.error(f"[{func_name}] [{timestamp_end}] end - FAILED in {duration:.3f}s: {e}")
+            raise
+    return wrapper
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index b60d09531d9..826b1e4181b 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -16,7 +16,7 @@
 import os
 import time
 from dataclasses import dataclass
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, List
 
 import pytest
 import requests
@@ -24,12 +24,10 @@
 from kserve import KServeClient, V1alpha1LLMInferenceService, constants
 
 from .fixtures import (
-    LLMINFERENCESERVICE_CONFIGS,
     generate_test_id,
-    llm_config_factory,  # noqa: F401,F811
     test_case,  # noqa: F401,F811
-    KSERVE_TEST_NAMESPACE,
 )
+from .logging import log_execution
 
 KSERVE_PLURAL_LLMINFERENCESERVICE = "llminferenceservices"
 
@@ -61,22 +59,23 @@ class Case:
             Case(["router-managed", "workload-single-cpu", "model-fb-opt-125m"]),
             marks=pytest.mark.cluster_cpu,
         ),
-        pytest.param(
-            Case(
-                base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
-                prompt="What is the capital of France?",
-                response_assertion=lambda response: (
-                    response.status_code == 200 
-                    and response.json().get("choices") is not None
-                    and len(response.json().get("choices", [])) > 0
-                ),
-            ),
-            marks=pytest.mark.cluster_cpu,
-        ),
+        # pytest.param(
+        #     Case(
+        #         base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+        #         prompt="What is the capital of France?",
+        #         response_assertion=lambda response: (
+        #             response.status_code == 200 
+        #             and response.json().get("choices") is not None
+        #             and len(response.json().get("choices", [])) > 0
+        #         ),
+        #     ),
+        #     marks=pytest.mark.cluster_cpu,
+        # ),
     ],
     indirect=["test_case"],
     ids=generate_test_id,
 )
+@log_execution
 def test_llm_inference_service(test_case: Case):
     
     kserve_client = KServeClient(
@@ -99,6 +98,7 @@ def test_llm_inference_service(test_case: Case):
             print(f"Warning: Failed to cleanup service {service_name}: {e}")
 
 
+@log_execution
 def create_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
     from kserve.utils import utils
 
@@ -118,6 +118,7 @@ def create_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceSe
         ) from e
 
 
+@log_execution
 def delete_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
     try:
         return kserve_client.api_instance.delete_namespaced_custom_object(
@@ -134,6 +135,7 @@ def delete_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceSe
         ) from e
 
 
+@log_execution
 def get_llmisvc(kserve_client: KServeClient, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION):
     try:
         return kserve_client.api_instance.get_namespaced_custom_object(
@@ -150,6 +152,7 @@ def get_llmisvc(kserve_client: KServeClient, name, namespace, version=constants.
         ) from e
 
 
+@log_execution
 def wait_for_model_response(
     kserve_client: KServeClient,
     test_case: Case,

From ba5834562440b6334f2d890b3f6c02d2d3173768 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Sat, 26 Jul 2025 00:25:33 +0200
Subject: [PATCH 16/38] feat: logging and cr dump

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/README.md                    |  31 ++-
 test/e2e/llmisvc/diagnostic.py                | 100 ++++++++++
 test/e2e/llmisvc/fixtures.py                  |  15 +-
 test/e2e/llmisvc/logging.py                   |  18 +-
 .../e2e/llmisvc/test_llm_inference_service.py | 182 +++++++++++-------
 5 files changed, 257 insertions(+), 89 deletions(-)
 create mode 100644 test/e2e/llmisvc/diagnostic.py

diff --git a/test/e2e/llmisvc/README.md b/test/e2e/llmisvc/README.md
index d9c7f8ebddb..c08b3c5fcef 100644
--- a/test/e2e/llmisvc/README.md
+++ b/test/e2e/llmisvc/README.md
@@ -4,7 +4,10 @@
 
 Tests combine config fragments from different categories to create complete scenarios:
 ```python
-pytest.param(["router-managed", "workload-single-cpu", "model-fb-opt-125m"], marks=pytest.mark.cluster_cpu)
+pytest.param(
+    TestCase(["router-managed", "workload-single-cpu", "model-fb-opt-125m"]),
+    marks=pytest.mark.cluster_cpu,
+)
 ```
 
 The `llm_config_factory` fixture automatically creates/cleans up `LLMInferenceServiceConfig` objects.
@@ -51,9 +54,29 @@ Test IDs are generated by combining the cluster capability from pytest marks wit
 
 ## Adding New Configs
 
-1. Add to `LLMINFERENCESERVICE_CONFIGS` in `test_configs.py`
+1. Add to `LLMINFERENCESERVICE_CONFIGS` in `fixtures.py`
 2. Follow `category-descriptor` naming (prefix automatically stripped from test IDs)
 3. Add new cluster capability test cases using `pytest.param` with appropriate marks:
    ```python
-   pytest.param(["router-managed", "workload-nvidia-a100-gpu", "model-llama-70b"], marks=pytest.mark.cluster_nvidia),
-   ``` 
\ No newline at end of file
+   pytest.param(
+       TestCase(["router-managed", "workload-nvidia-a100-gpu", "model-llama-70b"]),
+       marks=pytest.mark.cluster_nvidia,
+   ),
+   ```
+
+   You can also customize test behavior with additional `TestCase` fields:
+   ```python
+   pytest.param(
+       TestCase(
+           base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+           prompt="What is the capital of France?",
+           max_tokens=50,
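+           # Must raise on failure; a plain boolean result would be ignored.
+           response_assertion=lambda response: None if (
+               response.status_code == 200 and response.json().get("choices")
+           ) else pytest.fail(f"Unexpected response: {response.text}"),
+           # (any callable that asserts or raises on a bad response works here)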
+       ),
+       marks=pytest.mark.cluster_cpu,
+   ),
+   ``` 
diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
new file mode 100644
index 00000000000..47fa19392c2
--- /dev/null
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -0,0 +1,100 @@
+# Copyright 2025 The KServe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+from datetime import datetime
+import pytest
+from kubernetes import client, config, dynamic
+from kubernetes.client import api_client
+from kubernetes.client.exceptions import ApiException
+from kserve import KServeClient, V1alpha1LLMInferenceService, constants
+
+
+def print_all_events_table(namespace: str, max_events: int = 50):
+    """
+    Print the newest `max_events` events in `namespace` as a table (errors are printed, not raised).
+    """
+    core = client.CoreV1Api()
+
+    def _when(e):
+        # Events written through events.k8s.io may carry only event_time, so
+        # last/first can both be None; a None sort key would abort the sort.
+        stamps = (e.last_timestamp, e.first_timestamp, e.event_time)
+        return next((t for t in stamps if t), e.metadata.creation_timestamp)
+
+    try:
+        evs = core.list_namespaced_event(namespace=namespace).items
+
+        if not evs:
+            print("ℹ️ # No events found in namespace", namespace)
+            return
+
+        evs = sorted(evs, key=_when, reverse=True)[:max_events]
+
+        # print header
+        header = f"{'TIME':<25} {'NAMESPACE':<12} {'SOURCE':<20} {'TYPE':<8} {'REASON':<20} MESSAGE"
+        print(header)
+        print("-" * len(header))
+
+        for ev in evs:
+            ts = _when(ev)
+            ts_str = ts.strftime("%Y-%m-%d %H:%M:%S") if isinstance(ts, datetime) else str(ts)
+            src = f"{ev.source.component or ''}/{ev.source.host or ''}".strip("/")
+            msg = (ev.message or "").replace("\n", " ")
+            print(
+                f"{ts_str:<25} {ev.metadata.namespace:<12} {src:<20} {ev.type or '':<8} "
+                f"{ev.reason or '':<20} {msg}"
+            )
+
+    except Exception as e:
+        print(f"# ❌ failed to list events: {e}")
+
+
+def kinds_matching_by_labels(namespace: str, labels, api_kinds):
+    """
+    List all namespaced objects in `namespace` matching `labels`
+    whose kind is in `api_kinds`.
+
+    :param namespace: kube namespace to search
+    :param labels: either a dict of {k: v} or a raw selector string
+    :param api_kinds: an iterable of Resource.kind strings to include
+    :return: list of the matching items as returned by the dynamic client
+    """
+    config.load_kube_config()
+    dyn = dynamic.DynamicClient(api_client.ApiClient())
+    # A dict is rendered as a comma-separated "k=v" selector; anything else is used as-is.
+    selector = (
+        ",".join(f"{k}={v}" for k, v in labels.items())
+        if isinstance(labels, dict)
+        else labels
+    )
+
+    all_resources = itertools.chain.from_iterable(dyn.resources)
+
+    found = []
+    for rsrc in all_resources:
+        if not rsrc.namespaced or "list" not in rsrc.verbs:
+            continue
+        if rsrc.kind not in api_kinds:
+            continue
+        # Best effort: an API error for one kind skips that kind instead of aborting the scan.
+        try:
+            resp = rsrc.get(namespace=namespace, label_selector=selector)
+        except ApiException:
+            continue
+
+        items = getattr(resp, "items", [])
+        found.extend(items)
+
+    return found
diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index c8f0a3f8886..09892cdbb62 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -64,10 +64,12 @@
 @pytest.fixture(scope="function")
 def test_case(request):
     tc = request.param
-    
+
     service_name = generate_service_name(request.node.name, tc.base_refs)
     tc.model_name = get_model_name_from_configs(tc.base_refs)
 
+    # TODO fail early if base_refs does not exist (e.g. mistyped)?
+
     tc.llm_service = V1alpha1LLMInferenceService(
         api_version="serving.kserve.io/v1alpha1",
         kind="LLMInferenceService",
@@ -75,14 +77,13 @@ def test_case(request):
             name=service_name, namespace=KSERVE_TEST_NAMESPACE
         ),
         spec={
-            "baseRefs": [
-                {"name": base_ref} for base_ref in tc.base_refs
-            ],
+            "baseRefs": [{"name": base_ref} for base_ref in tc.base_refs],
         },
     )
-    
+
     return tc
 
+
 @pytest.fixture(scope="session", autouse=True)
 def llm_config_factory():
     """Factory for creating/cleaning LLMInferenceServiceConfig once per session."""
@@ -127,7 +128,6 @@ def _create_configs(namespace=KSERVE_TEST_NAMESPACE):
                 # otherwise, real error
                 raise
 
-
     yield _create_configs()
 
     # teardown: best‑effort cleanup
@@ -137,6 +137,7 @@ def _create_configs(namespace=KSERVE_TEST_NAMESPACE):
         except Exception:
             pass
 
+
 def get_model_name_from_configs(config_names):
     """Extract model name from model config."""
     for config_name in config_names:
@@ -146,6 +147,7 @@ def get_model_name_from_configs(config_names):
                 return config["model"]["name"]
     return "default-model"
 
+
 def generate_service_name(test_name: str, base_refs: List[str]) -> str:
     base_name = test_name.split("[", 1)[0]
     base_name = base_name.replace("test_", "")
@@ -162,6 +164,7 @@ def generate_service_name(test_name: str, base_refs: List[str]) -> str:
 
     return f"{test_case}{sep}{uid}"
 
+
 def generate_test_id(test_case) -> str:
     """Generate a test ID from base refs."""
     return "-".join(test_case.base_refs)
diff --git a/test/e2e/llmisvc/logging.py b/test/e2e/llmisvc/logging.py
index 7da44364fa2..31f6822113e 100644
--- a/test/e2e/llmisvc/logging.py
+++ b/test/e2e/llmisvc/logging.py
@@ -17,29 +17,35 @@
 import time
 from datetime import datetime
 
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
+
 def log_execution(func):
     """Decorator to log function start/end with timestamps and duration."""
+
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         func_name = func.__name__
-        
+
         timestamp_start = datetime.now().isoformat()
         logger.info(f"[{func_name}] [{timestamp_start}] start")
         start_time = time.time()
-        
+
         try:
             result = func(*args, **kwargs)
             duration = time.time() - start_time
             timestamp_end = datetime.now().isoformat()
-            logger.info(f"[{func_name}] [{timestamp_end}] end - SUCCESS in {duration:.3f}s")
+            logger.info(f"[{func_name}] [{timestamp_end}] end - ✅ in {duration:.3f}s")
             return result
         except Exception as e:
             duration = time.time() - start_time
             timestamp_end = datetime.now().isoformat()
-            logger.error(f"[{func_name}] [{timestamp_end}] end - FAILED in {duration:.3f}s: {e}")
+            logger.error(
+                f"[{func_name}] [{timestamp_end}] end - ❌ {duration:.3f}s: {e}"
+            )
             raise
+
     return wrapper
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 826b1e4181b..67d21e92b6d 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -12,17 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import os
 import time
 from dataclasses import dataclass
 from typing import Any, Callable, List
-
 import pytest
 import requests
 from kubernetes import client
+import yaml
 from kserve import KServeClient, V1alpha1LLMInferenceService, constants
-
+from .diagnostic import (
+    print_all_events_table,
+    kinds_matching_by_labels,
+)
 from .fixtures import (
     generate_test_id,
     test_case,  # noqa: F401,F811
@@ -40,14 +42,17 @@ def assert_200(response: requests.Response) -> None:
 
 
 @dataclass
-class Case:
+class TestCase:
     """Test case configuration for LLM inference service tests."""
+    __test__ = False  # Not a pytest test class; kept below the docstring so __doc__ is preserved.
     base_refs: List[str]
-    prompt: str = "Boston is a"
+    prompt: str = "KServe is a"
     max_tokens: int = 1
     response_assertion: Callable[[requests.Response], None] = assert_200
-    llm_service: V1alpha1LLMInferenceService = None # Generated by llm_service_factory
-    model_name: str = "default/model"
+    wait_timeout: int = 300
+    # Filled in by the `test_case` fixture before the test body runs
+    llm_service: V1alpha1LLMInferenceService = None  # Built by the `test_case` fixture
+    model_name: str = "default/model"  # Overwritten by the `test_case` fixture
 
 
 @pytest.mark.llminferenceservice
@@ -56,15 +61,16 @@ class Case:
     "test_case",
     [
         pytest.param(
-            Case(["router-managed", "workload-single-cpu", "model-fb-opt-125m"]),
+            TestCase(["router-managed", "workload-single-cpu", "model-fb-opt-125m"]),
             marks=pytest.mark.cluster_cpu,
         ),
+        # Example test case
         # pytest.param(
         #     Case(
         #         base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
         #         prompt="What is the capital of France?",
         #         response_assertion=lambda response: (
-        #             response.status_code == 200 
+        #             response.status_code == 200
         #             and response.json().get("choices") is not None
         #             and len(response.json().get("choices", [])) > 0
         #         ),
@@ -76,26 +82,27 @@ class Case:
     ids=generate_test_id,
 )
 @log_execution
-def test_llm_inference_service(test_case: Case):
-    
+def test_llm_inference_service(test_case: TestCase):
+
     kserve_client = KServeClient(
         config_file=os.environ.get("KUBECONFIG", "~/.kube/config")
     )
 
     service_name = test_case.llm_service.metadata.name
-    
+
     try:
         create_llmisvc(kserve_client, test_case.llm_service)
-        wait_for_model_response(kserve_client, test_case)
+        wait_for_model_response(kserve_client, test_case, test_case.wait_timeout)
+        print(f"🎉 Test completed successfully for service {service_name}")
     except Exception as e:
-        print(f"ERROR: Failed to call llm inference service {service_name}: {e}")
+        print(f"❌ ERROR: Failed to call llm inference service {service_name}: {e}")
         collect_diagnostics(kserve_client, test_case.llm_service)
         raise
     finally:
         try:
             delete_llmisvc(kserve_client, test_case.llm_service)
         except Exception as e:
-            print(f"Warning: Failed to cleanup service {service_name}: {e}")
+            print(f"⚠️ Warning: Failed to cleanup service {service_name}: {e}")
 
 
 @log_execution
@@ -110,10 +117,11 @@ def create_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceSe
             KSERVE_PLURAL_LLMINFERENCESERVICE,
             llm_isvc,
         )
+        print(f"✅ LLM inference service {llm_isvc.metadata.name} created successfully")
         return outputs
     except client.rest.ApiException as e:
         raise RuntimeError(
-            f"Exception when calling CustomObjectsApi->"
+            f"❌ Exception when calling CustomObjectsApi->"
             f"create_namespaced_custom_object for LLMInferenceService: {e}"
         ) from e
 
@@ -121,22 +129,29 @@ def create_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceSe
 @log_execution
 def delete_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
     try:
-        return kserve_client.api_instance.delete_namespaced_custom_object(
+        result = kserve_client.api_instance.delete_namespaced_custom_object(
             constants.KSERVE_GROUP,
             llm_isvc.api_version.split("/")[1],
             llm_isvc.metadata.namespace,
             KSERVE_PLURAL_LLMINFERENCESERVICE,
             llm_isvc.metadata.name,
         )
+        print(f"✅ LLM inference service {llm_isvc.metadata.name} deleted successfully")
+        return result
     except client.rest.ApiException as e:
         raise RuntimeError(
-            f"Exception when calling CustomObjectsApi->"
+            f"❌ Exception when calling CustomObjectsApi->"
             f"delete_namespaced_custom_object for LLMInferenceService: {e}"
         ) from e
 
 
 @log_execution
-def get_llmisvc(kserve_client: KServeClient, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION):
+def get_llmisvc(
+    kserve_client: KServeClient,
+    name,
+    namespace,
+    version=constants.KSERVE_V1ALPHA1_VERSION,
+):
     try:
         return kserve_client.api_instance.get_namespaced_custom_object(
             constants.KSERVE_GROUP,
@@ -147,7 +162,7 @@ def get_llmisvc(kserve_client: KServeClient, name, namespace, version=constants.
         )
     except client.rest.ApiException as e:
         raise RuntimeError(
-            f"Exception when calling CustomObjectsApi->"
+            f"❌ Exception when calling CustomObjectsApi->"
             f"get_namespaced_custom_object for LLMInferenceService: {e}"
         ) from e
 
@@ -155,10 +170,10 @@ def get_llmisvc(kserve_client: KServeClient, name, namespace, version=constants.
 @log_execution
 def wait_for_model_response(
     kserve_client: KServeClient,
-    test_case: Case,
-    timeout_seconds: int = 300, # TODO Make it configurable in Case
+    test_case: TestCase,
+    timeout_seconds: int = 600,
 ) -> str:
-    
+
     service_url = None
 
     def assert_model_responds():
@@ -167,39 +182,54 @@ def assert_model_responds():
         try:
             service_url = get_llm_service_url(kserve_client, test_case.llm_service)
         except Exception as e:
-            raise AssertionError(f"Failed to get service URL: {e}") from e
+            raise AssertionError(f"❌ Failed to get service URL: {e}") from e
 
         completion_url = f"{service_url}/v1/completions"
-        test_payload = {"model": test_case.model_name, "prompt": test_case.prompt, "max_tokens": test_case.max_tokens}
-        print(f"Calling LLM service at {completion_url} with payload {test_payload}")
+        test_payload = {
+            "model": test_case.model_name,
+            "prompt": test_case.prompt,
+            "max_tokens": test_case.max_tokens,
+        }
+        print(f"📞 Calling LLM service at {completion_url} with payload {test_payload}")
         try:
             response = requests.post(
                 completion_url,
                 headers={"Content-Type": "application/json"},
                 json=test_payload,
-                timeout=30, # TODO Make it configurable
+                timeout=30,  # TODO Make it configurable
             )
         except Exception as e:
-            raise AssertionError(f"Failed to call model: {e}") from e
+            raise AssertionError(f"❌ Failed to call model: {e}") from e
 
         test_case.response_assertion(response)
+        print("✅ LLM service responded successfully")
         return service_url
 
     return wait_for(assert_model_responds, timeout=timeout_seconds, interval=10.0)
 
 
-def get_llm_service_url(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
+def get_llm_service_url(
+    kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService
+):
     service_name = llm_isvc.metadata.name
-    
+
     try:
-        llm_isvc = get_llmisvc(kserve_client, llm_isvc.metadata.name, llm_isvc.metadata.namespace, llm_isvc.api_version.split("/")[1])
+        llm_isvc = get_llmisvc(
+            kserve_client,
+            llm_isvc.metadata.name,
+            llm_isvc.metadata.namespace,
+            llm_isvc.api_version.split("/")[1],
+        )
 
         if "status" not in llm_isvc:
-            raise ValueError(f"No status found in LLM inference service {service_name} status: {llm_isvc}")
+            raise ValueError(
+                f"❌ No status found in LLM inference service {service_name} status: {llm_isvc}"
+            )
 
         status = llm_isvc["status"]
 
         if "url" in status and status["url"]:
+            print(f"✅ Found service URL: {status['url']}")
             return status["url"]
 
         if (
@@ -209,17 +239,22 @@ def get_llm_service_url(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInfere
         ):
             first_address = status["addresses"][0]
             if "url" in first_address:
+                print(f"✅ Found service URL in addresses: {first_address['url']}")
                 return first_address["url"]
 
-        raise ValueError(f"No URL found in LLM inference service {service_name} status")
+        raise ValueError(
+            f"❌ No URL found in LLM inference service {service_name} status"
+        )
 
     except Exception as e:
         raise ValueError(
-            f"Failed to get URL for LLM inference service {service_name}: {e}"
+            f"❌ Failed to get URL for LLM inference service {service_name}: {e}"
         ) from e
 
 
-def wait_for(assertion_fn: Callable[[], Any], timeout: float = 5.0, interval: float = 0.1) -> Any:
+def wait_for(
+    assertion_fn: Callable[[], Any], timeout: float = 5.0, interval: float = 0.1
+) -> Any:
     """Wait for assertion function to succeed within timeout."""
     deadline = time.time() + timeout
     while True:
@@ -230,45 +265,46 @@ def wait_for(assertion_fn: Callable[[], Any], timeout: float = 5.0, interval: fl
                 raise
             time.sleep(interval)
 
-def collect_diagnostics(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
-    try:
-        
-        service_name = llm_isvc.metadata.name
-        namespace = llm_isvc.metadata.namespace
-        print(f"\n{'='*60}")
-        print(f"DIAGNOSTIC INFORMATION FOR {service_name} in {namespace}")
-        print(f"{'='*60}")
-
-        print("\n--- LLM Inference Service ---")
-        try:
-            llm_isvc = get_llmisvc(kserve_client, service_name, namespace)
-            print(json.dumps(llm_isvc, indent=2, default=str))
-        except Exception as e:
-            print(f"Failed to get LLM inference service: {e}")
 
-        print("\n--- Events ---")
-        try:
-            core_v1 = client.CoreV1Api()
-            events = core_v1.list_namespaced_event(
-                namespace=namespace,
-                field_selector=f"involvedObject.name={service_name}",
-            )
-            if events.items:
-                sorted_events = sorted(
-                    events.items,
-                    key=lambda x: x.last_timestamp or x.first_timestamp,
-                    reverse=True,
-                )
-                for event in sorted_events[:5]:
-                    timestamp = event.last_timestamp or event.first_timestamp
-                    print(f"  {event.type}: {event.reason} - {event.message}")
-                    print(f"    Time: {timestamp}")
-            else:
-                print("  No events found")
-        except Exception as e:
-            print(f"Failed to list events: {e}")
+def collect_diagnostics(
+    kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService
+):
+    print("🔍 # Collecting diagnostics...")
+    name = llm_isvc.metadata.name
+    ns = llm_isvc.metadata.namespace
+
+    svc = get_llmisvc(kserve_client, name, ns)
 
-        print(f"\n{'='*60}")
+    labels = {
+        "app.kubernetes.io/part-of": "llminferenceservice",
+        "app.kubernetes.io/name": svc["metadata"].get("name"),
+    }
 
+    print(f"🔍 # Diagnostics for {name!r} in {ns!r}")
+    print("---")
+    print(f"# LLMInferenceService {name}")
+    try:
+        print(yaml.safe_dump(svc, sort_keys=False))
     except Exception as e:
-        print(f"Failed to collect diagnostics: {e}")
+        print(f"# ❌ failed to dump LLMInferenceService: {e}")
+
+    print_all_events_table(ns)
+
+    all_resources = kinds_matching_by_labels(
+        ns,
+        labels,
+        api_kinds={
+            "HTTPRoute",
+            "InferencePool",
+            "InferenceModel",
+            "Service",
+            "Deployment",
+            "LeaderWorkerSet",
+            "DestinationRule",
+            "StatefulSet",
+        },
+    )
+
+    for obj in all_resources:
+        print("---")
+        print(yaml.safe_dump(obj.to_dict(), sort_keys=False))

From 405cad40cffeebe4673b49e8c89d6fb809ac1a3d Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Sat, 26 Jul 2025 00:26:26 +0200
Subject: [PATCH 17/38] chore: bumps python to 3.12 for e2e job

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/workflows/e2e-test-llmisvc.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
index d7a6c43fc2e..7b08f7fa6e7 100644
--- a/.github/workflows/e2e-test-llmisvc.yml
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -70,7 +70,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.9"
+          python-version: "3.12"
 
       - name: Setup Minikube
         uses: ./.github/actions/minikube-setup

From 9587f189ab3c30673791dbe4e59a2d5dce4d3cc4 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Sat, 26 Jul 2025 00:49:15 +0200
Subject: [PATCH 18/38] fix: mismatched class name in example comment

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/test_llm_inference_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 67d21e92b6d..94d3bf19a68 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -66,7 +66,7 @@ class TestCase:
         ),
         # Example test case
         # pytest.param(
-        #     Case(
+        #     TestCase(
         #         base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
         #         prompt="What is the capital of France?",
         #         response_assertion=lambda response: (

From dfade037b562eef152f3ddcecf2de15061121543 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Sat, 26 Jul 2025 00:52:04 +0200
Subject: [PATCH 19/38] fix: clarifies workload preset

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/llmisvc/README.md b/test/e2e/llmisvc/README.md
index c08b3c5fcef..b605afe4eed 100644
--- a/test/e2e/llmisvc/README.md
+++ b/test/e2e/llmisvc/README.md
@@ -44,7 +44,7 @@ pytest -m "llminferenceservice and (cluster_cpu or cluster_amd)" test/e2e/llmisv
 
 Use prefixed categories that get composed together:
 
-- **`workload-*`**: Container specs and resources (e.g., `workload-single-cpu`, `workload-multi-node-gpu`)
+- **`workload-*`**: workload topology, container specs and resource specs (e.g., `workload-single-cpu`, `workload-multi-node-gpu`)
 - **`model-*`**: Model sources (e.g., `model-fb-opt-125m`, `model-gpt2`) 
 - **`router-*`**: Routing configs (e.g., `router-managed`, `router-with-scheduler`)
 

From 176da0354a91203bf6a11e3ab7b0bf250f89e256 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Sat, 26 Jul 2025 01:00:14 +0200
Subject: [PATCH 20/38] chore: removes noise

aka poor man's debugging tool

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/diagnostic.py                 | 1 -
 test/e2e/llmisvc/fixtures.py                   | 1 -
 test/e2e/llmisvc/test_llm_inference_service.py | 4 ----
 test/scripts/gh-actions/run-e2e-tests.sh       | 1 -
 4 files changed, 7 deletions(-)

diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
index 47fa19392c2..509b68548e3 100644
--- a/test/e2e/llmisvc/diagnostic.py
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -38,7 +38,6 @@ def print_all_events_table(namespace: str, max_events: int = 50):
             evs, key=lambda e: e.last_timestamp or e.first_timestamp, reverse=True
         )[:max_events]
 
-        # print header
         header = f"{'TIME':<25} {'NAMESPACE':<12} {'SOURCE':<20} {'TYPE':<8} {'REASON':<20} MESSAGE"
         print(header)
         print("-" * len(header))
diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index 09892cdbb62..be4dcdee731 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -130,7 +130,6 @@ def _create_configs(namespace=KSERVE_TEST_NAMESPACE):
 
     yield _create_configs()
 
-    # teardown: best‑effort cleanup
     for name, namespace in created:
         try:
             delete_llmisvc_config(client, name, namespace)
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 94d3bf19a68..6f91f1604d4 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -93,7 +93,6 @@ def test_llm_inference_service(test_case: TestCase):
     try:
         create_llmisvc(kserve_client, test_case.llm_service)
         wait_for_model_response(kserve_client, test_case, test_case.wait_timeout)
-        print(f"🎉 Test completed successfully for service {service_name}")
     except Exception as e:
         print(f"❌ ERROR: Failed to call llm inference service {service_name}: {e}")
         collect_diagnostics(kserve_client, test_case.llm_service)
@@ -229,7 +228,6 @@ def get_llm_service_url(
         status = llm_isvc["status"]
 
         if "url" in status and status["url"]:
-            print(f"✅ Found service URL: {status['url']}")
             return status["url"]
 
         if (
@@ -239,7 +237,6 @@ def get_llm_service_url(
         ):
             first_address = status["addresses"][0]
             if "url" in first_address:
-                print(f"✅ Found service URL in addresses: {first_address['url']}")
                 return first_address["url"]
 
         raise ValueError(
@@ -269,7 +266,6 @@ def wait_for(
 def collect_diagnostics(
     kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService
 ):
-    print("🔍 # Collecting diagnostics...")
     name = llm_isvc.metadata.name
     ns = llm_isvc.metadata.namespace
 
diff --git a/test/scripts/gh-actions/run-e2e-tests.sh b/test/scripts/gh-actions/run-e2e-tests.sh
index ee8794c8d23..98d6af06f6e 100755
--- a/test/scripts/gh-actions/run-e2e-tests.sh
+++ b/test/scripts/gh-actions/run-e2e-tests.sh
@@ -35,7 +35,6 @@ pushd test/e2e >/dev/null
     echo "Skipping explainer tests for raw deployment with ingress"
     pytest -m "$MARKER" --ignore=qpext --log-cli-level=INFO -n $PARALLELISM --dist worksteal --network-layer $NETWORK_LAYER --ignore=explainer/
   else
-    echo "pytest -m '$MARKER' --ignore=qpext --log-cli-level=INFO -n $PARALLELISM --dist worksteal --network-layer $NETWORK_LAYER"
     pytest -m "$MARKER" --ignore=qpext --log-cli-level=INFO -n $PARALLELISM --dist worksteal --network-layer $NETWORK_LAYER
   fi
 popd

From 8210c4533a84f0eddd927a0520850929c4ef84dc Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 10:36:43 +0200
Subject: [PATCH 21/38] fix: imports inference service config factory

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/test_llm_inference_service.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 6f91f1604d4..7c453bc3486 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -27,7 +27,9 @@
 )
 from .fixtures import (
     generate_test_id,
+    # Factory functions are not called explicitly, but they need to be imported to work
     test_case,  # noqa: F401,F811
+    llm_config_factory, # noqa: F401,F811
 )
 from .logging import log_execution
 

From e75b15eb2899957f73676d0d11d889081e32ea30 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 10:58:38 +0200
Subject: [PATCH 22/38] chore: bumps resource limits

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index be4dcdee731..37db7fb1889 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -32,8 +32,8 @@
                     "image": "quay.io/pierdipi/vllm-cpu:latest",
                     "env": [{"name": "VLLM_LOGGING_LEVEL", "value": "DEBUG"}],
                     "resources": {
-                        "limits": {"cpu": "1", "memory": "10Gi"},
-                        "requests": {"cpu": "100m", "memory": "8Gi"},
+                        "limits": {"cpu": "2", "memory": "10Gi"},
+                        "requests": {"cpu": "1", "memory": "8Gi"},
                     },
                     "livenessProbe": {
                         "initialDelaySeconds": 30,

From 6be1e7cb7bf76fb49a2d233c3eae324ee631ecef Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 10:59:56 +0200
Subject: [PATCH 23/38] chore: cleanup

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py                   | 10 ----------
 test/e2e/llmisvc/test_llm_inference_service.py |  4 ++--
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index 37db7fb1889..f6de3793ea4 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -51,16 +51,8 @@
     "router-managed": {
         "router": {"scheduler": {}, "route": {}, "gateway": {}},
     },
-    "router-with-scheduler": {
-        "router": {
-            "scheduler": {},
-            "route": {},
-            "gateway": {},
-        },
-    },
 }
 
-
 @pytest.fixture(scope="function")
 def test_case(request):
     tc = request.param
@@ -92,8 +84,6 @@ def llm_config_factory():
 
     def _create_configs(namespace=KSERVE_TEST_NAMESPACE):
         for name in LLMINFERENCESERVICE_CONFIGS:
-            if name not in LLMINFERENCESERVICE_CONFIGS:
-                raise ValueError(f"Unknown config name: {name}")
 
             spec = LLMINFERENCESERVICE_CONFIGS[name]
 
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 7c453bc3486..39d314435bd 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -191,7 +191,7 @@ def assert_model_responds():
             "prompt": test_case.prompt,
             "max_tokens": test_case.max_tokens,
         }
-        print(f"📞 Calling LLM service at {completion_url} with payload {test_payload}")
+        print(f"Calling LLM service at {completion_url} with payload {test_payload}")
         try:
             response = requests.post(
                 completion_url,
@@ -254,7 +254,7 @@ def get_llm_service_url(
 def wait_for(
     assertion_fn: Callable[[], Any], timeout: float = 5.0, interval: float = 0.1
 ) -> Any:
-    """Wait for assertion function to succeed within timeout."""
+    """Wait for the assertion to succeed within timeout."""
     deadline = time.time() + timeout
     while True:
         try:

From e7b015cd18580f93515fd8620328fb022a3d1a0a Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 11:00:31 +0200
Subject: [PATCH 24/38] feat: adds simple p/d deployment

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py                  | 45 +++++++++++++++++++
 .../e2e/llmisvc/test_llm_inference_service.py | 34 +++++++-------
 test/e2e/pytest.ini                           |  3 +-
 3 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index f6de3793ea4..555d24853ca 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -45,6 +45,51 @@
             ]
         },
     },
+    "workload-pd-cpu": {
+        "model": {
+            "uri": "hf://facebook/opt-125m",
+            "name": "facebook/opt-125m"
+        },
+        "router": {
+            "scheduler": {},
+            "route": {},
+            "gateway": {}
+        },
+        "template": {
+            "initContainers": [
+                {
+                    "name": "llm-d-routing-sidecar",
+                    "image": "ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0"
+                }
+            ],
+            "containers": [
+                {
+                    "name": "main",
+                    "image": "quay.io/pierdipi/vllm-cpu:latest",
+                    "env": [{"name": "VLLM_LOGGING_LEVEL", "value": "DEBUG"}],
+                    "resources": {
+                        "limits": {"cpu": "2", "memory": "10Gi"},
+                        "requests": {"cpu": "1", "memory": "8Gi"},
+                    }
+                }
+            ]
+        },
+        "prefill": {
+            "template": {
+                "containers": [
+                    {
+                        "name": "main",
+                        "image": "quay.io/pierdipi/vllm-cpu:latest",
+                        "env": [{"name": "VLLM_LOGGING_LEVEL", "value": "DEBUG"}],
+                        "resources": {
+                            "limits": {"cpu": "2", "memory": "10Gi"},
+                            "requests": {"cpu": "1", "memory": "8Gi"},
+                        }
+                    }
+                ]
+            }
+        }
+    },
     "model-fb-opt-125m": {
         "model": {"uri": "hf://facebook/opt-125m", "name": "facebook/opt-125m"},
     },
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 39d314435bd..ea4a44d548f 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -63,22 +63,26 @@ class TestCase:
     "test_case",
     [
         pytest.param(
-            TestCase(["router-managed", "workload-single-cpu", "model-fb-opt-125m"]),
-            marks=pytest.mark.cluster_cpu,
+            TestCase(
+                base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
+                prompt = "KServe is a",
+            ),
+            marks=[pytest.mark.cluster_cpu, pytest.mark.cluster_single_node],
+        ),
+        pytest.param(
+            TestCase(
+                base_refs=["router-managed", "workload-pd-cpu", "model-fb-opt-125m"],
+                prompt="You are an expert in Kubernetes-native machine learning serving platforms, with deep knowledge of the KServe project. "
+                       "Explain the challenges of serving large-scale models, GPU scheduling, and how KServe integrates with capabilities like multi-model serving. "
+                       "Provide a detailed comparison with open source alternatives, focusing on operational trade-offs.",
+                response_assertion=lambda response: (
+                        response.status_code == 200
+                        and response.json().get("choices") is not None
+                        and len(response.json().get("choices", [])) > 0
+                ),                
+            ),
+            marks=[pytest.mark.cluster_cpu, pytest.mark.cluster_single_node],
         ),
-        # Example test case
-        # pytest.param(
-        #     TestCase(
-        #         base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
-        #         prompt="What is the capital of France?",
-        #         response_assertion=lambda response: (
-        #             response.status_code == 200
-        #             and response.json().get("choices") is not None
-        #             and len(response.json().get("choices", [])) > 0
-        #         ),
-        #     ),
-        #     marks=pytest.mark.cluster_cpu,
-        # ),
     ],
     indirect=["test_case"],
     ids=generate_test_id,
diff --git a/test/e2e/pytest.ini b/test/e2e/pytest.ini
index 92c33eb8a26..32b2ab1b408 100644
--- a/test/e2e/pytest.ini
+++ b/test/e2e/pytest.ini
@@ -22,4 +22,5 @@ markers =
     cluster_cpu: test targeting cluster with CPU
     cluster_amd: test targeting cluster with AMD
     cluster_intel: test targeting cluster with Intel
-    cluster_nvidia: test targeting cluster with NVIDIA
\ No newline at end of file
+    cluster_nvidia: test targeting cluster with NVIDIA
+    cluster_single_node: test targeting single node cluster
\ No newline at end of file

From a4ca343a662d6da15b4f0a3835ec9f7eb0d1f31f Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 11:01:08 +0200
Subject: [PATCH 25/38] feat: makes response timeout configurable with 60s
 default

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/test_llm_inference_service.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index ea4a44d548f..ecf2b0583ba 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -48,10 +48,11 @@ class TestCase:
     __test__ = False  # So pytest will not try to execute it.
     """Test case configuration for LLM inference service tests."""
     base_refs: List[str]
-    prompt: str = "KServe is a"
+    prompt: str
     max_tokens: int = 1
     response_assertion: Callable[[requests.Response], None] = assert_200
     wait_timeout: int = 300
+    response_timeout: int = 60
     # Factory provided
     llm_service: V1alpha1LLMInferenceService = None  # Generated by llm_service_factory
     model_name: str = "default/model"  # This will be generated by the factory
@@ -201,7 +202,7 @@ def assert_model_responds():
                 completion_url,
                 headers={"Content-Type": "application/json"},
                 json=test_payload,
-                timeout=30,  # TODO Make it configurable
+                timeout=test_case.response_timeout, 
             )
         except Exception as e:
             raise AssertionError(f"❌ Failed to call model: {e}") from e

From 19279316b3e709c4c34abc9f5d23714d6641be47 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 11:19:08 +0200
Subject: [PATCH 26/38] midstream: disable gh-action

The kustomize manifests are significantly different, making it impossible to re-use the upstream e2e GitHub action without major restructuring.

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 .github/workflows/e2e-test-llmisvc.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/e2e-test-llmisvc.yml b/.github/workflows/e2e-test-llmisvc.yml
index 7b08f7fa6e7..6943f568e9a 100644
--- a/.github/workflows/e2e-test-llmisvc.yml
+++ b/.github/workflows/e2e-test-llmisvc.yml
@@ -53,6 +53,7 @@ concurrency:
 
 jobs:
   test-llmisvc:
+    if: false
     runs-on: ubuntu-22.04
     needs: [ kserve-image-build ]
     steps:
@@ -106,6 +107,7 @@ jobs:
           ./test/scripts/gh-actions/status-check.sh
 
   kserve-image-build:
+    if: false
     runs-on: ubuntu-latest
     steps:
       - name: Checkout source

From 0623b620a228af8fcc982d7ad1b2c32515512182 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 13:25:45 +0200
Subject: [PATCH 27/38] chore: uses name variable

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/test_llm_inference_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index ecf2b0583ba..7a4949ac882 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -280,7 +280,7 @@ def collect_diagnostics(
 
     labels = {
         "app.kubernetes.io/part-of": "llminferenceservice",
-        "app.kubernetes.io/name": svc["metadata"].get("name"),
+        "app.kubernetes.io/name": name,
     }
 
     print(f"🔍 # Diagnostics for {name!r} in {ns!r}")

From 8d62551e7cabe689b29cdbb4b670ff5921350e69 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 13:29:21 +0200
Subject: [PATCH 28/38] chore: removes redundant preset

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index 555d24853ca..e38bcd89434 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -46,15 +46,6 @@
         },
     },
     "workload-pd-cpu": {
-        "model": {
-            "uri": "hf://facebook/opt-125m",
-            "name": "facebook/opt-125m"
-        },
-        "router": {
-            "scheduler": {},
-            "route": {},
-            "gateway": {}
-        },
         "template": {
             "initContainers": [
                 {

From 48d6b0b9bb0ce252bebca9bead301b6dcb90c059 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 14:01:35 +0200
Subject: [PATCH 29/38] fix: minor precommit linter findings

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py                   | 1 +
 test/e2e/llmisvc/test_llm_inference_service.py | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index e38bcd89434..13ce5090f52 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -89,6 +89,7 @@
     },
 }
 
+
 @pytest.fixture(scope="function")
 def test_case(request):
     tc = request.param
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 7a4949ac882..57819705e2f 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -29,7 +29,7 @@
     generate_test_id,
     # Factory functions are not called explicitly, but they need to be imported to work
     test_case,  # noqa: F401,F811
-    llm_config_factory, # noqa: F401,F811
+    llm_config_factory,  # noqa: F401,F811
 )
 from .logging import log_execution
 
@@ -66,7 +66,7 @@ class TestCase:
         pytest.param(
             TestCase(
                 base_refs=["router-managed", "workload-single-cpu", "model-fb-opt-125m"],
-                prompt = "KServe is a",
+                prompt="KServe is a",
             ),
             marks=[pytest.mark.cluster_cpu, pytest.mark.cluster_single_node],
         ),
@@ -80,7 +80,7 @@ class TestCase:
                         response.status_code == 200
                         and response.json().get("choices") is not None
                         and len(response.json().get("choices", [])) > 0
-                ),                
+                ),
             ),
             marks=[pytest.mark.cluster_cpu, pytest.mark.cluster_single_node],
         ),
@@ -202,7 +202,7 @@ def assert_model_responds():
                 completion_url,
                 headers={"Content-Type": "application/json"},
                 json=test_payload,
-                timeout=test_case.response_timeout, 
+                timeout=test_case.response_timeout,
             )
         except Exception as e:
             raise AssertionError(f"❌ Failed to call model: {e}") from e

From b56022a4bbc4baff8d03c733497b0ec0f7718f41 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 16:08:09 +0200
Subject: [PATCH 30/38] chore: removes leftover empty file

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test/__init__.py

diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000

From a878d7556f32dc87214560f43443d9779d30a6a6 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Mon, 28 Jul 2025 20:04:33 +0200
Subject: [PATCH 31/38] chore: reworks related resources dump to exclude
 certain kinds

with default exclusion for secrets

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/diagnostic.py                 | 11 ++++++-----
 test/e2e/llmisvc/test_llm_inference_service.py | 18 ++----------------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
index 509b68548e3..bd6984e2a1b 100644
--- a/test/e2e/llmisvc/diagnostic.py
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -60,14 +60,14 @@ def print_all_events_table(namespace: str, max_events: int = 50):
         print(f"# ❌ failed to list events: {e}")
 
 
-def kinds_matching_by_labels(namespace: str, labels, api_kinds):
+def kinds_matching_by_labels(namespace: str, labels, skip_api_kinds={"Secret"}):
     """
     List all namespaced objects in `namespace` matching `labels`
-    whose kind is in `api_kinds`.
+    whose kinds are not in `skip_api_kinds`.
 
     :param namespace: kube namespace to search
     :param labels: either a dict of {k: v} or a raw selector string
-    :param api_kinds: an iterable of Resource.kind strings to include
+    :param skip_api_kinds: an iterable of Resource.kind strings to exclude
     :return: list of Unstructured objects
     """
     config.load_kube_config()
@@ -85,12 +85,13 @@ def kinds_matching_by_labels(namespace: str, labels, api_kinds):
     for rsrc in all_resources:
         if not rsrc.namespaced or "list" not in rsrc.verbs:
             continue
-        if rsrc.kind not in api_kinds:
+        if rsrc.kind in skip_api_kinds:
             continue
 
         try:
             resp = rsrc.get(namespace=namespace, label_selector=selector)
-        except ApiException:
+        except Exception as e:
+            print(f"failed to get {rsrc.kind}, skipping: {e}")
             continue
 
         items = getattr(resp, "items", [])
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 57819705e2f..94e0a094fba 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -49,7 +49,7 @@ class TestCase:
     """Test case configuration for LLM inference service tests."""
     base_refs: List[str]
     prompt: str
-    max_tokens: int = 1
+    max_tokens: int = None
     response_assertion: Callable[[requests.Response], None] = assert_200
     wait_timeout: int = 300
     response_timeout: int = 60
@@ -293,21 +293,7 @@ def collect_diagnostics(
 
     print_all_events_table(ns)
 
-    all_resources = kinds_matching_by_labels(
-        ns,
-        labels,
-        api_kinds={
-            "HTTPRoute",
-            "InferencePool",
-            "InferenceModel",
-            "Service",
-            "Deployment",
-            "LeaderWorkerSet",
-            "DestinationRule",
-            "StatefulSet",
-        },
-    )
-
+    all_resources = kinds_matching_by_labels(ns, labels)
     for obj in all_resources:
         print("---")
         print(yaml.safe_dump(obj.to_dict(), sort_keys=False))

From 492d47d30128d63ff7324ebaaae7eb0540c90f5a Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 11:02:03 +0200
Subject: [PATCH 32/38] chore: minor fixes in README

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/README.md | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/test/e2e/llmisvc/README.md b/test/e2e/llmisvc/README.md
index b605afe4eed..d9f4a56f2bd 100644
--- a/test/e2e/llmisvc/README.md
+++ b/test/e2e/llmisvc/README.md
@@ -40,22 +40,10 @@ pytest -m "llminferenceservice and (cluster_amd or cluster_nvidia or cluster_int
 pytest -m "llminferenceservice and (cluster_cpu or cluster_amd)" test/e2e/llmisvc/
 ```
 
-## Config Naming Convention
-
-Use prefixed categories that get composed together:
-
-- **`workload-*`**: workload topology, container specs and resource specs (e.g., `workload-single-cpu`, `workload-multi-node-gpu`)
-- **`model-*`**: Model sources (e.g., `model-fb-opt-125m`, `model-gpt2`) 
-- **`router-*`**: Routing configs (e.g., `router-managed`, `router-with-scheduler`)
-
-Test IDs are generated by combining the cluster capability from pytest marks with all config names:
-- Test ID format: `{cluster_capability}-{config1}-{config2}-{config3}`
-- Example: `cluster_cpu-router-managed-workload-single-cpu-model-fb-opt-125m`
-
 ## Adding New Configs
 
 1. Add to `LLMINFERENCESERVICE_CONFIGS` in `fixtures.py`
-2. Follow `category-descriptor` naming (prefix automatically stripped from test IDs)
+2. Follow `category-descriptor` naming (described in the subsequent section)
 3. Add new cluster capability test cases using `pytest.param` with appropriate marks:
    ```python
    pytest.param(
@@ -80,3 +68,16 @@ Test IDs are generated by combining the cluster capability from pytest marks wit
        marks=pytest.mark.cluster_cpu,
    ),
    ``` 
+
+## Config Naming Convention
+
+Use prefixed categories that get composed together:
+
+- **`workload-*`**: workload topology, container specs and resource specs (e.g., `workload-single-cpu`, `workload-multi-node-gpu`)
+- **`model-*`**: Model sources (e.g., `model-fb-opt-125m`, `model-gpt2`) 
+- **`router-*`**: Routing configs (e.g., `router-managed`, `router-with-scheduler`)
+
+Test IDs are generated by combining the cluster capability from pytest marks with all config names:
+- Test ID format: `{cluster_capability}-{config1}-{config2}-{config3}`
+- Example: `cluster_cpu-router-managed-workload-single-cpu-model-fb-opt-125m`
+

From 01e714a4f00ef7951b4765e0df4f5a49d32a37d7 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 11:04:27 +0200
Subject: [PATCH 33/38] fix: filters out *List resources

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/diagnostic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
index bd6984e2a1b..11d255bc50d 100644
--- a/test/e2e/llmisvc/diagnostic.py
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -85,7 +85,7 @@ def kinds_matching_by_labels(namespace: str, labels, skip_api_kinds={"Secret"}):
     for rsrc in all_resources:
         if not rsrc.namespaced or "list" not in rsrc.verbs:
             continue
-        if rsrc.kind in skip_api_kinds:
+        if rsrc.kind.endswith("List") or rsrc.kind in skip_api_kinds:
             continue
 
         try:

From 6392bcd4c587a0e863e4494a999a8fc7838af6ab Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 11:06:26 +0200
Subject: [PATCH 34/38] chore: removes redundant init-container

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index 13ce5090f52..93c2a056dd1 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -47,12 +47,6 @@
     },
     "workload-pd-cpu": {
         "template": {
-            "initContainers": [
-                {
-                    "name": "llm-d-routing-sidecar",
-                    "image": "ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0"
-                }
-            ],
             "containers": [
                 {
                     "name": "main",

From 66552698fde51a37fb94f565676ba00599b0ad10 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 11:29:24 +0200
Subject: [PATCH 35/38] fix: makes test params xdist-friendly

The previous design of test fixtures did not work with xdist when
multiple worker nodes were executing the same parametrized test
due to a limitation of session-scoped fixtures in such a setup:

https://github.com/pytest-dev/pytest-xdist/issues/271

This was leading to race conditions and flaky tests when shared
LLMInferenceServiceConfigs were removed prematurely, leading to
failures of subsequent tests.

This approach creates per-test clones of base_refs, making them localized,
and avoids introducing weird workarounds that the author spent too much time on.

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/fixtures.py                  | 174 ++++++++++--------
 .../e2e/llmisvc/test_llm_inference_service.py |   4 +-
 2 files changed, 100 insertions(+), 78 deletions(-)

diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index 93c2a056dd1..d679ebf3bcf 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -12,14 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import uuid
 import os
 import pytest
+import hashlib
 from typing import List
 from kubernetes import client
 from kubernetes.client.rest import ApiException
 from kserve import KServeClient, constants, V1alpha1LLMInferenceService
 
+from .logging import logger
+
 KSERVE_PLURAL_LLMINFERENCESERVICECONFIG = "llminferenceserviceconfigs"
 KSERVE_TEST_NAMESPACE = "kserve-ci-e2e-test"
 
@@ -87,79 +89,61 @@
 @pytest.fixture(scope="function")
 def test_case(request):
     tc = request.param
+    created_configs = []
+    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
 
-    service_name = generate_service_name(request.node.name, tc.base_refs)
-    tc.model_name = get_model_name_from_configs(tc.base_refs)
-
-    # TODO fail early if base_refs does not exist (e.g. mistyped)?
-
-    tc.llm_service = V1alpha1LLMInferenceService(
-        api_version="serving.kserve.io/v1alpha1",
-        kind="LLMInferenceService",
-        metadata=client.V1ObjectMeta(
-            name=service_name, namespace=KSERVE_TEST_NAMESPACE
-        ),
-        spec={
-            "baseRefs": [{"name": base_ref} for base_ref in tc.base_refs],
-        },
-    )
-
-    return tc
-
+    try:
+        # Validate base_refs defined in the test fixture exist in LLMINFERENCESERVICE_CONFIGS
+        missing_refs = [ref for ref in tc.base_refs if ref not in LLMINFERENCESERVICE_CONFIGS]
+        if missing_refs:
+            raise ValueError(f"Missing base_refs in LLMINFERENCESERVICE_CONFIGS: {missing_refs}")
 
-@pytest.fixture(scope="session", autouse=True)
-def llm_config_factory():
-    """Factory for creating/cleaning LLMInferenceServiceConfig once per session."""
-    created = []
-    client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
+        service_name = generate_service_name(request.node.name, tc.base_refs)
+        tc.model_name = get_model_name_from_configs(tc.base_refs)
 
-    def _create_configs(namespace=KSERVE_TEST_NAMESPACE):
-        for name in LLMINFERENCESERVICE_CONFIGS:
+        # Create unique configs for this test
+        unique_base_refs = []
+        for base_ref in tc.base_refs:
+            unique_config_name = generate_k8s_safe_suffix(base_ref, [service_name])
+            unique_base_refs.append(unique_config_name)
 
-            spec = LLMINFERENCESERVICE_CONFIGS[name]
+            original_spec = LLMINFERENCESERVICE_CONFIGS[base_ref]
 
-            try:
-                get_llmisvc_config(client, name, namespace)
-                continue
-            except Exception as e:
-                is_404_api = (
-                    isinstance(e, ApiException) and getattr(e, "status", None) == 404
-                )
-                is_404_runtime = (
-                    isinstance(e, RuntimeError) and "not found" in str(e).lower()
-                )
-                if not (is_404_api or is_404_runtime):
-                    raise
-
-            body = {
+            unique_config_body = {
                 "apiVersion": "serving.kserve.io/v1alpha1",
                 "kind": "LLMInferenceServiceConfig",
-                "metadata": {"name": name, "namespace": namespace},
-                "spec": spec,
+                "metadata": {"name": unique_config_name, "namespace": KSERVE_TEST_NAMESPACE},
+                "spec": original_spec,
             }
 
-            try:
-                create_llmisvc_config(client, body, namespace)
-                created.append((name, namespace))
-            except Exception as e:
-                if isinstance(e, ApiException) and getattr(e, "status", None) == 409:
-                    continue
-                if isinstance(e, RuntimeError) and "already exists" in str(e).lower():
-                    continue
-                # otherwise, real error
-                raise
+            create_or_update_llmisvc_config(kserve_client, unique_config_body, KSERVE_TEST_NAMESPACE)
+            created_configs.append(unique_config_name)
+
+        tc.llm_service = V1alpha1LLMInferenceService(
+            api_version="serving.kserve.io/v1alpha1",
+            kind="LLMInferenceService",
+            metadata=client.V1ObjectMeta(
+                name=service_name, namespace=KSERVE_TEST_NAMESPACE
+            ),
+            spec={
+                "baseRefs": [{"name": base_ref} for base_ref in unique_base_refs],
+            },
+        )
 
-    yield _create_configs()
+        yield tc
 
-    for name, namespace in created:
-        try:
-            delete_llmisvc_config(client, name, namespace)
-        except Exception:
-            pass
+    finally:
+        for config_name in created_configs:
+            try:
+                logger.info(f"Cleaning up unique LLMInferenceServiceConfig {config_name}")
+                delete_llmisvc_config(kserve_client, config_name, KSERVE_TEST_NAMESPACE)
+                logger.info(f"✓ Deleted unique LLMInferenceServiceConfig {config_name}")
+            except Exception as e:
+                logger.warning(f"Failed to cleanup LLMInferenceServiceConfig {config_name}: {e}")
 
 
 def get_model_name_from_configs(config_names):
-    """Extract model name from model config."""
+    """Extract the model name from model config."""
     for config_name in config_names:
         if config_name.startswith("model-"):
             config = LLMINFERENCESERVICE_CONFIGS[config_name]
@@ -168,21 +152,29 @@ def get_model_name_from_configs(config_names):
     return "default-model"
 
 
-def generate_service_name(test_name: str, base_refs: List[str]) -> str:
-    base_name = test_name.split("[", 1)[0]
-    base_name = base_name.replace("test_", "")
-    base_name = base_name.replace("_", "-")
-    config_suffix = "-".join(sorted(base_refs))
-    test_case = f"{base_name}-{config_suffix}".lower()
+def generate_k8s_safe_suffix(base_name: str, extra_parts: List[str] = None) -> str:
+    """Generate a Kubernetes-safe name suffix with hash."""
+    if extra_parts:
+        full_name = f"{base_name}-{'-'.join(sorted(extra_parts))}"
+    else:
+        full_name = base_name
 
-    uid = uuid.uuid4().hex[:8]
+    full_name = full_name.lower().replace("_", "-")
+
+    name_hash = hashlib.md5(full_name.encode()).hexdigest()[:8]
 
     max_total = 63
     sep = "-"
-    max_test_case = max_total - len(sep) - len(uid)
-    test_case = test_case[:max_test_case].rstrip(sep)
+    max_base = max_total - len(sep) - len(name_hash)
+    safe_base = full_name[:max_base].rstrip(sep)
+
+    return f"{safe_base}{sep}{name_hash}"
 
-    return f"{test_case}{sep}{uid}"
+
+def generate_service_name(test_name: str, base_refs: List[str]) -> str:
+    base_name = test_name.split("[", 1)[0]
+    base_name = base_name.replace("test_", "")
+    return generate_k8s_safe_suffix(base_name, base_refs)
 
 
 def generate_test_id(test_case) -> str:
@@ -190,32 +182,62 @@ def generate_test_id(test_case) -> str:
     return "-".join(test_case.base_refs)
 
 
-def create_llmisvc_config(kserve_client, llm_config, namespace=None):
+def create_or_update_llmisvc_config(kserve_client, llm_config, namespace=None):
+    """Create or update an LLMInferenceServiceConfig resource."""
     version = llm_config["apiVersion"].split("/")[1]
 
     if namespace is None:
         namespace = llm_config.get("metadata", {}).get("namespace", "default")
 
+    name = llm_config.get("metadata", {}).get("name")
+    if not name:
+        raise ValueError("LLMInferenceServiceConfig must have a name in metadata")
+
+    logger.info(f"Checking LLMInferenceServiceConfig {name} in namespace {namespace}")
+
     try:
-        outputs = kserve_client.api_instance.create_namespaced_custom_object(
+        existing_config = kserve_client.api_instance.get_namespaced_custom_object(
             constants.KSERVE_GROUP,
             version,
             namespace,
             KSERVE_PLURAL_LLMINFERENCESERVICECONFIG,
+            name,
+        )
+
+        llm_config["metadata"] = existing_config["metadata"]
+
+        outputs = kserve_client.api_instance.replace_namespaced_custom_object(
+            constants.KSERVE_GROUP,
+            version,
+            namespace,
+            KSERVE_PLURAL_LLMINFERENCESERVICECONFIG,
+            name,
             llm_config,
         )
+        logger.info(f"✓ Successfully updated LLMInferenceServiceConfig {name}")
         return outputs
+
     except client.rest.ApiException as e:
-        raise RuntimeError(
-            f"Exception when calling CustomObjectsApi->"
-            f"create_namespaced_custom_object for LLMInferenceServiceConfig: {e}"
-        ) from e
+        if e.status == 404:  # Not found - create it
+            logger.info(f"Resource not found, creating LLMInferenceServiceConfig {name}")
+            outputs = kserve_client.api_instance.create_namespaced_custom_object(
+                constants.KSERVE_GROUP,
+                version,
+                namespace,
+                KSERVE_PLURAL_LLMINFERENCESERVICECONFIG,
+                llm_config,
+            )
+            logger.info(f"✓ Successfully created LLMInferenceServiceConfig {name}")
+            return outputs
+        else:
+            raise RuntimeError(f"Failed to get/create LLMInferenceServiceConfig {name}: {e}") from e
 
 
 def delete_llmisvc_config(
     kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
 ):
     try:
+        print(f"Deleting LLMInferenceServiceConfig {name} in namespace {namespace}")
         return kserve_client.api_instance.delete_namespaced_custom_object(
             constants.KSERVE_GROUP,
             version,
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 94e0a094fba..c09d5582520 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -15,6 +15,7 @@
 import os
 import time
 from dataclasses import dataclass
+from operator import truediv
 from typing import Any, Callable, List
 import pytest
 import requests
@@ -29,7 +30,6 @@
     generate_test_id,
     # Factory functions are not called explicitly, but they need to be imported to work
     test_case,  # noqa: F401,F811
-    llm_config_factory,  # noqa: F401,F811
 )
 from .logging import log_execution
 
@@ -49,7 +49,7 @@ class TestCase:
     """Test case configuration for LLM inference service tests."""
     base_refs: List[str]
     prompt: str
-    max_tokens: int = None
+    max_tokens: int = 10
     response_assertion: Callable[[requests.Response], None] = assert_200
     wait_timeout: int = 300
     response_timeout: int = 60

From cc45a4684929b9c225466945a98ff8cabdd7ee90 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 11:40:13 +0200
Subject: [PATCH 36/38] chore: clean up

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/diagnostic.py                 | 12 +++++-------
 test/e2e/llmisvc/fixtures.py                   | 14 +++++++-------
 test/e2e/llmisvc/test_llm_inference_service.py |  5 ++---
 3 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
index 11d255bc50d..59fc75dc595 100644
--- a/test/e2e/llmisvc/diagnostic.py
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -14,10 +14,8 @@
 
 import itertools
 from datetime import datetime
-import pytest
 from kubernetes import client, config, dynamic
 from kubernetes.client import api_client
-from kubernetes.client.exceptions import ApiException
 from kserve import KServeClient, V1alpha1LLMInferenceService, constants
 
 
@@ -28,21 +26,21 @@ def print_all_events_table(namespace: str, max_events: int = 50):
     core = client.CoreV1Api()
 
     try:
-        evs = core.list_namespaced_event(namespace=namespace).items
+        events = core.list_namespaced_event(namespace=namespace).items
 
-        if not evs:
+        if not events:
             print("ℹ️ # No events found in namespace", namespace)
             return
 
-        evs = sorted(
-            evs, key=lambda e: e.last_timestamp or e.first_timestamp, reverse=True
+        events = sorted(
+            events, key=lambda e: e.last_timestamp or e.first_timestamp, reverse=True
         )[:max_events]
 
         header = f"{'TIME':<25} {'NAMESPACE':<12} {'SOURCE':<20} {'TYPE':<8} {'REASON':<20} MESSAGE"
         print(header)
         print("-" * len(header))
 
-        for ev in evs:
+        for ev in events:
             ts = ev.last_timestamp or ev.first_timestamp
             ts_str = (
                 ts.strftime("%Y-%m-%d %H:%M:%S")
diff --git a/test/e2e/llmisvc/fixtures.py b/test/e2e/llmisvc/fixtures.py
index d679ebf3bcf..72a66e9f3e1 100644
--- a/test/e2e/llmisvc/fixtures.py
+++ b/test/e2e/llmisvc/fixtures.py
@@ -99,7 +99,7 @@ def test_case(request):
             raise ValueError(f"Missing base_refs in LLMINFERENCESERVICE_CONFIGS: {missing_refs}")
 
         service_name = generate_service_name(request.node.name, tc.base_refs)
-        tc.model_name = get_model_name_from_configs(tc.base_refs)
+        tc.model_name = _get_model_name_from_configs(tc.base_refs)
 
         # Create unique configs for this test
         unique_base_refs = []
@@ -116,7 +116,7 @@ def test_case(request):
                 "spec": original_spec,
             }
 
-            create_or_update_llmisvc_config(kserve_client, unique_config_body, KSERVE_TEST_NAMESPACE)
+            _create_or_update_llmisvc_config(kserve_client, unique_config_body, KSERVE_TEST_NAMESPACE)
             created_configs.append(unique_config_name)
 
         tc.llm_service = V1alpha1LLMInferenceService(
@@ -136,13 +136,13 @@ def test_case(request):
         for config_name in created_configs:
             try:
                 logger.info(f"Cleaning up unique LLMInferenceServiceConfig {config_name}")
-                delete_llmisvc_config(kserve_client, config_name, KSERVE_TEST_NAMESPACE)
+                _delete_llmisvc_config(kserve_client, config_name, KSERVE_TEST_NAMESPACE)
                 logger.info(f"✓ Deleted unique LLMInferenceServiceConfig {config_name}")
             except Exception as e:
                 logger.warning(f"Failed to cleanup LLMInferenceServiceConfig {config_name}: {e}")
 
 
-def get_model_name_from_configs(config_names):
+def _get_model_name_from_configs(config_names):
     """Extract the model name from model config."""
     for config_name in config_names:
         if config_name.startswith("model-"):
@@ -182,7 +182,7 @@ def generate_test_id(test_case) -> str:
     return "-".join(test_case.base_refs)
 
 
-def create_or_update_llmisvc_config(kserve_client, llm_config, namespace=None):
+def _create_or_update_llmisvc_config(kserve_client, llm_config, namespace=None):
     """Create or update an LLMInferenceServiceConfig resource."""
     version = llm_config["apiVersion"].split("/")[1]
 
@@ -233,7 +233,7 @@ def create_or_update_llmisvc_config(kserve_client, llm_config, namespace=None):
             raise RuntimeError(f"Failed to get/create LLMInferenceServiceConfig {name}: {e}") from e
 
 
-def delete_llmisvc_config(
+def _delete_llmisvc_config(
     kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
 ):
     try:
@@ -252,7 +252,7 @@ def delete_llmisvc_config(
         ) from e
 
 
-def get_llmisvc_config(
+def _get_llmisvc_config(
     kserve_client, name, namespace, version=constants.KSERVE_V1ALPHA1_VERSION
 ):
     try:
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index c09d5582520..1f3bc0405d0 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -15,7 +15,6 @@
 import os
 import time
 from dataclasses import dataclass
-from operator import truediv
 from typing import Any, Callable, List
 import pytest
 import requests
@@ -102,7 +101,7 @@ def test_llm_inference_service(test_case: TestCase):
         wait_for_model_response(kserve_client, test_case, test_case.wait_timeout)
     except Exception as e:
         print(f"❌ ERROR: Failed to call llm inference service {service_name}: {e}")
-        collect_diagnostics(kserve_client, test_case.llm_service)
+        _collect_diagnostics(kserve_client, test_case.llm_service)
         raise
     finally:
         try:
@@ -270,7 +269,7 @@ def wait_for(
             time.sleep(interval)
 
 
-def collect_diagnostics(
+def _collect_diagnostics(
     kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService
 ):
     name = llm_isvc.metadata.name

From 1ba1e0cf09a31efa7a2efe2fafeb02e719a9af46 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 12:14:29 +0200
Subject: [PATCH 37/38] review

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/diagnostic.py                 | 9 ++++++---
 test/e2e/llmisvc/test_llm_inference_service.py | 2 --
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
index 59fc75dc595..8ced640c637 100644
--- a/test/e2e/llmisvc/diagnostic.py
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -58,16 +58,19 @@ def print_all_events_table(namespace: str, max_events: int = 50):
         print(f"# ❌ failed to list events: {e}")
 
 
-def kinds_matching_by_labels(namespace: str, labels, skip_api_kinds={"Secret"}):
+def kinds_matching_by_labels(namespace: str, labels, skip_api_kinds=None):
     """
     List all namespaced objects in `namespace` matching `labels`
     whose kinds are not in `skip_api_kinds`.
 
-    :param namespace: kube namespace to search
+    :param namespace: Namespace to search
     :param labels: either a dict of {k: v} or a raw selector string
     :param skip_api_kinds: an iterable of Resource.kind strings to exclude
-    :return: list of Unstructured objects
+    :return: a list of Unstructured objects
     """
+    if skip_api_kinds is None:
+        skip_api_kinds = {"Secret"}
+        
     config.load_kube_config()
     dyn = dynamic.DynamicClient(api_client.ApiClient())
 
diff --git a/test/e2e/llmisvc/test_llm_inference_service.py b/test/e2e/llmisvc/test_llm_inference_service.py
index 1f3bc0405d0..c8f6fcfa38d 100644
--- a/test/e2e/llmisvc/test_llm_inference_service.py
+++ b/test/e2e/llmisvc/test_llm_inference_service.py
@@ -112,8 +112,6 @@ def test_llm_inference_service(test_case: TestCase):
 
 @log_execution
 def create_llmisvc(kserve_client: KServeClient, llm_isvc: V1alpha1LLMInferenceService):
-    from kserve.utils import utils
-
     try:
         outputs = kserve_client.api_instance.create_namespaced_custom_object(
             constants.KSERVE_GROUP,

From 03b15b574c8a0fff597abcfecf4397bdec418a81 Mon Sep 17 00:00:00 2001
From: Bartosz Majsak <bartosz.majsak@gmail.com>
Date: Tue, 29 Jul 2025 12:21:51 +0200
Subject: [PATCH 38/38] precommit fix

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
---
 test/e2e/llmisvc/diagnostic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/e2e/llmisvc/diagnostic.py b/test/e2e/llmisvc/diagnostic.py
index 8ced640c637..a1458db91de 100644
--- a/test/e2e/llmisvc/diagnostic.py
+++ b/test/e2e/llmisvc/diagnostic.py
@@ -70,7 +70,7 @@ def kinds_matching_by_labels(namespace: str, labels, skip_api_kinds=None):
     """
     if skip_api_kinds is None:
         skip_api_kinds = {"Secret"}
-        
+
     config.load_kube_config()
     dyn = dynamic.DynamicClient(api_client.ApiClient())