diff --git a/pkg/controller/llmisvc/controller_int_test.go b/pkg/controller/llmisvc/controller_int_test.go index 6ff1818910a..a7fc1dd9576 100644 --- a/pkg/controller/llmisvc/controller_int_test.go +++ b/pkg/controller/llmisvc/controller_int_test.go @@ -21,6 +21,17 @@ import ( "fmt" "time" + "github.com/kserve/kserve/pkg/constants" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" + logf "sigs.k8s.io/controller-runtime/pkg/log" + igwapi "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" + gatewayapi "sigs.k8s.io/gateway-api/apis/v1" + . "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" . "github.com/onsi/gomega" @@ -28,21 +39,12 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" - "knative.dev/pkg/apis" "knative.dev/pkg/kmeta" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - igwapi "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" - gatewayapi "sigs.k8s.io/gateway-api/apis/v1" "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/controller/llmisvc" @@ -183,26 +185,13 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) - - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: svcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - WorkloadSpec: v1alpha1.WorkloadSpec{}, - Router: &v1alpha1.RouterSpec{ - Route: &v1alpha1.GatewayRoutesSpec{}, - Gateway: &v1alpha1.GatewaySpec{}, - Scheduler: &v1alpha1.SchedulerSpec{}, - }, - }, - } + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithManagedRoute(), + WithManagedGateway(), + WithManagedScheduler(), + ) // when Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) @@ -254,34 +243,15 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) - infPoolName := kmeta.ChildName(svcName, "-my-inf-pool") - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: svcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - WorkloadSpec: v1alpha1.WorkloadSpec{}, - Router: &v1alpha1.RouterSpec{ - Route: &v1alpha1.GatewayRoutesSpec{}, - Gateway: &v1alpha1.GatewaySpec{}, - Scheduler: &v1alpha1.SchedulerSpec{ - Pool: &v1alpha1.InferencePoolSpec{ - Ref: &corev1.LocalObjectReference{ - Name: infPoolName, - }, - }, - }, - }, - }, - } + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithManagedRoute(), + WithManagedGateway(), + WithInferencePoolRef(infPoolName), + ) infPool := InferencePool(infPoolName, InNamespace[*igwapi.InferencePool](nsName), @@ -339,24 +309,12 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) - - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: llmSvcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - Router: &v1alpha1.RouterSpec{ - Route: &v1alpha1.GatewayRoutesSpec{}, - Gateway: &v1alpha1.GatewaySpec{}, - }, - }, - } + llmSvc := LLMInferenceService(llmSvcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithManagedRoute(), + WithManagedGateway(), + ) // when Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) @@ -420,29 +378,12 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) - - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: svcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - WorkloadSpec: v1alpha1.WorkloadSpec{}, - Router: &v1alpha1.RouterSpec{ - Route: &v1alpha1.GatewayRoutesSpec{ - HTTP: &v1alpha1.HTTPRouteSpec{ - Spec: customRouteSpec(ctx, envTest.Client, nsName, "my-ingress-gateway", "my-inference-service"), - }, - }, - Gateway: &v1alpha1.GatewaySpec{}, - }, - }, - } + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithManagedGateway(), + WithHTTPRouteSpec(customRouteSpec(ctx, envTest.Client, nsName, "my-ingress-gateway", "my-inference-service")), + ) // when Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) @@ -492,8 +433,6 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) // Create the Gateway that the router-managed preset references gateway := Gateway("my-ingress-gateway", InNamespace[*gatewayapi.Gateway](nsName), @@ -510,31 +449,12 @@ var _ = Describe("LLMInferenceService Controller", func() { Expect(envTest.Delete(ctx, gateway)).To(Succeed()) }() - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: svcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - WorkloadSpec: v1alpha1.WorkloadSpec{}, - Router: &v1alpha1.RouterSpec{ - Route: &v1alpha1.GatewayRoutesSpec{ - HTTP: &v1alpha1.HTTPRouteSpec{}, - }, - Gateway: &v1alpha1.GatewaySpec{ - Refs: []v1alpha1.UntypedObjectReference{ - { - Name: "my-ingress-gateway", - Namespace: gatewayapi.Namespace(nsName), - }, - }, - }, - }, - }, - } + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithManagedRoute(), + WithManagedGateway(), + ) Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) defer func() { @@ -549,10 +469,23 @@ var _ = Describe("LLMInferenceService Controller", func() { return nil }).WithContext(ctx).Should(Succeed()) + customHTTPRoute := HTTPRoute("my-custom-route", []HTTPRouteOption{ + InNamespace[*gatewayapi.HTTPRoute](nsName), + WithParentRef(GatewayParentRef(gateway.Name, gateway.Namespace)), + WithHTTPRouteRule( + HTTPRouteRuleWithBackendAndTimeouts(svcName+"-inference-pool", 8000, "/", "0s", "0s"), + ), + }...) + Expect(envTest.Client.Create(ctx, customHTTPRoute)).To(Succeed()) + + // Make the HTTPRoute ready + ensureHTTPRouteReady(ctx, envTest.Client, customHTTPRoute) + // when - Update the HTTPRoute spec errRetry := retry.RetryOnConflict(retry.DefaultRetry, func() error { _, errUpdate := ctrl.CreateOrUpdate(ctx, envTest.Client, llmSvc, func() error { - llmSvc.Spec.Router.Route.HTTP.Refs = []corev1.LocalObjectReference{{Name: "my-custom-route"}} + WithHTTPRouteRefs(HTTPRouteRef("my-custom-route"))(llmSvc) + WithGatewayRefs(LLMGatewayRef(gateway.Name, gateway.Namespace))(llmSvc) return nil }) return errUpdate @@ -588,9 +521,6 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) - ingressGateway := DefaultGateway(nsName) Expect(envTest.Client.Create(ctx, ingressGateway)).To(Succeed()) ensureGatewayReady(ctx, envTest.Client, ingressGateway) @@ -611,27 +541,11 @@ var _ = Describe("LLMInferenceService Controller", func() { // Make the HTTPRoute ready ensureHTTPRouteReady(ctx, envTest.Client, customHTTPRoute) - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: svcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - WorkloadSpec: v1alpha1.WorkloadSpec{}, - Router: &v1alpha1.RouterSpec{ - Route: &v1alpha1.GatewayRoutesSpec{ - HTTP: &v1alpha1.HTTPRouteSpec{ - Refs: []corev1.LocalObjectReference{ - {Name: "my-custom-route"}, - }, - }, - }, - }, - }, - } + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithHTTPRouteRefs(HTTPRouteRef("my-custom-route")), + ) // when Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) @@ -676,22 +590,11 @@ var _ = Describe("LLMInferenceService Controller", func() { envTest.DeleteAll(namespace) }() - modelURL, err := apis.ParseURL("hf://facebook/opt-125m") - Expect(err).ToNot(HaveOccurred()) - - llmSvc := &v1alpha1.LLMInferenceService{ - ObjectMeta: metav1.ObjectMeta{ - Name: svcName, - Namespace: nsName, - }, - Spec: v1alpha1.LLMInferenceServiceSpec{ - Model: v1alpha1.LLMModelSpec{ - URI: *modelURL, - }, - WorkloadSpec: v1alpha1.WorkloadSpec{}, - Router: initialRouterSpec, - }, - } + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + ) + llmSvc.Spec.Router = initialRouterSpec // when - Create LLMInferenceService Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) @@ -748,6 +651,150 @@ var _ = Describe("LLMInferenceService Controller", func() { }) }) + Context("Reference validation", func() { + When("referenced Gateway does not exist", func() { + It("should mark RouterReady condition as False with InvalidRefs reason", func(ctx SpecContext) { + // given + svcName := "test-llm-gateway-ref-not-found" + nsName := kmeta.ChildName(svcName, "-test") + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + }, + } + Expect(envTest.Client.Create(ctx, namespace)).To(Succeed()) + defer func() { + envTest.DeleteAll(namespace) + }() + + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithHTTPRouteRefs(HTTPRouteRef("non-existent-route")), + WithGatewayRefs(LLMGatewayRef("non-existent-gateway", nsName)), + ) + + // when + Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) + defer func() { + Expect(envTest.Delete(ctx, llmSvc)).To(Succeed()) + }() + + // then + Eventually(func(g Gomega, ctx context.Context) error { + updatedLLMSvc := &v1alpha1.LLMInferenceService{} + g.Expect(envTest.Get(ctx, client.ObjectKeyFromObject(llmSvc), updatedLLMSvc)).To(Succeed()) + + g.Expect(updatedLLMSvc.Status).To(HaveCondition(string(v1alpha1.RouterReady), "False")) + + routerCondition := updatedLLMSvc.Status.GetCondition(v1alpha1.RouterReady) + g.Expect(routerCondition).ToNot(BeNil()) + g.Expect(routerCondition.Reason).To(Equal(llmisvc.RefsInvalidReason)) + g.Expect(routerCondition.Message).To(ContainSubstring(nsName + "/non-existent-gateway does not exist")) + + return nil + }).WithContext(ctx).Should(Succeed()) + }) + }) + + When("referenced parent Gateway from HTTPRoute does not exist", func() { + It("should mark RouterReady condition as False with RefsInvalid reason", func(ctx SpecContext) { + // given + svcName := "test-llm-parent-gateway-ref-not-found" + nsName := kmeta.ChildName(svcName, "-test") + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + }, + } + Expect(envTest.Client.Create(ctx, namespace)).To(Succeed()) + defer func() { + envTest.DeleteAll(namespace) + }() + + // Create HTTPRoute spec that references a non-existent gateway + customRouteSpec := &HTTPRoute("temp", + WithParentRefs(GatewayParentRef("non-existent-parent-gateway", nsName)), + WithHTTPRule( + WithBackendRefs(ServiceRef("some-backend", 8000, 1)), + ), + ).Spec + + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithHTTPRouteSpec(customRouteSpec), + ) + + // when + Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) + defer func() { + Expect(envTest.Delete(ctx, llmSvc)).To(Succeed()) + }() + + // then + Eventually(func(g Gomega, ctx context.Context) error { + updatedLLMSvc := &v1alpha1.LLMInferenceService{} + g.Expect(envTest.Get(ctx, client.ObjectKeyFromObject(llmSvc), updatedLLMSvc)).To(Succeed()) + + g.Expect(updatedLLMSvc.Status).To(HaveCondition(string(v1alpha1.RouterReady), "False")) + + routerCondition := updatedLLMSvc.Status.GetCondition(v1alpha1.RouterReady) + g.Expect(routerCondition).ToNot(BeNil()) + g.Expect(routerCondition.Reason).To(Equal(llmisvc.RefsInvalidReason)) + g.Expect(routerCondition.Message).To(ContainSubstring(fmt.Sprintf("Managed HTTPRoute references non-existent Gateway %s/non-existent-parent-gateway", nsName))) + + return nil + }).WithContext(ctx).Should(Succeed()) + }) + }) + + When("referenced HTTPRoute does not exist", func() { + It("should mark RouterReady condition as False with RefsInvalid reason", func(ctx SpecContext) { + // given + svcName := "test-llm-route-ref-not-found" + nsName := kmeta.ChildName(svcName, "-test") + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + }, + } + Expect(envTest.Client.Create(ctx, namespace)).To(Succeed()) + defer func() { + envTest.DeleteAll(namespace) + }() + + llmSvc := LLMInferenceService(svcName, + InNamespace[*v1alpha1.LLMInferenceService](nsName), + WithModelURI("hf://facebook/opt-125m"), + WithHTTPRouteRefs(HTTPRouteRef("non-existent-route")), + WithGatewayRefs(LLMGatewayRef(constants.GatewayName, constants.KServeNamespace)), + ) + + // when + Expect(envTest.Create(ctx, llmSvc)).To(Succeed()) + defer func() { + Expect(envTest.Delete(ctx, llmSvc)).To(Succeed()) + }() + + // then + Eventually(func(g Gomega, ctx context.Context) error { + updatedLLMSvc := &v1alpha1.LLMInferenceService{} + g.Expect(envTest.Get(ctx, client.ObjectKeyFromObject(llmSvc), updatedLLMSvc)).To(Succeed()) + + g.Expect(updatedLLMSvc.Status).To(HaveCondition(string(v1alpha1.RouterReady), "False")) + + routerCondition := updatedLLMSvc.Status.GetCondition(v1alpha1.RouterReady) + g.Expect(routerCondition).ToNot(BeNil()) + g.Expect(routerCondition.Reason).To(Equal(llmisvc.RefsInvalidReason)) + g.Expect(routerCondition.Message).To(ContainSubstring(fmt.Sprintf("HTTPRoute %s/non-existent-route does not exist", nsName))) + + return nil + }).WithContext(ctx).Should(Succeed()) + }) + }) + }) + Context("Monitoring Reconciliation", func() { It("should create monitoring resources when llmisvc is created", func(ctx SpecContext) { // given diff --git a/pkg/controller/llmisvc/fixture/llmisvc_builders.go b/pkg/controller/llmisvc/fixture/llmisvc_builders.go index beebaa343de..1016fce4fd5 100644 --- a/pkg/controller/llmisvc/fixture/llmisvc_builders.go +++ b/pkg/controller/llmisvc/fixture/llmisvc_builders.go @@ -242,6 +242,22 @@ func WithManagedScheduler() LLMInferenceServiceOption { } } +func WithInferencePoolRef(poolName string) LLMInferenceServiceOption { + return func(llmSvc *v1alpha1.LLMInferenceService) { + if llmSvc.Spec.Router == nil { + llmSvc.Spec.Router = &v1alpha1.RouterSpec{} + } + if llmSvc.Spec.Router.Scheduler == nil { + llmSvc.Spec.Router.Scheduler = &v1alpha1.SchedulerSpec{} + } + llmSvc.Spec.Router.Scheduler.Pool = &v1alpha1.InferencePoolSpec{ + Ref: &corev1.LocalObjectReference{ + Name: poolName, + }, + } + } +} + func SimpleWorkerPodSpec() *corev1.PodSpec { return &corev1.PodSpec{ Containers: []corev1.Container{ diff --git a/pkg/controller/llmisvc/router.go b/pkg/controller/llmisvc/router.go index 9adc86c8dd8..4cc04210b79 100644 --- a/pkg/controller/llmisvc/router.go +++ b/pkg/controller/llmisvc/router.go @@ -49,6 +49,10 @@ func (r *LLMInferenceServiceReconciler) reconcileRouter(ctx context.Context, llm defer llmSvc.DetermineRouterReadiness() + if err := r.validateRouterReferences(ctx, llmSvc); err != nil { + return err + } + if err := r.reconcileScheduler(ctx, llmSvc); err != nil { llmSvc.MarkSchedulerWorkloadNotReady("SchedulerReconcileError", "Failed to reconcile scheduler: %v", err.Error()) return fmt.Errorf("failed to reconcile scheduler: %w", err) @@ -103,7 +107,6 @@ func (r *LLMInferenceServiceReconciler) reconcileHTTPRoutes(ctx context.Context, return Delete(ctx, r, llmSvc, expectedHTTPRoute) } - // TODO(validation): referenced gateway exists if route.HTTP.HasSpec() { if err := Reconcile(ctx, r, llmSvc, &gatewayapi.HTTPRoute{}, expectedHTTPRoute, semanticHTTPRouteIsEqual); err != nil { return fmt.Errorf("failed to reconcile HTTPRoute %s/%s: %w", expectedHTTPRoute.GetNamespace(), expectedHTTPRoute.GetName(), err) @@ -120,18 +123,20 @@ func (r *LLMInferenceServiceReconciler) collectReferencedRoutes(ctx context.Cont } referencedRoutes := make([]*gatewayapi.HTTPRoute, 0, len(llmSvc.Spec.Router.Route.HTTP.Refs)) + for _, routeRef := range llmSvc.Spec.Router.Route.HTTP.Refs { route := &gatewayapi.HTTPRoute{} if err := r.Client.Get(ctx, types.NamespacedName{Namespace: llmSvc.GetNamespace(), Name: routeRef.Name}, route); err != nil { if apierrors.IsNotFound(err) { - // TODO(follow-up) mark condition if not found + // Skip missing routes - validation is handled separately continue } - return referencedRoutes, fmt.Errorf("failed to get HTTPRoute %s/%s: %w", routeRef.Name, llmSvc.GetName(), err) + return referencedRoutes, fmt.Errorf("failed to get HTTPRoute %s/%s: %w", llmSvc.GetName(), routeRef.Name, err) } referencedRoutes = append(referencedRoutes, route) } + return referencedRoutes, nil } @@ -237,6 +242,13 @@ func (r *LLMInferenceServiceReconciler) EvaluateGatewayConditions(ctx context.Co return nil } + // Check if there's already a validation failure condition set + condition := llmSvc.GetStatus().GetCondition(v1alpha1.GatewaysReady) + if condition != nil && condition.IsFalse() && condition.Reason == RefsInvalidReason { + logger.Info("Gateway validation failed, skipping readiness evaluation", "reason", condition.Reason, "message", condition.Message) + return nil + } + gateways, err := r.CollectReferencedGateways(ctx, llmSvc) if err != nil { llmSvc.MarkGatewaysNotReady("GatewayFetchError", "Failed to fetch referenced Gateways: %v", err.Error()) @@ -324,6 +336,13 @@ func (r *LLMInferenceServiceReconciler) EvaluateHTTPRouteConditions(ctx context. return nil } + // Check if there's already a validation failure condition set + condition := llmSvc.GetStatus().GetCondition(v1alpha1.HTTPRoutesReady) + if condition != nil && condition.IsFalse() && condition.Reason == RefsInvalidReason { + logger.Info("HTTPRoute validation failed, skipping readiness evaluation", "reason", condition.Reason, "message", condition.Message) + return nil + } + // Collect all HTTPRoutes (both referenced and managed) var allRoutes []*gatewayapi.HTTPRoute diff --git a/pkg/controller/llmisvc/router_validation.go b/pkg/controller/llmisvc/router_validation.go new file mode 100644 index 00000000000..d0c6e6f194d --- /dev/null +++ b/pkg/controller/llmisvc/router_validation.go @@ -0,0 +1,264 @@ +/* +Copyright 2025 The KServe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package llmisvc + +import ( + "context" + "errors" + "fmt" + "strings" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/log" + gatewayapi "sigs.k8s.io/gateway-api/apis/v1" + + "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" +) + +const ( + RefsInvalidReason = "RefsInvalid" +) + +// ValidationError represents a validation failure that should be reported via conditions +// rather than causing reconciliation to fail completely +type ValidationError struct { + message string +} + +func (e *ValidationError) Error() string { + return e.message +} + +func NewValidationError(msg string) *ValidationError { + return &ValidationError{ + message: msg, + } +} + +func IsValidationError(err error) bool { + var validationError *ValidationError + return errors.As(err, &validationError) +} + +// validateRouterReferences performs comprehensive validation of all router-related references +// including gateway references, HTTPRoute references, managed HTTPRoute specs, and route targets. +// It handles condition marking internally and returns validation or unexpected errors. +func (r *LLMInferenceServiceReconciler) validateRouterReferences(ctx context.Context, llmSvc *v1alpha1.LLMInferenceService) error { + logger := log.FromContext(ctx).WithName("validateRouterReferences") + + if err := r.validateGatewayReferences(ctx, llmSvc); err != nil { + if IsValidationError(err) { + llmSvc.MarkGatewaysNotReady(RefsInvalidReason, err.Error()) + logger.Info("Gateway reference validation failed", "error", err) + return err + } + return fmt.Errorf("gateway reference validation failed: %w", err) + } + + if err := r.validateHTTPRouteReferences(ctx, llmSvc); err != nil { + if IsValidationError(err) { + llmSvc.MarkHTTPRoutesNotReady(RefsInvalidReason, err.Error()) + logger.Info("HTTPRoute reference validation failed", "error", err) + return err + } + return fmt.Errorf("HTTPRoute reference validation failed: %w", err) + } + + if err := r.validateManagedHTTPRouteSpec(ctx, llmSvc); err != nil { + if IsValidationError(err) { + llmSvc.MarkHTTPRoutesNotReady(RefsInvalidReason, err.Error()) + logger.Info("Managed HTTPRoute spec validation failed", "error", err) + return err + } + return fmt.Errorf("managed HTTPRoute spec validation failed: %w", err) + } + + if llmSvc.Spec.Router != nil && llmSvc.Spec.Router.Route != nil && llmSvc.Spec.Router.Route.HTTP.HasRefs() { + referencedRoutes, err := r.collectReferencedRoutes(ctx, llmSvc) + if err != nil { + return fmt.Errorf("failed to collect referenced routes for target validation: %w", err) + } + + if err := r.validateHTTPRouteTargets(ctx, referencedRoutes); err != nil { + if IsValidationError(err) { + llmSvc.MarkHTTPRoutesNotReady(RefsInvalidReason, err.Error()) + logger.Info("HTTPRoute target validation failed", "error", err) + return err + } + return fmt.Errorf("HTTPRoute target validation failed: %w", err) + } + } + + return nil +} + +// validateGatewayReferences checks if all referenced gateways exist +func (r *LLMInferenceServiceReconciler) validateGatewayReferences(ctx context.Context, llmSvc *v1alpha1.LLMInferenceService) error { + logger := log.FromContext(ctx).WithName("validateGatewayReferences") + + // If no router or gateway configuration, skip validation + if llmSvc.Spec.Router == nil || llmSvc.Spec.Router.Gateway == nil || !llmSvc.Spec.Router.Gateway.HasRefs() { + return nil + } + + var missingGateways []string + + for _, ref := range llmSvc.Spec.Router.Gateway.Refs { + gateway := &gatewayapi.Gateway{} + gatewayKey := types.NamespacedName{ + Name: string(ref.Name), + Namespace: string(ref.Namespace), + } + + if gatewayKey.Namespace == "" { + gatewayKey.Namespace = llmSvc.GetNamespace() + } + + err := r.Client.Get(ctx, gatewayKey, gateway) + if err != nil { + if apierrors.IsNotFound(err) { + missingGateways = append(missingGateways, fmt.Sprintf("Gateway %s/%s does not exist", gatewayKey.Namespace, gatewayKey.Name)) + continue + } + logger.Error(err, "Error fetching Gateway", "name", gatewayKey.Name, "namespace", gatewayKey.Namespace) + return fmt.Errorf("failed to get Gateway %s: %w", gatewayKey, err) + } + } + + if len(missingGateways) > 0 { + message := strings.Join(missingGateways, "; ") + return NewValidationError(message) + } + + return nil +} + +// validateHTTPRouteReferences checks if all referenced HTTPRoutes exist +func (r *LLMInferenceServiceReconciler) validateHTTPRouteReferences(ctx context.Context, llmSvc *v1alpha1.LLMInferenceService) error { + // If no router or route configuration, skip validation + if llmSvc.Spec.Router == nil || llmSvc.Spec.Router.Route == nil || !llmSvc.Spec.Router.Route.HTTP.HasRefs() { + return nil + } + + var missingRoutes []string + + for _, routeRef := range llmSvc.Spec.Router.Route.HTTP.Refs { + route := &gatewayapi.HTTPRoute{} + if err := r.Client.Get(ctx, types.NamespacedName{Namespace: llmSvc.GetNamespace(), Name: routeRef.Name}, route); err != nil { + if apierrors.IsNotFound(err) { + missingRoutes = append(missingRoutes, fmt.Sprintf("HTTPRoute %s/%s does not exist", llmSvc.GetNamespace(), routeRef.Name)) + continue + } + return fmt.Errorf("failed to get HTTPRoute %s/%s: %w", llmSvc.GetNamespace(), routeRef.Name, err) + } + } + + if len(missingRoutes) > 0 { + message := strings.Join(missingRoutes, "; ") + return NewValidationError(message) + } + + return nil +} + +// validateHTTPRouteTargets checks if referenced HTTPRoutes properly target the inference service +func (r *LLMInferenceServiceReconciler) validateHTTPRouteTargets(ctx context.Context, routes []*gatewayapi.HTTPRoute) error { + logger := log.FromContext(ctx).WithName("validateHTTPRouteTargets") + + var targetErrors []string + + for _, route := range routes { + if len(route.Spec.ParentRefs) == 0 { + targetErrors = append(targetErrors, fmt.Sprintf("HTTPRoute %s/%s has no parent gateway references", route.Namespace, route.Name)) + continue + } + + for _, parentRef := range route.Spec.ParentRefs { + gatewayKey := types.NamespacedName{ + Name: string(parentRef.Name), + } + + if parentRef.Namespace != nil { + gatewayKey.Namespace = string(*parentRef.Namespace) + } else { + gatewayKey.Namespace = route.Namespace + } + + gateway := &gatewayapi.Gateway{} + err := r.Client.Get(ctx, gatewayKey, gateway) + if err != nil { + if apierrors.IsNotFound(err) { + targetErrors = append(targetErrors, fmt.Sprintf("HTTPRoute %s/%s references non-existent Gateway %s/%s", route.Namespace, route.Name, gatewayKey.Namespace, gatewayKey.Name)) + } else { + logger.Error(err, "Error fetching parent Gateway", "route", route.Name, "gateway", gatewayKey) + targetErrors = append(targetErrors, fmt.Sprintf("HTTPRoute %s/%s failed to validate parent Gateway %s/%s: %v", route.Namespace, route.Name, gatewayKey.Namespace, gatewayKey.Name, err)) + } + } + } + } + + if len(targetErrors) > 0 { + message := strings.Join(targetErrors, "; ") + return NewValidationError("HTTPRoute target validation failed: " + message) + } + + return nil +} + +// validateManagedHTTPRouteSpec checks if managed HTTPRoute spec has valid parent gateway references +func (r *LLMInferenceServiceReconciler) validateManagedHTTPRouteSpec(ctx context.Context, llmSvc *v1alpha1.LLMInferenceService) error { + logger := log.FromContext(ctx).WithName("validateManagedHTTPRouteSpec") + + // Only validate if there's a managed HTTPRoute spec + if llmSvc.Spec.Router == nil || llmSvc.Spec.Router.Route == nil || !llmSvc.Spec.Router.Route.HTTP.HasSpec() { + return nil + } + + var targetErrors []string + httpSpec := llmSvc.Spec.Router.Route.HTTP.Spec + + for _, parentRef := range httpSpec.ParentRefs { + gatewayKey := types.NamespacedName{ + Name: string(parentRef.Name), + } + + if parentRef.Namespace != nil { + gatewayKey.Namespace = string(*parentRef.Namespace) + } else { + gatewayKey.Namespace = llmSvc.GetNamespace() + } + + gateway := &gatewayapi.Gateway{} + err := r.Client.Get(ctx, gatewayKey, gateway) + if err != nil { + if apierrors.IsNotFound(err) { + targetErrors = append(targetErrors, fmt.Sprintf("Managed HTTPRoute references non-existent Gateway %s/%s", gatewayKey.Namespace, gatewayKey.Name)) + } else { + logger.Error(err, "Error fetching parent Gateway for managed route", "gateway", gatewayKey) + return fmt.Errorf("failed to validate parent Gateway %s/%s: %w", gatewayKey.Namespace, gatewayKey.Name, err) + } + } + } + + if len(targetErrors) > 0 { + message := strings.Join(targetErrors, "; ") + return NewValidationError(message) + } + + return nil +}