Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions charts/kserve-resources/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,14 +186,6 @@ rules:
- get
- patch
- update
- apiGroups:
- operator.knative.dev
resources:
- knativeservings
verbs:
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resourceNames:
Expand Down
14 changes: 0 additions & 14 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/client-go/tools/record"
operatorv1beta1 "knative.dev/operator/pkg/apis/operator/v1beta1"
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/config"
Expand Down Expand Up @@ -191,19 +190,6 @@ func main() {
}
}

knServingFound, knServingCheckErr := utils.IsCrdAvailable(cfg, operatorv1beta1.SchemeGroupVersion.String(), constants.KnativeServingKind)
if knServingCheckErr != nil {
setupLog.Error(knServingCheckErr, "error when checking if Knative KnativeServing kind is available")
os.Exit(1)
}
if knServingFound {
setupLog.Info("Setting up Knative Operator scheme")
if err := operatorv1beta1.AddToScheme(mgr.GetScheme()); err != nil {
setupLog.Error(err, "unable to add Knative Operator APIs to scheme")
os.Exit(1)
}
}

if !ingressConfig.DisableIstioVirtualHost {
vsFound, vsCheckErr := utils.IsCrdAvailable(cfg, istioclientv1beta1.SchemeGroupVersion.String(), constants.IstioVirtualServiceKind)
if vsCheckErr != nil {
Expand Down
1 change: 0 additions & 1 deletion config/configmap/inferenceservice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ data:
# revisions, which prevents the reconciliation loop from being triggered if the annotations
# configured here are used.
# Default values are:
# "autoscaling.knative.dev/initial-scale",
# "autoscaling.knative.dev/min-scale",
# "autoscaling.knative.dev/max-scale",
# "internal.serving.kserve.io/storage-initializer-sourceuri",
Expand Down
1 change: 0 additions & 1 deletion config/overlays/odh/inferenceservice-config-patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ data:
inferenceService: |-
{
"serviceAnnotationDisallowedList": [
"autoscaling.knative.dev/initial-scale",
"autoscaling.knative.dev/min-scale",
"autoscaling.knative.dev/max-scale",
"internal.serving.kserve.io/storage-initializer-sourceuri",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ data:
inferenceService: |-
{
"serviceAnnotationDisallowedList": [
"autoscaling.knative.dev/initial-scale",
"autoscaling.knative.dev/min-scale",
"autoscaling.knative.dev/max-scale",
"internal.serving.kserve.io/storage-initializer-sourceuri",
Expand Down
8 changes: 0 additions & 8 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,6 @@ rules:
- get
- patch
- update
- apiGroups:
- operator.knative.dev
resources:
- knativeservings
verbs:
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resourceNames:
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ require (
k8s.io/kube-openapi v0.0.0-20250304201544-e5f78fe3ede9
k8s.io/utils v0.0.0-20241210054802-24370beab758
knative.dev/networking v0.0.0-20250117155906-67d1c274ba6a
knative.dev/operator v0.42.2
knative.dev/pkg v0.0.0-20250117084104-c43477f0052b
knative.dev/serving v0.44.0
sigs.k8s.io/controller-runtime v0.19.1
Expand Down
6 changes: 0 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,6 @@ github.com/linode/linodego v1.40.0 h1:7ESY0PwK94hoggoCtIroT1Xk6b1flrFBNZ6KwqbTql
github.com/linode/linodego v1.40.0/go.mod h1:NsUw4l8QrLdIofRg1NYFBbW5ZERnmbZykVBszPZLORM=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/manifestival/manifestival v0.7.2 h1:l4uFdWX/xQK4QcRfqGoMtBvaZeWPEuwD6hVsCwUqZY4=
github.com/manifestival/manifestival v0.7.2/go.mod h1:nl3T6HlfHCeidooWVTMI9vYNTBkQ1GdhLNb+smozbdk=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
Expand Down Expand Up @@ -959,12 +957,8 @@ k8s.io/kube-openapi v0.0.0-20250304201544-e5f78fe3ede9 h1:t0huyHnz6HsokckRxAF1bY
k8s.io/kube-openapi v0.0.0-20250304201544-e5f78fe3ede9/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0=
k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
knative.dev/caching v0.0.0-20250117155405-a76aa7cd2bb6 h1:a6oO186Ibm9iBmy9GzJPlJCVJnPPCppwo8NEm12Nnus=
knative.dev/caching v0.0.0-20250117155405-a76aa7cd2bb6/go.mod h1:xCMZSPoup5BSZ5GQ/Xa8xTEWNIZLLHx9mhPMeREt/ck=
knative.dev/networking v0.0.0-20250117155906-67d1c274ba6a h1:FaDPXtv42+AkYh/mE269pttPSZ3fDVAjJiEsYUaM4SM=
knative.dev/networking v0.0.0-20250117155906-67d1c274ba6a/go.mod h1:AIKYMfZydhwXR/60c/3KXEnqEnH6aNEEqulifdqJVcQ=
knative.dev/operator v0.42.2 h1:wgAWYHwoSFmV+wPHCt5dZahHTHLy2VCM4G82PEo9iSc=
knative.dev/operator v0.42.2/go.mod h1:cfSpJMgvwmuZ7USaxC+zgEuizMFc/xweREW5DG6J1DA=
knative.dev/pkg v0.0.0-20250117084104-c43477f0052b h1:a+gP7Yzu5NmoX2w1p8nfTgmSKF+aHLKGzqYT82ijJTw=
knative.dev/pkg v0.0.0-20250117084104-c43477f0052b/go.mod h1:bedSpkdLybR6JhL1J7XDLpd+JMKM/x8M5Apr80i5TeE=
knative.dev/serving v0.44.0 h1:c6TXhoSAI6eXt0/1ET3C69jMWYA4ES9FskSan/fBaac=
Expand Down
5 changes: 0 additions & 5 deletions pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,11 +283,8 @@ type InferenceServiceProtocol string

// Knative constants
const (
AutoscalerKey = "autoscaler"
AutoscalerInitialScaleKey = "initial-scale"
AutoscalerConfigmapName = "config-autoscaler"
AutoscalerAllowZeroScaleKey = "allow-zero-initial-scale"
DefaultKnServingName = "knative-serving"
DefaultKnServingNamespace = "knative-serving"
KnativeLocalGateway = "knative-serving/knative-local-gateway"
KnativeIngressGateway = "knative-serving/knative-ingress-gateway"
Expand Down Expand Up @@ -407,7 +404,6 @@ var (
// revisions, which prevents the reconciliation loop from being triggered if the annotations
// configured here are used.
ServiceAnnotationDisallowedList = []string{
autoscaling.InitialScaleAnnotationKey,
autoscaling.MinScaleAnnotationKey,
autoscaling.MaxScaleAnnotationKey,
StorageInitializerSourceUriInternalAnnotationKey,
Expand Down Expand Up @@ -533,7 +529,6 @@ const (
const (
IstioVirtualServiceKind = "VirtualService"
KnativeServiceKind = "Service"
KnativeServingKind = "KnativeServing"
HTTPRouteKind = "HTTPRoute"
GatewayKind = "Gateway"
ServiceKind = "Service"
Expand Down
18 changes: 2 additions & 16 deletions pkg/controller/v1alpha1/inferencegraph/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ limitations under the License.
// +kubebuilder:rbac:groups=serving.knative.dev,resources=services/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=create;get;update;patch;watch;delete
// +kubebuilder:rbac:groups=route.openshift.io,resources=routes/status,verbs=get
// +kubebuilder:rbac:groups=operator.knative.dev,resources=knativeservings,verbs=get;list;watch
package inferencegraph

import (
Expand All @@ -48,7 +47,6 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
operatorv1beta1 "knative.dev/operator/pkg/apis/operator/v1beta1"
"knative.dev/pkg/apis"
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -285,19 +283,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
if !ksvcAvailable {
r.Recorder.Event(graph, corev1.EventTypeWarning, "ServerlessModeRejected",
"It is not possible to use Serverless deployment mode when Knative Services are not available")
return reconcile.Result{Requeue: false}, reconcile.TerminalError(fmt.Errorf("the resolved deployment mode of InferenceGraph '%s' is Serverless, but Knative Services are not available", graph.Name))
}

// Abort if Knative KnativeServings are not available
knServingFound, knServingCheckErr := utils.IsCrdAvailable(r.ClientConfig, operatorv1beta1.SchemeGroupVersion.String(), constants.KnativeServingKind)
if knServingCheckErr != nil {
return reconcile.Result{}, knServingCheckErr
}

if !knServingFound {
r.Recorder.Event(graph, corev1.EventTypeWarning, "ServerlessModeRejected",
"It is not possible to use Serverless deployment mode when Knative KnativeServings are not available")
return reconcile.Result{Requeue: false}, reconcile.TerminalError(fmt.Errorf("the resolved deployment mode of InferenceGraph '%s' is Serverless, but Knative KnativeServings are not available", graph.Name))
return reconcile.Result{Requeue: false}, reconcile.TerminalError(fmt.Errorf("the resolved deployment mode of InferenceGraph '%s' is Serverless, but Knative Serving is not available", graph.Name))
}

// Retrieve the allow-zero-initial-scale value from the knative autoscaler configuration.
Expand All @@ -306,7 +292,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve the knative autoscaler configuration")
}

knutils.ValidateInitialScaleAnnotation(graph.Annotations, allowZeroInitialScale, r.Log)
knutils.ValidateInitialScaleAnnotation(graph.Annotations, allowZeroInitialScale, graph.Spec.MinReplicas, r.Log)

desired := createKnativeService(graph.ObjectMeta, graph, routerConfig)

Expand Down
115 changes: 112 additions & 3 deletions pkg/controller/v1alpha1/inferencegraph/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,61 @@ var _ = Describe("Inference Graph controller test", func() {
Expect(k8sClient.Create(ctx, ig)).Should(Succeed())
defer k8sClient.Delete(ctx, ig)

actualService := &knservingv1.Service{}
Eventually(func() error {
return k8sClient.Get(context.TODO(), serviceKey, actualService)
}, timeout).
Should(Succeed())

Expect(actualService.Spec.Template.Annotations).NotTo(HaveKey(autoscaling.InitialScaleAnnotationKey))
})
})
When("a Serverless InferenceGraph is created with zero min replicas", func() {
It("should use the default initial scale value", func() {
// Create configmap
configMap := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: constants.InferenceServiceConfigMapName,
Namespace: constants.KServeNamespace,
},
Data: configs,
}
Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred())
defer k8sClient.Delete(context.TODO(), configMap)

// Create InferenceGraph
graphName := "initialscale4"
expectedRequest := reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}}
serviceKey := expectedRequest.NamespacedName
ctx := context.Background()
var minScale int32 = 0
ig := &v1alpha1.InferenceGraph{
ObjectMeta: metav1.ObjectMeta{
Name: serviceKey.Name,
Namespace: serviceKey.Namespace,
Annotations: map[string]string{
"serving.kserve.io/deploymentMode": string(constants.Serverless),
},
},
Spec: v1alpha1.InferenceGraphSpec{
MinReplicas: &minScale,
Nodes: map[string]v1alpha1.InferenceRouter{
v1alpha1.GraphRootNodeName: {
RouterType: v1alpha1.Sequence,
Steps: []v1alpha1.InferenceStep{
{
InferenceTarget: v1alpha1.InferenceTarget{
ServiceURL: "http://someservice.exmaple.com",
},
},
},
},
},
},
}
Expect(k8sClient.Create(ctx, ig)).Should(Succeed())
defer k8sClient.Delete(ctx, ig)

actualService := &knservingv1.Service{}
Eventually(func() error {
return k8sClient.Get(context.TODO(), serviceKey, actualService)
Expand Down Expand Up @@ -291,7 +346,7 @@ var _ = Describe("Inference Graph controller test", func() {
Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred())
defer k8sClient.Delete(context.TODO(), configMap)

graphName := "initialscale4"
graphName := "initialscale5"
expectedRequest := reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}}
serviceKey := expectedRequest.NamespacedName
ctx := context.Background()
Expand Down Expand Up @@ -346,7 +401,7 @@ var _ = Describe("Inference Graph controller test", func() {
Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred())
defer k8sClient.Delete(context.TODO(), configMap)

graphName := "initialscale5"
graphName := "initialscale6"
expectedRequest := reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}}
serviceKey := expectedRequest.NamespacedName
ctx := context.Background()
Expand Down Expand Up @@ -401,7 +456,7 @@ var _ = Describe("Inference Graph controller test", func() {
Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred())
defer k8sClient.Delete(context.TODO(), configMap)

graphName := "initialscale6"
graphName := "initialscale7"
expectedRequest := reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}}
serviceKey := expectedRequest.NamespacedName
ctx := context.Background()
Expand Down Expand Up @@ -443,6 +498,60 @@ var _ = Describe("Inference Graph controller test", func() {
Expect(actualService.Spec.Template.Annotations).NotTo(HaveKey(autoscaling.InitialScaleAnnotationKey))
})
})
// Scenario: a Serverless InferenceGraph whose Spec.MinReplicas is explicitly 0.
// The generated Knative Service is expected to carry the initial-scale
// annotation set to "0" on its revision template.
// NOTE(review): presumably this relies on the suite-level autoscaler
// configuration permitting a zero initial scale — confirm in suite_test.go.
When("a Serverless InferenceGraph is created with zero min replicas", func() {
It("should override the default initial scale value with zero", func() {
// Create the InferenceService ConfigMap in the KServe namespace from the
// shared `configs` data; it is deleted again when this spec returns.
configMap := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: constants.InferenceServiceConfigMapName,
Namespace: constants.KServeNamespace,
},
Data: configs,
}
Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred())
defer k8sClient.Delete(context.TODO(), configMap)

// The graph name is unique to this spec; the same namespaced name is used
// below to look up the Knative Service.
graphName := "initialscale8"
expectedRequest := reconcile.Request{NamespacedName: types.NamespacedName{Name: graphName, Namespace: "default"}}
serviceKey := expectedRequest.NamespacedName
ctx := context.Background()
// MinReplicas is a pointer field, so the zero value needs an addressable variable.
var minScale int32 = 0
ig := &v1alpha1.InferenceGraph{
ObjectMeta: metav1.ObjectMeta{
Name: serviceKey.Name,
Namespace: serviceKey.Namespace,
Annotations: map[string]string{
// Explicitly request the Serverless deployment mode for this graph.
"serving.kserve.io/deploymentMode": string(constants.Serverless),
},
},
Spec: v1alpha1.InferenceGraphSpec{
MinReplicas: &minScale,
// Minimal graph: a single root node of Sequence type with one step
// that targets a service URL.
Nodes: map[string]v1alpha1.InferenceRouter{
v1alpha1.GraphRootNodeName: {
RouterType: v1alpha1.Sequence,
Steps: []v1alpha1.InferenceStep{
{
InferenceTarget: v1alpha1.InferenceTarget{
ServiceURL: "http://someservice.exmaple.com",
},
},
},
},
},
},
}
Expect(k8sClient.Create(ctx, ig)).Should(Succeed())
defer k8sClient.Delete(ctx, ig)

// Poll until the Knative Service with the graph's namespaced name can be
// fetched, or `timeout` elapses.
actualService := &knservingv1.Service{}
Eventually(func() error {
return k8sClient.Get(context.TODO(), serviceKey, actualService)
}, timeout).
Should(Succeed())

// The revision template must carry the initial-scale annotation with value "0".
Expect(actualService.Spec.Template.Annotations[autoscaling.InitialScaleAnnotationKey]).To(Equal("0"))
})
})
})

Context("When creating an inferencegraph with headers in global config", func() {
Expand Down
20 changes: 0 additions & 20 deletions pkg/controller/v1alpha1/inferencegraph/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"knative.dev/operator/pkg/apis/operator/base"
operatorv1beta1 "knative.dev/operator/pkg/apis/operator/v1beta1"
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"

ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -134,24 +132,6 @@ var _ = BeforeSuite(func() {
}
Expect(k8sClient.Create(context.Background(), configAutoscaler)).Should(Succeed())

// Create knativeserving custom resource
knativeCr := &operatorv1beta1.KnativeServing{
ObjectMeta: metav1.ObjectMeta{
Name: constants.DefaultKnServingName,
Namespace: constants.DefaultKnServingNamespace,
},
Spec: operatorv1beta1.KnativeServingSpec{
CommonSpec: base.CommonSpec{
Config: base.ConfigMapData{
"autoscaler": map[string]string{
"allow-zero-initial-scale": "true",
},
},
},
},
}
Expect(k8sClient.Create(context.Background(), knativeCr)).Should(Succeed())

deployConfig := &v1beta1.DeployConfig{DefaultDeploymentMode: "Serverless"}

err = (&InferenceGraphReconciler{
Expand Down
Loading
Loading