diff --git a/Makefile b/Makefile index 748f2c351..a7995b3e6 100644 --- a/Makefile +++ b/Makefile @@ -358,7 +358,7 @@ PROTOC_GEN_GO_GRPC ?= $(LOCALBIN)/protoc-gen-go-grpc ## Tool Versions KUSTOMIZE_VERSION ?= v5.1.1 -VGOPATH_VERSION ?= v0.1.3 +VGOPATH_VERSION ?= v0.1.10 CONTROLLER_TOOLS_VERSION ?= v0.20.0 GEN_CRD_API_REFERENCE_DOCS_VERSION ?= v0.3.0 ADDLICENSE_VERSION ?= v1.1.1 diff --git a/api/compute/v1alpha1/common.go b/api/compute/v1alpha1/common.go index 661711ed0..9714e2d62 100644 --- a/api/compute/v1alpha1/common.go +++ b/api/compute/v1alpha1/common.go @@ -9,6 +9,10 @@ import ( corev1 "k8s.io/api/core/v1" ) +const ( + NamespaceMachinePoolLease = "ironcore-machinepool-lease" +) + const ( MachineMachinePoolRefNameField = "spec.machinePoolRef.name" MachineMachineClassRefNameField = "spec.machineClassRef.name" diff --git a/api/compute/v1alpha1/conditions.go b/api/compute/v1alpha1/conditions.go new file mode 100644 index 000000000..3110139aa --- /dev/null +++ b/api/compute/v1alpha1/conditions.go @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + "slices" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// FindMachinePoolCondition returns a pointer to the condition of the given type, +// or nil if no condition of that type is present. +func FindMachinePoolCondition(conditions []MachinePoolCondition, typ MachinePoolConditionType) *MachinePoolCondition { + idx := slices.IndexFunc(conditions, func(cond MachinePoolCondition) bool { + return cond.Type == typ + }) + if idx < 0 { + return nil + } + return &conditions[idx] +} + +// SetMachinePoolCondition inserts or updates a condition of the given type in the +// conditions slice. LastUpdateTime is always set to now. LastTransitionTime is set +// to now only when the condition is newly inserted or its Status differs from the +// previous value. 
+func SetMachinePoolCondition(conditions []MachinePoolCondition, cond MachinePoolCondition) []MachinePoolCondition { + idx := slices.IndexFunc(conditions, func(c MachinePoolCondition) bool { + return c.Type == cond.Type + }) + + cond.LastUpdateTime = metav1.Now() + + if idx < 0 || conditions[idx].Status != cond.Status { + cond.LastTransitionTime = metav1.Now() + } else { + cond.LastTransitionTime = conditions[idx].LastTransitionTime + } + + if idx < 0 { + return append(conditions, cond) + } + conditions[idx] = cond + return conditions +} diff --git a/api/compute/v1alpha1/conditions_test.go b/api/compute/v1alpha1/conditions_test.go new file mode 100644 index 000000000..998480a48 --- /dev/null +++ b/api/compute/v1alpha1/conditions_test.go @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1_test + +import ( + "testing" + "time" + + computev1alpha1 "github.com/ironcore-dev/ironcore/api/compute/v1alpha1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestFindMachinePoolCondition_returnsMatch(t *testing.T) { + conds := []computev1alpha1.MachinePoolCondition{ + {Type: "Other", Status: corev1.ConditionTrue}, + {Type: computev1alpha1.MachinePoolReady, Status: corev1.ConditionFalse, Reason: "X"}, + } + got := computev1alpha1.FindMachinePoolCondition(conds, computev1alpha1.MachinePoolReady) + if got == nil || got.Reason != "X" { + t.Fatalf("expected Ready condition with reason X, got %+v", got) + } +} + +func TestFindMachinePoolCondition_returnsNilWhenMissing(t *testing.T) { + conds := []computev1alpha1.MachinePoolCondition{{Type: "Other"}} + if got := computev1alpha1.FindMachinePoolCondition(conds, computev1alpha1.MachinePoolReady); got != nil { + t.Fatalf("expected nil, got %+v", got) + } +} + +func TestSetMachinePoolCondition_appendsWhenAbsent(t *testing.T) { + out := computev1alpha1.SetMachinePoolCondition(nil, 
computev1alpha1.MachinePoolCondition{ + Type: computev1alpha1.MachinePoolReady, + Status: corev1.ConditionTrue, + }) + if len(out) != 1 || out[0].Type != computev1alpha1.MachinePoolReady { + t.Fatalf("expected Ready appended, got %+v", out) + } + if out[0].LastTransitionTime.IsZero() { + t.Fatal("expected LastTransitionTime to be set on first append") + } + if out[0].LastUpdateTime.IsZero() { + t.Fatal("expected LastUpdateTime to be set on first append") + } +} + +func TestSetMachinePoolCondition_updatesInPlaceWithoutTransition(t *testing.T) { + earlier := metav1.NewTime(time.Now().Add(-time.Hour)) + in := []computev1alpha1.MachinePoolCondition{{ + Type: computev1alpha1.MachinePoolReady, + Status: corev1.ConditionTrue, + LastTransitionTime: earlier, + }} + out := computev1alpha1.SetMachinePoolCondition(in, computev1alpha1.MachinePoolCondition{ + Type: computev1alpha1.MachinePoolReady, + Status: corev1.ConditionTrue, // same status + Message: "still ready", + }) + if !out[0].LastTransitionTime.Equal(&earlier) { + t.Fatalf("expected LastTransitionTime preserved when status unchanged, got %v", out[0].LastTransitionTime) + } + if out[0].LastUpdateTime.IsZero() { + t.Fatal("expected LastUpdateTime advanced") + } +} + +func TestSetMachinePoolCondition_advancesTransitionWhenStatusChanges(t *testing.T) { + earlier := metav1.NewTime(time.Now().Add(-time.Hour)) + in := []computev1alpha1.MachinePoolCondition{{ + Type: computev1alpha1.MachinePoolReady, + Status: corev1.ConditionTrue, + LastTransitionTime: earlier, + }} + out := computev1alpha1.SetMachinePoolCondition(in, computev1alpha1.MachinePoolCondition{ + Type: computev1alpha1.MachinePoolReady, + Status: corev1.ConditionFalse, + }) + if out[0].LastTransitionTime.Equal(&earlier) { + t.Fatal("expected LastTransitionTime to advance when status changes") + } +} diff --git a/api/compute/v1alpha1/machinepool_types.go b/api/compute/v1alpha1/machinepool_types.go index 421b95cc4..1c5a966a9 100644 --- 
a/api/compute/v1alpha1/machinepool_types.go +++ b/api/compute/v1alpha1/machinepool_types.go @@ -85,6 +85,11 @@ type MachinePoolAddress struct { // MachinePoolConditionType is a type a MachinePoolCondition can have. type MachinePoolConditionType string +const ( + // MachinePoolReady means the machine pool is healthy and ready to accept machines. + MachinePoolReady MachinePoolConditionType = "Ready" +) + // MachinePoolCondition is one of the conditions of a MachinePool. type MachinePoolCondition struct { // Type is the type of the condition. @@ -97,6 +102,8 @@ type MachinePoolCondition struct { Message string `json:"message"` // ObservedGeneration represents the .metadata.generation that the condition was set based upon. ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` } diff --git a/api/compute/v1alpha1/zz_generated.deepcopy.go b/api/compute/v1alpha1/zz_generated.deepcopy.go index 95e79d7df..200daf8d8 100644 --- a/api/compute/v1alpha1/zz_generated.deepcopy.go +++ b/api/compute/v1alpha1/zz_generated.deepcopy.go @@ -365,6 +365,7 @@ func (in *MachinePoolAddress) DeepCopy() *MachinePoolAddress { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *MachinePoolCondition) DeepCopyInto(out *MachinePoolCondition) { *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) return } diff --git a/api/storage/v1alpha1/bucketpool_types.go b/api/storage/v1alpha1/bucketpool_types.go index e7a2598dd..d9a112dcd 100644 --- a/api/storage/v1alpha1/bucketpool_types.go +++ b/api/storage/v1alpha1/bucketpool_types.go @@ -22,11 +22,38 @@ type BucketPoolSpec struct { // BucketPoolStatus defines the observed state of BucketPool type BucketPoolStatus struct { // State represents the infrastructure state of a BucketPool. - State BucketPoolState `json:"state,omitempty"` + State BucketPoolState `json:"state,omitempty"` + Conditions []BucketPoolCondition `json:"conditions,omitempty"` // AvailableBucketClasses list the references of any supported BucketClass of this pool AvailableBucketClasses []corev1.LocalObjectReference `json:"availableBucketClasses,omitempty"` } +// BucketPoolConditionType is a type a BucketPoolCondition can have. +type BucketPoolConditionType string + +const ( + // BucketPoolReady means the bucket pool is healthy and ready to accept buckets. + BucketPoolReady BucketPoolConditionType = "Ready" +) + +// BucketPoolCondition is one of the conditions of a BucketPool. +type BucketPoolCondition struct { + // Type is the type of the condition. + Type BucketPoolConditionType `json:"type"` + // Status is the status of the condition. + Status corev1.ConditionStatus `json:"status"` + // Reason is a machine-readable indication of why the condition is in a certain state. + Reason string `json:"reason"` + // Message is a human-readable explanation of why the condition has a certain reason / state. + Message string `json:"message"` + // ObservedGeneration represents the .metadata.generation that the condition was set based upon. + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // LastUpdateTime is the last time this condition was updated. 
+ LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` + // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. + LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` +} + type BucketPoolState string const ( diff --git a/api/storage/v1alpha1/common.go b/api/storage/v1alpha1/common.go index f877083da..3a0a50cde 100644 --- a/api/storage/v1alpha1/common.go +++ b/api/storage/v1alpha1/common.go @@ -5,6 +5,11 @@ package v1alpha1 import corev1 "k8s.io/api/core/v1" +const ( + NamespaceVolumePoolLease = "ironcore-volumepool-lease" + NamespaceBucketPoolLease = "ironcore-bucketpool-lease" +) + const ( VolumeVolumePoolRefNameField = "spec.volumePoolRef.name" VolumeVolumeClassRefNameField = "spec.volumeClassRef.name" diff --git a/api/storage/v1alpha1/volumepool_types.go b/api/storage/v1alpha1/volumepool_types.go index 2596edf8a..65c60d9ac 100644 --- a/api/storage/v1alpha1/volumepool_types.go +++ b/api/storage/v1alpha1/volumepool_types.go @@ -43,6 +43,11 @@ const ( // VolumePoolConditionType is a type a VolumePoolCondition can have. type VolumePoolConditionType string +const ( + // VolumePoolReady means the volume pool is healthy and ready to accept volumes. + VolumePoolReady VolumePoolConditionType = "Ready" +) + // VolumePoolCondition is one of the conditions of a volume. type VolumePoolCondition struct { // Type is the type of the condition. @@ -55,6 +60,8 @@ type VolumePoolCondition struct { Message string `json:"message"` // ObservedGeneration represents the .metadata.generation that the condition was set based upon. ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. 
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` } diff --git a/api/storage/v1alpha1/zz_generated.deepcopy.go b/api/storage/v1alpha1/zz_generated.deepcopy.go index f43120767..2fa3cbdcc 100644 --- a/api/storage/v1alpha1/zz_generated.deepcopy.go +++ b/api/storage/v1alpha1/zz_generated.deepcopy.go @@ -208,6 +208,24 @@ func (in *BucketPool) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BucketPoolCondition) DeepCopyInto(out *BucketPoolCondition) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BucketPoolCondition. +func (in *BucketPoolCondition) DeepCopy() *BucketPoolCondition { + if in == nil { + return nil + } + out := new(BucketPoolCondition) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BucketPoolList) DeepCopyInto(out *BucketPoolList) { *out = *in @@ -265,6 +283,13 @@ func (in *BucketPoolSpec) DeepCopy() *BucketPoolSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BucketPoolStatus) DeepCopyInto(out *BucketPoolStatus) { *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]BucketPoolCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.AvailableBucketClasses != nil { in, out := &in.AvailableBucketClasses, &out.AvailableBucketClasses *out = make([]v1.LocalObjectReference, len(*in)) @@ -638,6 +663,7 @@ func (in *VolumePool) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. func (in *VolumePoolCondition) DeepCopyInto(out *VolumePoolCondition) { *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) return } diff --git a/api/storage/v1alpha1/zz_generated.model_name.go b/api/storage/v1alpha1/zz_generated.model_name.go index 2ad4ce1d2..3c6abb3d5 100644 --- a/api/storage/v1alpha1/zz_generated.model_name.go +++ b/api/storage/v1alpha1/zz_generated.model_name.go @@ -43,6 +43,11 @@ func (in BucketPool) OpenAPIModelName() string { return "com.github.ironcore-dev.ironcore.api.storage.v1alpha1.BucketPool" } +// OpenAPIModelName returns the OpenAPI model name for this type. +func (in BucketPoolCondition) OpenAPIModelName() string { + return "com.github.ironcore-dev.ironcore.api.storage.v1alpha1.BucketPoolCondition" +} + // OpenAPIModelName returns the OpenAPI model name for this type. func (in BucketPoolList) OpenAPIModelName() string { return "com.github.ironcore-dev.ironcore.api.storage.v1alpha1.BucketPoolList" diff --git a/client-go/applyconfigurations/compute/v1alpha1/machinepoolcondition.go b/client-go/applyconfigurations/compute/v1alpha1/machinepoolcondition.go index 6ca1a2f3e..ac89e33f3 100644 --- a/client-go/applyconfigurations/compute/v1alpha1/machinepoolcondition.go +++ b/client-go/applyconfigurations/compute/v1alpha1/machinepoolcondition.go @@ -26,6 +26,8 @@ type MachinePoolConditionApplyConfiguration struct { Message *string `json:"message,omitempty"` // ObservedGeneration represents the .metadata.generation that the condition was set based upon. ObservedGeneration *int64 `json:"observedGeneration,omitempty"` + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. 
LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"` } @@ -76,6 +78,14 @@ func (b *MachinePoolConditionApplyConfiguration) WithObservedGeneration(value in return b } +// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastUpdateTime field is set to the value of the last call. +func (b *MachinePoolConditionApplyConfiguration) WithLastUpdateTime(value metav1.Time) *MachinePoolConditionApplyConfiguration { + b.LastUpdateTime = &value + return b +} + // WithLastTransitionTime sets the LastTransitionTime field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the LastTransitionTime field is set to the value of the last call. diff --git a/client-go/applyconfigurations/storage/v1alpha1/bucketpoolcondition.go b/client-go/applyconfigurations/storage/v1alpha1/bucketpoolcondition.go new file mode 100644 index 000000000..5f84beda2 --- /dev/null +++ b/client-go/applyconfigurations/storage/v1alpha1/bucketpoolcondition.go @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + storagev1alpha1 "github.com/ironcore-dev/ironcore/api/storage/v1alpha1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// BucketPoolConditionApplyConfiguration represents a declarative configuration of the BucketPoolCondition type for use +// with apply. +// +// BucketPoolCondition is one of the conditions of a BucketPool. +type BucketPoolConditionApplyConfiguration struct { + // Type is the type of the condition. 
+ Type *storagev1alpha1.BucketPoolConditionType `json:"type,omitempty"` + // Status is the status of the condition. + Status *v1.ConditionStatus `json:"status,omitempty"` + // Reason is a machine-readable indication of why the condition is in a certain state. + Reason *string `json:"reason,omitempty"` + // Message is a human-readable explanation of why the condition has a certain reason / state. + Message *string `json:"message,omitempty"` + // ObservedGeneration represents the .metadata.generation that the condition was set based upon. + ObservedGeneration *int64 `json:"observedGeneration,omitempty"` + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` + // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. + LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"` +} + +// BucketPoolConditionApplyConfiguration constructs a declarative configuration of the BucketPoolCondition type for use with +// apply. +func BucketPoolCondition() *BucketPoolConditionApplyConfiguration { + return &BucketPoolConditionApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. +func (b *BucketPoolConditionApplyConfiguration) WithType(value storagev1alpha1.BucketPoolConditionType) *BucketPoolConditionApplyConfiguration { + b.Type = &value + return b +} + +// WithStatus sets the Status field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Status field is set to the value of the last call. 
+func (b *BucketPoolConditionApplyConfiguration) WithStatus(value v1.ConditionStatus) *BucketPoolConditionApplyConfiguration { + b.Status = &value + return b +} + +// WithReason sets the Reason field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Reason field is set to the value of the last call. +func (b *BucketPoolConditionApplyConfiguration) WithReason(value string) *BucketPoolConditionApplyConfiguration { + b.Reason = &value + return b +} + +// WithMessage sets the Message field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Message field is set to the value of the last call. +func (b *BucketPoolConditionApplyConfiguration) WithMessage(value string) *BucketPoolConditionApplyConfiguration { + b.Message = &value + return b +} + +// WithObservedGeneration sets the ObservedGeneration field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ObservedGeneration field is set to the value of the last call. +func (b *BucketPoolConditionApplyConfiguration) WithObservedGeneration(value int64) *BucketPoolConditionApplyConfiguration { + b.ObservedGeneration = &value + return b +} + +// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastUpdateTime field is set to the value of the last call. 
+func (b *BucketPoolConditionApplyConfiguration) WithLastUpdateTime(value metav1.Time) *BucketPoolConditionApplyConfiguration { + b.LastUpdateTime = &value + return b +} + +// WithLastTransitionTime sets the LastTransitionTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastTransitionTime field is set to the value of the last call. +func (b *BucketPoolConditionApplyConfiguration) WithLastTransitionTime(value metav1.Time) *BucketPoolConditionApplyConfiguration { + b.LastTransitionTime = &value + return b +} diff --git a/client-go/applyconfigurations/storage/v1alpha1/bucketpoolstatus.go b/client-go/applyconfigurations/storage/v1alpha1/bucketpoolstatus.go index 9b669ffb5..64807109a 100644 --- a/client-go/applyconfigurations/storage/v1alpha1/bucketpoolstatus.go +++ b/client-go/applyconfigurations/storage/v1alpha1/bucketpoolstatus.go @@ -16,7 +16,8 @@ import ( // BucketPoolStatus defines the observed state of BucketPool type BucketPoolStatusApplyConfiguration struct { // State represents the infrastructure state of a BucketPool. - State *storagev1alpha1.BucketPoolState `json:"state,omitempty"` + State *storagev1alpha1.BucketPoolState `json:"state,omitempty"` + Conditions []BucketPoolConditionApplyConfiguration `json:"conditions,omitempty"` // AvailableBucketClasses list the references of any supported BucketClass of this pool AvailableBucketClasses []v1.LocalObjectReference `json:"availableBucketClasses,omitempty"` } @@ -35,6 +36,19 @@ func (b *BucketPoolStatusApplyConfiguration) WithState(value storagev1alpha1.Buc return b } +// WithConditions adds the given value to the Conditions field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the Conditions field. 
+func (b *BucketPoolStatusApplyConfiguration) WithConditions(values ...*BucketPoolConditionApplyConfiguration) *BucketPoolStatusApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithConditions") + } + b.Conditions = append(b.Conditions, *values[i]) + } + return b +} + // WithAvailableBucketClasses adds the given value to the AvailableBucketClasses field in the declarative configuration // and returns the receiver, so that objects can be build by chaining "With" function invocations. // If called multiple times, values provided by each call will be appended to the AvailableBucketClasses field. diff --git a/client-go/applyconfigurations/storage/v1alpha1/volumepoolcondition.go b/client-go/applyconfigurations/storage/v1alpha1/volumepoolcondition.go index 000b6d5e1..12afe9439 100644 --- a/client-go/applyconfigurations/storage/v1alpha1/volumepoolcondition.go +++ b/client-go/applyconfigurations/storage/v1alpha1/volumepoolcondition.go @@ -26,6 +26,8 @@ type VolumePoolConditionApplyConfiguration struct { Message *string `json:"message,omitempty"` // ObservedGeneration represents the .metadata.generation that the condition was set based upon. ObservedGeneration *int64 `json:"observedGeneration,omitempty"` + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. LastTransitionTime *metav1.Time `json:"lastTransitionTime,omitempty"` } @@ -76,6 +78,14 @@ func (b *VolumePoolConditionApplyConfiguration) WithObservedGeneration(value int return b } +// WithLastUpdateTime sets the LastUpdateTime field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the LastUpdateTime field is set to the value of the last call. 
+func (b *VolumePoolConditionApplyConfiguration) WithLastUpdateTime(value metav1.Time) *VolumePoolConditionApplyConfiguration { + b.LastUpdateTime = &value + return b +} + // WithLastTransitionTime sets the LastTransitionTime field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the LastTransitionTime field is set to the value of the last call. diff --git a/client-go/applyconfigurations/utils.go b/client-go/applyconfigurations/utils.go index ea88d4c99..ffef835d7 100644 --- a/client-go/applyconfigurations/utils.go +++ b/client-go/applyconfigurations/utils.go @@ -203,6 +203,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &applyconfigurationsstoragev1alpha1.BucketConditionApplyConfiguration{} case storagev1alpha1.SchemeGroupVersion.WithKind("BucketPool"): return &applyconfigurationsstoragev1alpha1.BucketPoolApplyConfiguration{} + case storagev1alpha1.SchemeGroupVersion.WithKind("BucketPoolCondition"): + return &applyconfigurationsstoragev1alpha1.BucketPoolConditionApplyConfiguration{} case storagev1alpha1.SchemeGroupVersion.WithKind("BucketPoolSpec"): return &applyconfigurationsstoragev1alpha1.BucketPoolSpecApplyConfiguration{} case storagev1alpha1.SchemeGroupVersion.WithKind("BucketPoolStatus"): diff --git a/client-go/openapi/api_violations.report b/client-go/openapi/api_violations.report index ef96966c9..b3f50180a 100644 --- a/client-go/openapi/api_violations.report +++ b/client-go/openapi/api_violations.report @@ -39,6 +39,7 @@ API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/netwo API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/networking/v1alpha1,NetworkStatus,Peerings API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/storage/v1alpha1,BucketPoolSpec,Taints API rule violation: 
list_type_missing,github.com/ironcore-dev/ironcore/api/storage/v1alpha1,BucketPoolStatus,AvailableBucketClasses +API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/storage/v1alpha1,BucketPoolStatus,Conditions API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/storage/v1alpha1,BucketSpec,Tolerations API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/storage/v1alpha1,BucketStatus,Conditions API rule violation: list_type_missing,github.com/ironcore-dev/ironcore/api/storage/v1alpha1,VolumePoolSpec,Taints diff --git a/client-go/openapi/zz_generated.openapi.go b/client-go/openapi/zz_generated.openapi.go index fd12f8359..55d265646 100644 --- a/client-go/openapi/zz_generated.openapi.go +++ b/client-go/openapi/zz_generated.openapi.go @@ -135,6 +135,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA storagev1alpha1.BucketCondition{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketCondition(ref), storagev1alpha1.BucketList{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketList(ref), storagev1alpha1.BucketPool{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketPool(ref), + storagev1alpha1.BucketPoolCondition{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketPoolCondition(ref), storagev1alpha1.BucketPoolList{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketPoolList(ref), storagev1alpha1.BucketPoolSpec{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketPoolSpec(ref), storagev1alpha1.BucketPoolStatus{}.OpenAPIModelName(): schema_ironcore_api_storage_v1alpha1_BucketPoolStatus(ref), @@ -1295,6 +1296,12 @@ func schema_ironcore_api_compute_v1alpha1_MachinePoolCondition(ref common.Refere Format: "int64", }, }, + "lastUpdateTime": { + SchemaProps: spec.SchemaProps{ + Description: "LastUpdateTime is the last time this condition was updated.", + Ref: ref(metav1.Time{}.OpenAPIModelName()), + }, + 
}, "lastTransitionTime": { SchemaProps: spec.SchemaProps{ Description: "LastTransitionTime is the last time the status of a condition has transitioned from one state to another.", @@ -5112,6 +5119,73 @@ func schema_ironcore_api_storage_v1alpha1_BucketPool(ref common.ReferenceCallbac } } +func schema_ironcore_api_storage_v1alpha1_BucketPoolCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "BucketPoolCondition is one of the conditions of a BucketPool.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "type": { + SchemaProps: spec.SchemaProps{ + Description: "Type is the type of the condition.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "status": { + SchemaProps: spec.SchemaProps{ + Description: "Status is the status of the condition.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "reason": { + SchemaProps: spec.SchemaProps{ + Description: "Reason is a machine-readable indication of why the condition is in a certain state.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "message": { + SchemaProps: spec.SchemaProps{ + Description: "Message is a human-readable explanation of why the condition has a certain reason / state.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "observedGeneration": { + SchemaProps: spec.SchemaProps{ + Description: "ObservedGeneration represents the .metadata.generation that the condition was set based upon.", + Type: []string{"integer"}, + Format: "int64", + }, + }, + "lastUpdateTime": { + SchemaProps: spec.SchemaProps{ + Description: "LastUpdateTime is the last time this condition was updated.", + Ref: ref(metav1.Time{}.OpenAPIModelName()), + }, + }, + "lastTransitionTime": { + SchemaProps: spec.SchemaProps{ + Description: "LastTransitionTime is the last time the status of a condition has transitioned from 
one state to another.", + Ref: ref(metav1.Time{}.OpenAPIModelName()), + }, + }, + }, + Required: []string{"type", "status", "reason", "message"}, + }, + }, + Dependencies: []string{ + metav1.Time{}.OpenAPIModelName()}, + } +} + func schema_ironcore_api_storage_v1alpha1_BucketPoolList(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -5213,6 +5287,19 @@ func schema_ironcore_api_storage_v1alpha1_BucketPoolStatus(ref common.ReferenceC Format: "", }, }, + "conditions": { + SchemaProps: spec.SchemaProps{ + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref(storagev1alpha1.BucketPoolCondition{}.OpenAPIModelName()), + }, + }, + }, + }, + }, "availableBucketClasses": { SchemaProps: spec.SchemaProps{ Description: "AvailableBucketClasses list the references of any supported BucketClass of this pool", @@ -5231,7 +5318,7 @@ func schema_ironcore_api_storage_v1alpha1_BucketPoolStatus(ref common.ReferenceC }, }, Dependencies: []string{ - v1.LocalObjectReference{}.OpenAPIModelName()}, + storagev1alpha1.BucketPoolCondition{}.OpenAPIModelName(), v1.LocalObjectReference{}.OpenAPIModelName()}, } } @@ -5857,6 +5944,12 @@ func schema_ironcore_api_storage_v1alpha1_VolumePoolCondition(ref common.Referen Format: "int64", }, }, + "lastUpdateTime": { + SchemaProps: spec.SchemaProps{ + Description: "LastUpdateTime is the last time this condition was updated.", + Ref: ref(metav1.Time{}.OpenAPIModelName()), + }, + }, "lastTransitionTime": { SchemaProps: spec.SchemaProps{ Description: "LastTransitionTime is the last time the status of a condition has transitioned from one state to another.", diff --git a/cmd/ironcore-controller-manager/main.go b/cmd/ironcore-controller-manager/main.go index afcee54fc..5cbdf315d 100644 --- a/cmd/ironcore-controller-manager/main.go +++ b/cmd/ironcore-controller-manager/main.go @@ -64,6 +64,7 @@ 
const ( machineEphemeralVolumeController = "machineephemeralvolume" machineSchedulerController = "machinescheduler" machineClassController = "machineclass" + machinePoolLifecycleController = "machinepoollifecycle" // storage controllers bucketScheduler = "bucketscheduler" @@ -114,6 +115,7 @@ func main() { var volumeBindTimeout time.Duration var virtualIPBindTimeout time.Duration var networkInterfaceBindTimeout time.Duration + var machinePoolLifecycleGracePeriod time.Duration var tlsOpts []func(*tls.Config) flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") @@ -134,6 +136,7 @@ func main() { flag.DurationVar(&volumeBindTimeout, "volume-bind-timeout", 10*time.Second, "Time to wait until considering a volume bind to be failed.") flag.DurationVar(&virtualIPBindTimeout, "virtual-ip-bind-timeout", 10*time.Second, "Time to wait until considering a virtual ip bind to be failed.") flag.DurationVar(&networkInterfaceBindTimeout, "network-interface-bind-timeout", 10*time.Second, "Time to wait until considering a network interface bind to be failed.") + flag.DurationVar(&machinePoolLifecycleGracePeriod, "machine-pool-lifecycle-grace-period", 50*time.Second, "Grace period without a heartbeat before a machine pool's Ready condition is marked Unknown.") controllers := switches.New( // compute controllers @@ -141,6 +144,7 @@ func main() { machineEphemeralVolumeController, machineSchedulerController, machineClassController, + machinePoolLifecycleController, // storage controllers bucketScheduler, @@ -321,6 +325,16 @@ func main() { } } + if controllers.Enabled(machinePoolLifecycleController) { + if err := (&computecontrollers.MachinePoolLifecycleReconciler{ + Client: mgr.GetClient(), + GracePeriod: machinePoolLifecycleGracePeriod, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", 
"MachinePoolLifecycle") + os.Exit(1) + } + } + // storage controllers if controllers.Enabled(bucketScheduler) { diff --git a/config/apiserver/standalone-etcdless/kustomization.yaml b/config/apiserver/standalone-etcdless/kustomization.yaml index 4a0c75d61..8ae631695 100644 --- a/config/apiserver/standalone-etcdless/kustomization.yaml +++ b/config/apiserver/standalone-etcdless/kustomization.yaml @@ -3,10 +3,11 @@ # Standalone install of the ironcore apiserver against an external etcd # (no in-cluster etcd StatefulSet): same content as config/apiserver/etcdless -# plus the ironcore-system Namespace. +# plus the Namespaces and lease-namespace RBAC. apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ../etcdless - ../../namespaces/ironcore-system + - ../../namespaces/machinepool-lease diff --git a/config/apiserver/standalone/kustomization.yaml b/config/apiserver/standalone/kustomization.yaml index b9f9a5ee9..099a38402 100644 --- a/config/apiserver/standalone/kustomization.yaml +++ b/config/apiserver/standalone/kustomization.yaml @@ -2,12 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 # Standalone install of the ironcore apiserver (with the bundled etcd -# StatefulSet): same content as config/apiserver/default plus the -# ironcore-system Namespace. Use this when deploying the apiserver without -# the controller manager. +# StatefulSet): same content as config/apiserver/default plus the Namespaces +# and lease-namespace RBAC. Use this when deploying the apiserver without the +# controller manager. 
apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ../default - ../../namespaces/ironcore-system + - ../../namespaces/machinepool-lease diff --git a/config/controller/standalone/kustomization.yaml b/config/controller/standalone/kustomization.yaml index c0bc3a893..e4b265233 100644 --- a/config/controller/standalone/kustomization.yaml +++ b/config/controller/standalone/kustomization.yaml @@ -2,9 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 # Standalone install of the ironcore controller manager: same content as -# config/controller/default plus the ironcore-system Namespace. Use this when -# deploying the controller without the apiserver. The combined config/default -# install references the Namespace kustomization directly, not via this +# config/controller/default, plus the Namespaces (ironcore-system for the +# manager itself, ironcore-machinepool-lease for the lifecycle controller's +# lease watch) and the lease-namespace RBAC. Use this when deploying the +# controller without the apiserver. The combined config/default install +# references the Namespace and lease kustomizations directly, not via this # wrapper. apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization @@ -12,3 +14,4 @@ kind: Kustomization resources: - ../default - ../../namespaces/ironcore-system + - ../../namespaces/machinepool-lease diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 1abc87e85..75fda58d2 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # Combined install: apiserver (with bundled etcd) + controller manager. -# Both bases are namespace-free; the namespace kustomization is referenced +# Both bases are namespace-free; the namespace kustomizations are referenced # directly here. This replaces the previous remove-namespace.yaml patch dance. 
apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization @@ -11,3 +11,4 @@ resources: - ../apiserver/default - ../controller/default - ../namespaces/ironcore-system + - ../namespaces/machinepool-lease diff --git a/config/etcdless/kustomization.yaml b/config/etcdless/kustomization.yaml index 3fe4f2870..85c7a876b 100644 --- a/config/etcdless/kustomization.yaml +++ b/config/etcdless/kustomization.yaml @@ -11,3 +11,4 @@ resources: - ../apiserver/etcdless - ../controller/default - ../namespaces/ironcore-system + - ../namespaces/machinepool-lease diff --git a/config/machinepoollet-broker/poollet-rbac/role.yaml b/config/machinepoollet-broker/poollet-rbac/role.yaml index 51befd791..45411fabb 100644 --- a/config/machinepoollet-broker/poollet-rbac/role.yaml +++ b/config/machinepoollet-broker/poollet-rbac/role.yaml @@ -139,3 +139,19 @@ rules: - patch - update - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: manager-role + namespace: ironcore-machinepool-lease +rules: +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - patch + - update diff --git a/config/namespaces/machinepool-lease/controller_role.yaml b/config/namespaces/machinepool-lease/controller_role.yaml new file mode 100644 index 000000000..633b2ffb4 --- /dev/null +++ b/config/namespaces/machinepool-lease/controller_role.yaml @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +# SPDX-License-Identifier: Apache-2.0 + +# Lets the ironcore controller manager observe pool leases. The lifecycle +# controller (see internal/controllers/compute/machinepool_lifecycle_controller.go) +# watches and reads these leases to detect failed pools. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: ironcore-controller-manager:lease-reader + namespace: ironcore-machinepool-lease +rules: +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch diff --git a/config/namespaces/machinepool-lease/controller_rolebinding.yaml b/config/namespaces/machinepool-lease/controller_rolebinding.yaml new file mode 100644 index 000000000..a5605169b --- /dev/null +++ b/config/namespaces/machinepool-lease/controller_rolebinding.yaml @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +# SPDX-License-Identifier: Apache-2.0 + +# Cross-namespace binding: the controller runs in ironcore-system but needs to +# read leases in ironcore-machinepool-lease. The subject name is hard-coded with +# the `ironcore-` prefix because this binding lives outside the layer that +# applies `namePrefix: ironcore-`. If that prefix is ever changed in +# config/controller/default, this name must be updated too. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: ironcore-controller-manager:lease-reader + namespace: ironcore-machinepool-lease +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: ironcore-controller-manager:lease-reader +subjects: + - kind: ServiceAccount + name: ironcore-controller-manager + namespace: ironcore-system diff --git a/config/namespaces/machinepool-lease/kustomization.yaml b/config/namespaces/machinepool-lease/kustomization.yaml new file mode 100644 index 000000000..555482660 --- /dev/null +++ b/config/namespaces/machinepool-lease/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - namespace.yaml + - poollet_role.yaml + - poollet_rolebinding.yaml + - controller_role.yaml + - controller_rolebinding.yaml diff --git a/config/namespaces/machinepool-lease/namespace.yaml b/config/namespaces/machinepool-lease/namespace.yaml new file mode 100644 index 000000000..dfd84627a --- /dev/null +++ b/config/namespaces/machinepool-lease/namespace.yaml @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +# SPDX-License-Identifier: Apache-2.0 + +# Cluster-shared namespace where every machine poollet renews its pool Lease. +# Lifecycle controllers in the ironcore-system namespace watch leases here to +# detect failed pools. +apiVersion: v1 +kind: Namespace +metadata: + name: ironcore-machinepool-lease diff --git a/config/namespaces/machinepool-lease/poollet_role.yaml b/config/namespaces/machinepool-lease/poollet_role.yaml new file mode 100644 index 000000000..0beb6ae13 --- /dev/null +++ b/config/namespaces/machinepool-lease/poollet_role.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +# SPDX-License-Identifier: Apache-2.0 + +# Role granting the rights every machine poollet needs in the lease namespace +# to renew its pool lease. 
Bound by the matching RoleBinding in this directory +# to the `compute.ironcore.dev:system:machinepools` Group, which every poollet +# joins via its client cert organization. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: compute.ironcore.dev:system:machinepools + namespace: ironcore-machinepool-lease +rules: +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - patch + - update diff --git a/config/namespaces/machinepool-lease/poollet_rolebinding.yaml b/config/namespaces/machinepool-lease/poollet_rolebinding.yaml new file mode 100644 index 000000000..2b87d41ed --- /dev/null +++ b/config/namespaces/machinepool-lease/poollet_rolebinding.yaml @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +# SPDX-License-Identifier: Apache-2.0 + +# Binds the lease-renewal Role to every authenticated machine poollet via the +# `compute.ironcore.dev:system:machinepools` Group, which every poollet joins +# through its client cert organization (see api/compute/v1alpha1/common.go). 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: compute.ironcore.dev:system:machinepools + namespace: ironcore-machinepool-lease +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: compute.ironcore.dev:system:machinepools +subjects: + - kind: Group + name: compute.ironcore.dev:system:machinepools + apiGroup: rbac.authorization.k8s.io diff --git a/go.mod b/go.mod index 808e5759e..f26d7ddb2 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/go-chi/chi/v5 v5.2.5 github.com/go-logr/logr v1.4.3 github.com/google/go-cmp v0.7.0 + github.com/google/uuid v1.6.0 github.com/ironcore-dev/controller-utils v0.11.0 github.com/onsi/ginkgo/v2 v2.29.0 github.com/onsi/gomega v1.41.0 @@ -77,7 +78,6 @@ require ( github.com/google/cel-go v0.26.0 // indirect github.com/google/gnostic-models v0.7.0 // indirect github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh index 200ad9c61..404f831f0 100755 --- a/hack/update-codegen.sh +++ b/hack/update-codegen.sh @@ -53,7 +53,7 @@ declare -a GOMODS=( ) echo "Setting permissions for files of relevant go modules to 644" for MOD in "${GOMODS[@]}"; do - find "$(go list -json -m -u "${MOD}" | jq -r '.Dir')" -type f -exec chmod 644 -- {} + + find "$(go list -json -m -u "${MOD}" | jq -r '.Dir')" -type f -exec chmod 644 {} + done echo "Generating ${blue}openapi${normal}" diff --git a/internal/apis/compute/machinepool_types.go b/internal/apis/compute/machinepool_types.go index 891de8f71..ac9511c69 100644 --- a/internal/apis/compute/machinepool_types.go +++ b/internal/apis/compute/machinepool_types.go @@ -97,6 +97,8 @@ type MachinePoolCondition struct { Message string // 
ObservedGeneration represents the .metadata.generation that the condition was set based upon. ObservedGeneration int64 + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime metav1.Time // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. LastTransitionTime metav1.Time } diff --git a/internal/apis/compute/v1alpha1/zz_generated.conversion.go b/internal/apis/compute/v1alpha1/zz_generated.conversion.go index a75b13888..81ebd3dd7 100644 --- a/internal/apis/compute/v1alpha1/zz_generated.conversion.go +++ b/internal/apis/compute/v1alpha1/zz_generated.conversion.go @@ -713,6 +713,7 @@ func autoConvert_v1alpha1_MachinePoolCondition_To_compute_MachinePoolCondition(i out.Reason = in.Reason out.Message = in.Message out.ObservedGeneration = in.ObservedGeneration + out.LastUpdateTime = in.LastUpdateTime out.LastTransitionTime = in.LastTransitionTime return nil } @@ -728,6 +729,7 @@ func autoConvert_compute_MachinePoolCondition_To_v1alpha1_MachinePoolCondition(i out.Reason = in.Reason out.Message = in.Message out.ObservedGeneration = in.ObservedGeneration + out.LastUpdateTime = in.LastUpdateTime out.LastTransitionTime = in.LastTransitionTime return nil } diff --git a/internal/apis/compute/zz_generated.deepcopy.go b/internal/apis/compute/zz_generated.deepcopy.go index fc2ca7405..16df3a087 100644 --- a/internal/apis/compute/zz_generated.deepcopy.go +++ b/internal/apis/compute/zz_generated.deepcopy.go @@ -365,6 +365,7 @@ func (in *MachinePoolAddress) DeepCopy() *MachinePoolAddress { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *MachinePoolCondition) DeepCopyInto(out *MachinePoolCondition) { *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) return } diff --git a/internal/apis/storage/bucketpool_types.go b/internal/apis/storage/bucketpool_types.go index 7a5f1fda4..dc1225ae9 100644 --- a/internal/apis/storage/bucketpool_types.go +++ b/internal/apis/storage/bucketpool_types.go @@ -22,11 +22,38 @@ type BucketPoolSpec struct { // BucketPoolStatus defines the observed state of BucketPool type BucketPoolStatus struct { // State represents the infrastructure state of a BucketPool. - State BucketPoolState + State BucketPoolState + Conditions []BucketPoolCondition // AvailableBucketClasses list the references of any supported BucketClass of this pool AvailableBucketClasses []corev1.LocalObjectReference } +// BucketPoolConditionType is a type a BucketPoolCondition can have. +type BucketPoolConditionType string + +const ( + // BucketPoolReady means the bucket pool is healthy and ready to accept buckets. + BucketPoolReady BucketPoolConditionType = "Ready" +) + +// BucketPoolCondition is one of the conditions of a BucketPool. +type BucketPoolCondition struct { + // Type is the type of the condition. + Type BucketPoolConditionType + // Status is the status of the condition. + Status corev1.ConditionStatus + // Reason is a machine-readable indication of why the condition is in a certain state. + Reason string + // Message is a human-readable explanation of why the condition has a certain reason / state. + Message string + // ObservedGeneration represents the .metadata.generation that the condition was set based upon. + ObservedGeneration int64 + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime metav1.Time + // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. 
+ LastTransitionTime metav1.Time +} + type BucketPoolState string const ( diff --git a/internal/apis/storage/v1alpha1/zz_generated.conversion.go b/internal/apis/storage/v1alpha1/zz_generated.conversion.go index 9abebfae8..eb2e319ec 100644 --- a/internal/apis/storage/v1alpha1/zz_generated.conversion.go +++ b/internal/apis/storage/v1alpha1/zz_generated.conversion.go @@ -100,6 +100,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*storagev1alpha1.BucketPoolCondition)(nil), (*storage.BucketPoolCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_BucketPoolCondition_To_storage_BucketPoolCondition(a.(*storagev1alpha1.BucketPoolCondition), b.(*storage.BucketPoolCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*storage.BucketPoolCondition)(nil), (*storagev1alpha1.BucketPoolCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_storage_BucketPoolCondition_To_v1alpha1_BucketPoolCondition(a.(*storage.BucketPoolCondition), b.(*storagev1alpha1.BucketPoolCondition), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*storagev1alpha1.BucketPoolList)(nil), (*storage.BucketPoolList)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha1_BucketPoolList_To_storage_BucketPoolList(a.(*storagev1alpha1.BucketPoolList), b.(*storage.BucketPoolList), scope) }); err != nil { @@ -555,6 +565,38 @@ func Convert_storage_BucketPool_To_v1alpha1_BucketPool(in *storage.BucketPool, o return autoConvert_storage_BucketPool_To_v1alpha1_BucketPool(in, out, s) } +func autoConvert_v1alpha1_BucketPoolCondition_To_storage_BucketPoolCondition(in *storagev1alpha1.BucketPoolCondition, out *storage.BucketPoolCondition, s conversion.Scope) error { + out.Type = storage.BucketPoolConditionType(in.Type) + out.Status = v1.ConditionStatus(in.Status) + 
out.Reason = in.Reason + out.Message = in.Message + out.ObservedGeneration = in.ObservedGeneration + out.LastUpdateTime = in.LastUpdateTime + out.LastTransitionTime = in.LastTransitionTime + return nil +} + +// Convert_v1alpha1_BucketPoolCondition_To_storage_BucketPoolCondition is an autogenerated conversion function. +func Convert_v1alpha1_BucketPoolCondition_To_storage_BucketPoolCondition(in *storagev1alpha1.BucketPoolCondition, out *storage.BucketPoolCondition, s conversion.Scope) error { + return autoConvert_v1alpha1_BucketPoolCondition_To_storage_BucketPoolCondition(in, out, s) +} + +func autoConvert_storage_BucketPoolCondition_To_v1alpha1_BucketPoolCondition(in *storage.BucketPoolCondition, out *storagev1alpha1.BucketPoolCondition, s conversion.Scope) error { + out.Type = storagev1alpha1.BucketPoolConditionType(in.Type) + out.Status = v1.ConditionStatus(in.Status) + out.Reason = in.Reason + out.Message = in.Message + out.ObservedGeneration = in.ObservedGeneration + out.LastUpdateTime = in.LastUpdateTime + out.LastTransitionTime = in.LastTransitionTime + return nil +} + +// Convert_storage_BucketPoolCondition_To_v1alpha1_BucketPoolCondition is an autogenerated conversion function. 
+func Convert_storage_BucketPoolCondition_To_v1alpha1_BucketPoolCondition(in *storage.BucketPoolCondition, out *storagev1alpha1.BucketPoolCondition, s conversion.Scope) error { + return autoConvert_storage_BucketPoolCondition_To_v1alpha1_BucketPoolCondition(in, out, s) +} + func autoConvert_v1alpha1_BucketPoolList_To_storage_BucketPoolList(in *storagev1alpha1.BucketPoolList, out *storage.BucketPoolList, s conversion.Scope) error { out.ListMeta = in.ListMeta out.Items = *(*[]storage.BucketPool)(unsafe.Pointer(&in.Items)) @@ -601,6 +643,7 @@ func Convert_storage_BucketPoolSpec_To_v1alpha1_BucketPoolSpec(in *storage.Bucke func autoConvert_v1alpha1_BucketPoolStatus_To_storage_BucketPoolStatus(in *storagev1alpha1.BucketPoolStatus, out *storage.BucketPoolStatus, s conversion.Scope) error { out.State = storage.BucketPoolState(in.State) + out.Conditions = *(*[]storage.BucketPoolCondition)(unsafe.Pointer(&in.Conditions)) out.AvailableBucketClasses = *(*[]v1.LocalObjectReference)(unsafe.Pointer(&in.AvailableBucketClasses)) return nil } @@ -612,6 +655,7 @@ func Convert_v1alpha1_BucketPoolStatus_To_storage_BucketPoolStatus(in *storagev1 func autoConvert_storage_BucketPoolStatus_To_v1alpha1_BucketPoolStatus(in *storage.BucketPoolStatus, out *storagev1alpha1.BucketPoolStatus, s conversion.Scope) error { out.State = storagev1alpha1.BucketPoolState(in.State) + out.Conditions = *(*[]storagev1alpha1.BucketPoolCondition)(unsafe.Pointer(&in.Conditions)) out.AvailableBucketClasses = *(*[]v1.LocalObjectReference)(unsafe.Pointer(&in.AvailableBucketClasses)) return nil } @@ -959,6 +1003,7 @@ func autoConvert_v1alpha1_VolumePoolCondition_To_storage_VolumePoolCondition(in out.Reason = in.Reason out.Message = in.Message out.ObservedGeneration = in.ObservedGeneration + out.LastUpdateTime = in.LastUpdateTime out.LastTransitionTime = in.LastTransitionTime return nil } @@ -974,6 +1019,7 @@ func autoConvert_storage_VolumePoolCondition_To_v1alpha1_VolumePoolCondition(in out.Reason = in.Reason 
out.Message = in.Message out.ObservedGeneration = in.ObservedGeneration + out.LastUpdateTime = in.LastUpdateTime out.LastTransitionTime = in.LastTransitionTime return nil } diff --git a/internal/apis/storage/volumepool_types.go b/internal/apis/storage/volumepool_types.go index 06def7153..2e1c4d6c3 100644 --- a/internal/apis/storage/volumepool_types.go +++ b/internal/apis/storage/volumepool_types.go @@ -55,6 +55,8 @@ type VolumePoolCondition struct { Message string // ObservedGeneration represents the .metadata.generation that the condition was set based upon. ObservedGeneration int64 + // LastUpdateTime is the last time this condition was updated. + LastUpdateTime metav1.Time // LastTransitionTime is the last time the status of a condition has transitioned from one state to another. LastTransitionTime metav1.Time } diff --git a/internal/apis/storage/zz_generated.deepcopy.go b/internal/apis/storage/zz_generated.deepcopy.go index 94ccb5dfc..6ad8153a9 100644 --- a/internal/apis/storage/zz_generated.deepcopy.go +++ b/internal/apis/storage/zz_generated.deepcopy.go @@ -208,6 +208,24 @@ func (in *BucketPool) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BucketPoolCondition) DeepCopyInto(out *BucketPoolCondition) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BucketPoolCondition. +func (in *BucketPoolCondition) DeepCopy() *BucketPoolCondition { + if in == nil { + return nil + } + out := new(BucketPoolCondition) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *BucketPoolList) DeepCopyInto(out *BucketPoolList) { *out = *in @@ -265,6 +283,13 @@ func (in *BucketPoolSpec) DeepCopy() *BucketPoolSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BucketPoolStatus) DeepCopyInto(out *BucketPoolStatus) { *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]BucketPoolCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.AvailableBucketClasses != nil { in, out := &in.AvailableBucketClasses, &out.AvailableBucketClasses *out = make([]v1.LocalObjectReference, len(*in)) @@ -638,6 +663,7 @@ func (in *VolumePool) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VolumePoolCondition) DeepCopyInto(out *VolumePoolCondition) { *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) return } diff --git a/internal/controllers/compute/machinepool_lifecycle_controller.go b/internal/controllers/compute/machinepool_lifecycle_controller.go new file mode 100644 index 000000000..01a785f79 --- /dev/null +++ b/internal/controllers/compute/machinepool_lifecycle_controller.go @@ -0,0 +1,179 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package compute + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/go-logr/logr" + computev1alpha1 "github.com/ironcore-dev/ironcore/api/compute/v1alpha1" + "github.com/ironcore-dev/ironcore/utils/equality" + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + 
"sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +type MachinePoolLifecycleReconciler struct { + client.Client + GracePeriod time.Duration + + healthDataMu sync.RWMutex + healthData map[string]*MachinePoolHealth +} + +type MachinePoolHealth struct { + lastObservedTime time.Time + lastChangeDetectedTime time.Time + readyCondition *computev1alpha1.MachinePoolCondition + leaseRenewTime time.Time +} + +func (r *MachinePoolLifecycleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + machinePool := &computev1alpha1.MachinePool{} + if err := r.Get(ctx, req.NamespacedName, machinePool); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + return r.reconcileExists(ctx, log, machinePool) +} + +func (r *MachinePoolLifecycleReconciler) getMachinePoolHealth(machinePoolName string) *MachinePoolHealth { + r.healthDataMu.RLock() + defer r.healthDataMu.RUnlock() + + return r.healthData[machinePoolName] +} + +func (r *MachinePoolLifecycleReconciler) setMachinePoolHealth(machinePoolName string, machinePoolHealh *MachinePoolHealth) { + r.healthDataMu.Lock() + defer r.healthDataMu.Unlock() + + if r.healthData == nil { + r.healthData = make(map[string]*MachinePoolHealth) + } + + r.healthData[machinePoolName] = machinePoolHealh +} + +func getPreviousHealthValues(machinePoolHealth *MachinePoolHealth) (*computev1alpha1.MachinePoolCondition, *time.Time) { + var ( + prevReadyCondition *computev1alpha1.MachinePoolCondition + prevLeaseRenewTime *time.Time + ) + if machinePoolHealth != nil { + prevReadyCondition = machinePoolHealth.readyCondition + if !machinePoolHealth.leaseRenewTime.IsZero() { + prevLeaseRenewTime = &machinePoolHealth.leaseRenewTime + } + } + return prevReadyCondition, prevLeaseRenewTime +} + +func (r *MachinePoolLifecycleReconciler) getCurrentLeaseRenewTime(ctx context.Context, machinePoolName 
string) (*time.Time, error) { + lease := &coordinationv1.Lease{} + if err := r.Get(ctx, client.ObjectKey{ + Namespace: computev1alpha1.NamespaceMachinePoolLease, + Name: machinePoolName, + }, lease); err != nil { + if !apierrors.IsNotFound(err) { + return nil, fmt.Errorf("getting machine pool lease: %w", err) + } + return nil, nil + } + if lease.Spec.RenewTime != nil { + return ptr.To(lease.Spec.RenewTime.Time), nil + } + return nil, nil +} + +func (r *MachinePoolLifecycleReconciler) reconcileExists(ctx context.Context, log logr.Logger, machinePool *computev1alpha1.MachinePool) (ctrl.Result, error) { + now := time.Now() + prev := r.getMachinePoolHealth(machinePool.Name) + prevReadyCondition, prevLeaseRenewTime := getPreviousHealthValues(prev) + + currentReadyCondition := computev1alpha1.FindMachinePoolCondition(machinePool.Status.Conditions, computev1alpha1.MachinePoolReady) + currentLeaseRenewTime, err := r.getCurrentLeaseRenewTime(ctx, machinePool.Name) + if err != nil { + return ctrl.Result{}, err + } + + changed := !ptr.Equal(prevLeaseRenewTime, currentLeaseRenewTime) || !equality.Semantic.DeepEqual(prevReadyCondition, currentReadyCondition) + + next := &MachinePoolHealth{ + lastObservedTime: now, + readyCondition: currentReadyCondition, + leaseRenewTime: ptr.Deref(currentLeaseRenewTime, time.Time{}), + } + + switch { + case prev == nil: + log.V(1).Info("First observation of machine pool") + next.lastChangeDetectedTime = now + case changed: + log.V(1).Info("Lease or ready condition changed") + next.lastChangeDetectedTime = now + default: + next.lastChangeDetectedTime = prev.lastChangeDetectedTime + + if time.Since(prev.lastChangeDetectedTime) > r.GracePeriod { + if currentReadyCondition != nil && currentReadyCondition.Status == corev1.ConditionUnknown { + log.V(1).Info("Grace period exceeded, ready condition already unknown — no patch needed", + "gracePeriod", r.GracePeriod, "lastChangeDetected", prev.lastChangeDetectedTime) + } else { + log.Info("Grace period 
exceeded without health update, marking machine pool status unknown", + "gracePeriod", r.GracePeriod, "lastChangeDetected", prev.lastChangeDetectedTime) + patch := client.StrategicMergeFrom(machinePool.DeepCopy()) + newReadyCondition := computev1alpha1.MachinePoolCondition{ + Type: computev1alpha1.MachinePoolReady, + Status: corev1.ConditionUnknown, + Reason: "MachinePoolStatusUnknown", + Message: "machinepoollet stopped posting machine pool status.", + ObservedGeneration: machinePool.Generation, + } + machinePool.Status.Conditions = computev1alpha1.SetMachinePoolCondition(machinePool.Status.Conditions, newReadyCondition) + + if err := r.Status().Patch(ctx, machinePool, patch); err != nil { + // On patch failure, leave health state untouched so the next reconcile retries. + return ctrl.Result{}, fmt.Errorf("error patching: %w", err) + } + next.readyCondition = &newReadyCondition + } + } else { + log.V(1).Info("No change, still within grace period", + "gracePeriod", r.GracePeriod, "elapsed", time.Since(prev.lastChangeDetectedTime)) + } + } + + r.setMachinePoolHealth(machinePool.Name, next) + + // requeue when this pool's grace period runs out, but never sooner than 50ms from now. + return ctrl.Result{RequeueAfter: max(50*time.Millisecond, time.Until(next.lastChangeDetectedTime.Add(r.GracePeriod)))}, nil +} + +func (r *MachinePoolLifecycleReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + Named("MachinePoolLifecycle"). + For(&computev1alpha1.MachinePool{}). + Watches( + &coordinationv1.Lease{}, + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []ctrl.Request { + return []ctrl.Request{{NamespacedName: client.ObjectKey{Name: obj.GetName()}}} + }), + builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { + return obj.GetNamespace() == computev1alpha1.NamespaceMachinePoolLease + })), + ). 
+ Complete(r) +} diff --git a/internal/controllers/compute/machinepool_lifecycle_controller_test.go b/internal/controllers/compute/machinepool_lifecycle_controller_test.go new file mode 100644 index 000000000..a517809db --- /dev/null +++ b/internal/controllers/compute/machinepool_lifecycle_controller_test.go @@ -0,0 +1,378 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package compute + +import ( + "fmt" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + computev1alpha1 "github.com/ironcore-dev/ironcore/api/compute/v1alpha1" + coordinationv1 "k8s.io/api/coordination/v1" +) + +var _ = Describe("machinepool lifecycle controller", func() { + machinePool := SetupMachinePool() + + Context("when neither lease nor ready condition see progress within the grace period", func() { + It("should set the ready condition to Unknown (Lease w/o RenewTime) and update Unknown only once", func(ctx SpecContext) { + By("creating a lease for the machine pool without RenewTime") + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: machinePool.Name, + Namespace: "ironcore-machinepool-lease", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptr.To(machinePool.Name), + LeaseDurationSeconds: ptr.To(int32(3600)), + }, + } + Expect(k8sClient.Create(ctx, lease)).To(Succeed(), "failed to create lease") + DeferCleanup(func(ctx SpecContext) { + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, lease))).To(Succeed()) + }) + + By("checking that the MachinePool Ready condition is set to Unknown") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, machinePoolKey, machinePool) + g.Expect(err).NotTo(HaveOccurred()) + + readyCondition := 
computev1alpha1.FindMachinePoolCondition(machinePool.Status.Conditions, computev1alpha1.MachinePoolReady) + + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(corev1.ConditionUnknown)) + }).WithTimeout(2 * machinePoolLifecycleGracePeriod).Should(Succeed()) + + By("verifying the Unknown status is only set once (no further status patches)") + Expect(k8sClient.Get(ctx, machinePoolKey, machinePool)).To(Succeed()) + + resourceVersion := machinePool.ResourceVersion + + Consistently(func(g Gomega) { + err := k8sClient.Get(ctx, machinePoolKey, machinePool) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(machinePool.ResourceVersion).To(Equal(resourceVersion)) + }).WithTimeout(3 * machinePoolLifecycleGracePeriod).Should(Succeed()) + }) + + It("should set the ready condition to Unknown (Lease with RenewTime)", func(ctx SpecContext) { + By("creating a lease for the machine pool") + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: machinePool.Name, + Namespace: "ironcore-machinepool-lease", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptr.To(machinePool.Name), + LeaseDurationSeconds: ptr.To(int32(3600)), + RenewTime: ptr.To(metav1.NowMicro()), + }, + } + Expect(k8sClient.Create(ctx, lease)).To(Succeed(), "failed to create lease") + DeferCleanup(func(ctx SpecContext) { + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, lease))).To(Succeed()) + }) + + By("checking that the MachinePool Ready condition is set to Unknown") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, machinePoolKey, machinePool) + g.Expect(err).NotTo(HaveOccurred()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(machinePool.Status.Conditions, computev1alpha1.MachinePoolReady) + + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(corev1.ConditionUnknown)) + }).Should(Succeed()) + }) + + It("should set the ready condition to 
Unknown (No lease)", func(ctx SpecContext) { + By("checking that the MachinePool Ready condition is set to Unknown") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, machinePoolKey, machinePool) + g.Expect(err).NotTo(HaveOccurred()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(machinePool.Status.Conditions, computev1alpha1.MachinePoolReady) + + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(corev1.ConditionUnknown)) + }).Should(Succeed()) + }) + }) + + Context("when the lease is renewed frequently", func() { + It("should not set the ready condition to Unknown when the lease is regularly renewed", func(ctx SpecContext) { + By("creating a lease for the machine pool with a current renewTime") + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: machinePool.Name, + Namespace: "ironcore-machinepool-lease", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptr.To(machinePool.Name), + LeaseDurationSeconds: ptr.To(int32(3600)), + RenewTime: ptr.To(metav1.NowMicro()), + }, + } + Expect(k8sClient.Create(ctx, lease)).To(Succeed()) + DeferCleanup(func(ctx SpecContext) { + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, lease))).To(Succeed()) + }) + + By("continuously renewing the lease well within the grace period") + stopCh := make(chan struct{}) + done := make(chan struct{}) + go func() { + defer close(done) + ticker := time.NewTicker(50 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-stopCh: + return + case <-ticker.C: + fresh := &coordinationv1.Lease{} + if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(lease), fresh); err != nil { + return + } + fresh.Spec.RenewTime = ptr.To(metav1.NowMicro()) + _ = k8sClient.Update(ctx, fresh) + } + } + }() + DeferCleanup(func(_ SpecContext) { + close(stopCh) + <-done + }) + + By("verifying the ready condition never becomes Unknown over 15 seconds") + 
machinePoolKey := client.ObjectKeyFromObject(machinePool) + Consistently(func(g Gomega) { + err := k8sClient.Get(ctx, machinePoolKey, machinePool) + g.Expect(err).NotTo(HaveOccurred()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(machinePool.Status.Conditions, computev1alpha1.MachinePoolReady) + if readyCondition != nil { + g.Expect(readyCondition.Status).NotTo(Equal(corev1.ConditionUnknown)) + } + }).WithTimeout(3 * machinePoolLifecycleGracePeriod).Should(Succeed()) + }) + + }) + + Context("when the ready condition progresses", func() { + DescribeTable("should not flip a freshly-progressing ready condition to Unknown", + func(ctx SpecContext, initialStatus corev1.ConditionStatus, nextStatus corev1.ConditionStatus) { + By("setting the initial ready condition on the machine pool") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + patchReadyCondition(ctx, machinePoolKey, initialStatus, "Initial", "initial state") + + By("continuously refreshing the ready condition well within the grace period") + stop := startReadyConditionRenewer(ctx, machinePoolKey, nextStatus) + DeferCleanup(stop) + + By("verifying the ready condition never becomes Unknown") + Consistently(func(g Gomega) { + pool := &computev1alpha1.MachinePool{} + g.Expect(k8sClient.Get(ctx, machinePoolKey, pool)).To(Succeed()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).NotTo(Equal(corev1.ConditionUnknown)) + }).WithTimeout(3 * machinePoolLifecycleGracePeriod).Should(Succeed()) + }, + Entry("True remains True", corev1.ConditionTrue, corev1.ConditionTrue), + Entry("False remains False", corev1.ConditionFalse, corev1.ConditionFalse), + Entry("False progressing to True", corev1.ConditionFalse, corev1.ConditionTrue), + Entry("True progressing to False", corev1.ConditionTrue, corev1.ConditionFalse), + ) + }) + + Context("when only one 
health signal stays fresh", func() { + It("should keep the ready condition healthy when the lease is stale but the ready condition is refreshed", func(ctx SpecContext) { + By("creating a lease with a stale RenewTime") + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: machinePool.Name, + Namespace: "ironcore-machinepool-lease", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptr.To(machinePool.Name), + LeaseDurationSeconds: ptr.To(int32(3600)), + RenewTime: ptr.To(metav1.NewMicroTime(time.Now().Add(-time.Hour))), + }, + } + Expect(k8sClient.Create(ctx, lease)).To(Succeed()) + DeferCleanup(func(ctx SpecContext) { + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, lease))).To(Succeed()) + }) + + By("continuously refreshing the ready condition") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + patchReadyCondition(ctx, machinePoolKey, corev1.ConditionTrue, "Healthy", "machinepoollet healthy") + stop := startReadyConditionRenewer(ctx, machinePoolKey, corev1.ConditionTrue) + DeferCleanup(stop) + + By("verifying the ready condition never becomes Unknown") + Consistently(func(g Gomega) { + pool := &computev1alpha1.MachinePool{} + g.Expect(k8sClient.Get(ctx, machinePoolKey, pool)).To(Succeed()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).NotTo(Equal(corev1.ConditionUnknown)) + }).WithTimeout(3 * machinePoolLifecycleGracePeriod).Should(Succeed()) + }) + + It("should keep the ready condition healthy when the ready condition is stale but the lease is renewed", func(ctx SpecContext) { + By("seeding a stale ready condition on the machine pool") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + patchReadyCondition(ctx, machinePoolKey, corev1.ConditionTrue, "Healthy", "machinepoollet healthy") + + By("creating a lease for the machine pool") + lease := 
&coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: machinePool.Name, + Namespace: "ironcore-machinepool-lease", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptr.To(machinePool.Name), + LeaseDurationSeconds: ptr.To(int32(3600)), + RenewTime: ptr.To(metav1.NowMicro()), + }, + } + Expect(k8sClient.Create(ctx, lease)).To(Succeed()) + DeferCleanup(func(ctx SpecContext) { + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, lease))).To(Succeed()) + }) + + By("continuously renewing the lease, while the ready condition stays untouched") + stop := startLeaseRenewer(ctx, client.ObjectKeyFromObject(lease)) + DeferCleanup(stop) + + By("verifying the ready condition never becomes Unknown") + Consistently(func(g Gomega) { + pool := &computev1alpha1.MachinePool{} + g.Expect(k8sClient.Get(ctx, machinePoolKey, pool)).To(Succeed()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).NotTo(Equal(corev1.ConditionUnknown)) + }).WithTimeout(3 * machinePoolLifecycleGracePeriod).Should(Succeed()) + }) + }) + + Context("when a fresh signal arrives after the controller marked the pool Unknown", func() { + It("should stop patching the status once the machinepoollet posts a fresh ready condition", func(ctx SpecContext) { + By("waiting for the controller to set the ready condition to Unknown (no lease, no progress)") + machinePoolKey := client.ObjectKeyFromObject(machinePool) + Eventually(func(g Gomega) { + pool := &computev1alpha1.MachinePool{} + g.Expect(k8sClient.Get(ctx, machinePoolKey, pool)).To(Succeed()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(corev1.ConditionUnknown)) + }).WithTimeout(2 * machinePoolLifecycleGracePeriod).Should(Succeed()) + + By("posting a 
fresh ready=True condition as the machinepoollet would") + patchReadyCondition(ctx, machinePoolKey, corev1.ConditionTrue, "Healthy", "machinepoollet recovered") + + By("verifying the controller does not flip the fresh ready condition back to Unknown within the grace period") + Consistently(func(g Gomega) { + current := &computev1alpha1.MachinePool{} + g.Expect(k8sClient.Get(ctx, machinePoolKey, current)).To(Succeed()) + + readyCondition := computev1alpha1.FindMachinePoolCondition(current.Status.Conditions, computev1alpha1.MachinePoolReady) + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).NotTo(Equal(corev1.ConditionUnknown)) + }).WithTimeout(machinePoolLifecycleGracePeriod / 2).Should(Succeed()) + }) + }) +}) + +func patchReadyCondition(ctx SpecContext, key client.ObjectKey, status corev1.ConditionStatus, reason, message string) { + GinkgoHelper() + pool := &computev1alpha1.MachinePool{} + Expect(k8sClient.Get(ctx, key, pool)).To(Succeed()) + + patch := client.MergeFrom(pool.DeepCopy()) + pool.Status.Conditions = computev1alpha1.SetMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolCondition{ + Type: computev1alpha1.MachinePoolReady, + Status: status, + Reason: reason, + Message: message, + }) + Expect(k8sClient.Status().Patch(ctx, pool, patch)).To(Succeed()) +} + +func startReadyConditionRenewer(ctx SpecContext, key client.ObjectKey, status corev1.ConditionStatus) func(SpecContext) { + stopCh := make(chan struct{}) + done := make(chan struct{}) + go func() { + defer close(done) + ticker := time.NewTicker(50 * time.Millisecond) + defer ticker.Stop() + counter := 0 + for { + select { + case <-stopCh: + return + case <-ticker.C: + pool := &computev1alpha1.MachinePool{} + if err := k8sClient.Get(ctx, key, pool); err != nil { + return + } + patch := client.MergeFrom(pool.DeepCopy()) + counter++ + pool.Status.Conditions = computev1alpha1.SetMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolCondition{ 
+ Type: computev1alpha1.MachinePoolReady, + Status: status, + Reason: "MachinePoolReadyChanged", + Message: fmt.Sprintf("machinepool ready changed: %s %d", status, counter), + }) + _ = k8sClient.Status().Patch(ctx, pool, patch) + } + } + }() + return func(_ SpecContext) { + close(stopCh) + <-done + } +} + +func startLeaseRenewer(ctx SpecContext, key client.ObjectKey) func(SpecContext) { + stopCh := make(chan struct{}) + done := make(chan struct{}) + go func() { + defer close(done) + ticker := time.NewTicker(50 * time.Millisecond) + defer ticker.Stop() + for { + select { + case <-stopCh: + return + case <-ticker.C: + fresh := &coordinationv1.Lease{} + if err := k8sClient.Get(ctx, key, fresh); err != nil { + return + } + fresh.Spec.RenewTime = ptr.To(metav1.NowMicro()) + _ = k8sClient.Update(ctx, fresh) + } + } + }() + return func(_ SpecContext) { + close(stopCh) + <-done + } +} diff --git a/internal/controllers/compute/suite_test.go b/internal/controllers/compute/suite_test.go index e229ce592..2d7383a1c 100644 --- a/internal/controllers/compute/suite_test.go +++ b/internal/controllers/compute/suite_test.go @@ -26,6 +26,7 @@ import ( . "github.com/ironcore-dev/ironcore/utils/testing" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" @@ -46,10 +47,11 @@ import ( // http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
const ( - pollingInterval = 50 * time.Millisecond - eventuallyTimeout = 3 * time.Second - consistentlyDuration = 1 * time.Second - apiServiceTimeout = 5 * time.Minute + pollingInterval = 50 * time.Millisecond + eventuallyTimeout = 3 * time.Second + consistentlyDuration = 1 * time.Second + apiServiceTimeout = 5 * time.Minute + machinePoolLifecycleGracePeriod = 500 * time.Millisecond ) var ( @@ -82,7 +84,7 @@ var _ = BeforeSuite(func() { // default path defined in controller-runtime which is /usr/local/kubebuilder/. // Note that you must have the required binaries setup under the bin directory to perform // the tests directly. When we run make test it will be setup and used automatically. - BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.35.0-%s-%s", runtime.GOOS, runtime.GOARCH)), } testEnvExt = &utilsenvtest.EnvironmentExtensions{ @@ -168,10 +170,22 @@ var _ = BeforeSuite(func() { APIReader: k8sManager.GetAPIReader(), }).SetupWithManager(k8sManager)).To(Succeed()) + Expect((&MachinePoolLifecycleReconciler{ + Client: k8sManager.GetClient(), + GracePeriod: machinePoolLifecycleGracePeriod, + }).SetupWithManager(k8sManager)).To(Succeed()) + go func() { defer GinkgoRecover() Expect(k8sManager.Start(ctx)).To(Succeed(), "failed to start manager") }() + + leaseNamespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: computev1alpha1.NamespaceMachinePoolLease, + }, + } + Expect(k8sClient.Create(ctx, leaseNamespace)).To(Succeed(), "failed to create lease namespace") }) func SetupMachineClass() *computev1alpha1.MachineClass { @@ -204,3 +218,13 @@ func SetupVolumeClass() *storagev1alpha1.VolumeClass { } }) } + +func SetupMachinePool() *computev1alpha1.MachinePool { + return SetupObjectStruct[*computev1alpha1.MachinePool](&k8sClient, func(machinePool *computev1alpha1.MachinePool) { + *machinePool = computev1alpha1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + 
GenerateName: "test-pool-", + }, + } + }) +} diff --git a/poollet/machinepoollet/cmd/machinepoollet/app/app.go b/poollet/machinepoollet/cmd/machinepoollet/app/app.go index f301a0dcf..79c817897 100644 --- a/poollet/machinepoollet/cmd/machinepoollet/app/app.go +++ b/poollet/machinepoollet/cmd/machinepoollet/app/app.go @@ -97,6 +97,10 @@ type Options struct { DialTimeout time.Duration MachineClassMapperSyncTimeout time.Duration + HeartbeatInterval time.Duration + HeartbeatLeaseDuration time.Duration + HeartbeatStatusTimeout time.Duration + ChannelCapacity int RelistPeriod time.Duration RelistThreshold time.Duration @@ -149,6 +153,10 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) { fs.DurationVar(&o.DialTimeout, "dial-timeout", 1*time.Second, "Timeout for dialing to the machine runtime endpoint.") fs.DurationVar(&o.MachineClassMapperSyncTimeout, "mcm-sync-timeout", 10*time.Second, "Timeout waiting for the machine class mapper to sync.") + fs.DurationVar(&o.HeartbeatInterval, "heartbeat-interval", 10*time.Second, "Interval between machine pool heartbeats.") + fs.DurationVar(&o.HeartbeatLeaseDuration, "heartbeat-lease-duration", 40*time.Second, "leaseDurationSeconds to publish on the machine pool lease.") + fs.DurationVar(&o.HeartbeatStatusTimeout, "heartbeat-status-timeout", 5*time.Second, "Timeout for the IRI Status probe used as the heartbeat readiness check.") + fs.IntVar(&o.ChannelCapacity, "channel-capacity", 1024, "channel capacity for the machine event generator.") fs.DurationVar(&o.RelistPeriod, "relist-period", 5*time.Second, "event channel relisting period.") fs.DurationVar(&o.RelistThreshold, "relist-threshold", 3*time.Minute, "event channel relisting threshold.") @@ -228,6 +236,18 @@ func Run(ctx context.Context, opts Options) error { return fmt.Errorf("error getting port from address: %w", err) } + if opts.HeartbeatInterval <= 0 { + return fmt.Errorf("--heartbeat-interval must be > 0, got %s", opts.HeartbeatInterval) + } + if opts.HeartbeatLeaseDuration 
<= opts.HeartbeatInterval { + return fmt.Errorf("--heartbeat-lease-duration (%s) must be greater than --heartbeat-interval (%s)", + opts.HeartbeatLeaseDuration, opts.HeartbeatInterval) + } + if opts.HeartbeatStatusTimeout <= 0 || opts.HeartbeatStatusTimeout >= opts.HeartbeatInterval { + return fmt.Errorf("--heartbeat-status-timeout (%s) must be > 0 and less than --heartbeat-interval (%s)", + opts.HeartbeatStatusTimeout, opts.HeartbeatInterval) + } + getter, err := machinepoolletconfig.NewGetter(opts.MachinePoolName) if err != nil { return fmt.Errorf("error creating new getter: %w", err) @@ -482,6 +502,17 @@ func Run(ctx context.Context, opts Options) error { return fmt.Errorf("error setting up machine pool reconciler with manager: %w", err) } + if err := mgr.Add(controllers.NewMachinePoolHeartbeat( + mgr.GetClient(), + opts.MachinePoolName, + machineRuntime, + opts.HeartbeatInterval, + opts.HeartbeatLeaseDuration, + opts.HeartbeatStatusTimeout, + )); err != nil { + return fmt.Errorf("error adding machine pool heartbeat: %w", err) + } + if err := (&controllers.MachinePoolAnnotatorReconciler{ Client: mgr.GetClient(), MachinePoolName: opts.MachinePoolName, diff --git a/poollet/machinepoollet/controllers/controllers_suite_test.go b/poollet/machinepoollet/controllers/controllers_suite_test.go index c27a63dee..c54ffbb26 100644 --- a/poollet/machinepoollet/controllers/controllers_suite_test.go +++ b/poollet/machinepoollet/controllers/controllers_suite_test.go @@ -146,6 +146,13 @@ var _ = BeforeSuite(func() { Expect(ctrlMgr.Start()).To(Succeed()) DeferCleanup(ctrlMgr.Stop) + + leaseNamespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: computev1alpha1.NamespaceMachinePoolLease, + }, + } + Expect(k8sClient.Create(context.TODO(), leaseNamespace)).To(Succeed(), "failed to create lease namespace") }) func SetupTest() (*corev1.Namespace, *computev1alpha1.MachinePool, *computev1alpha1.MachineClass, *machine.FakeRuntimeService) { diff --git 
a/poollet/machinepoollet/controllers/machinepool_heartbeat.go b/poollet/machinepoollet/controllers/machinepool_heartbeat.go new file mode 100644 index 000000000..c5eccbb54 --- /dev/null +++ b/poollet/machinepoollet/controllers/machinepool_heartbeat.go @@ -0,0 +1,209 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + "github.com/google/uuid" + computev1alpha1 "github.com/ironcore-dev/ironcore/api/compute/v1alpha1" + "github.com/ironcore-dev/ironcore/iri/apis/machine" + iri "github.com/ironcore-dev/ironcore/iri/apis/machine/v1alpha1" + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + readyReasonHeartbeatReceived = "HeartbeatReceived" + readyReasonRuntimeUnreachable = "RuntimeUnreachable" + + readyMessageHeartbeatReceived = "machine runtime status probe succeeded" +) + +// ComputeReadyCondition returns the MachinePoolCondition the heartbeat wants +// to put on the pool, given the current pool generation and the result of +// the most recent IRI Status probe. 
+func ComputeReadyCondition(generation int64, probeErr error) computev1alpha1.MachinePoolCondition {
+	// Start from the healthy condition; a failed probe overrides status,
+	// reason and message below.
+	cond := computev1alpha1.MachinePoolCondition{
+		Type:               computev1alpha1.MachinePoolReady,
+		Status:             corev1.ConditionTrue,
+		Reason:             readyReasonHeartbeatReceived,
+		Message:            readyMessageHeartbeatReceived,
+		ObservedGeneration: generation,
+	}
+	if probeErr != nil {
+		cond.Status = corev1.ConditionFalse
+		cond.Reason = readyReasonRuntimeUnreachable
+		cond.Message = probeErr.Error()
+	}
+	return cond
+}
+
+// ReadyConditionsDiffer tells whether the Ready condition currently stored on
+// the pool (existing) deviates from the one the heartbeat wants (desired) in a
+// way that warrants a status patch. Only Status, Reason, Message and
+// ObservedGeneration are compared; LastUpdateTime and LastTransitionTime are
+// deliberately left out so that a pure timestamp refresh never triggers a
+// patch. A nil existing condition always counts as different.
+func ReadyConditionsDiffer(existing *computev1alpha1.MachinePoolCondition, desired computev1alpha1.MachinePoolCondition) bool {
+	switch {
+	case existing == nil:
+		// Nothing stored yet, so the desired condition has to be written.
+		return true
+	case existing.Status != desired.Status,
+		existing.Reason != desired.Reason,
+		existing.Message != desired.Message,
+		existing.ObservedGeneration != desired.ObservedGeneration:
+		return true
+	default:
+		return false
+	}
+}
+
+// MachinePoolHeartbeat is a manager.Runnable that periodically renews the
+// pool's Lease in NamespaceMachinePoolLease and updates the Ready condition
+// on the MachinePool's status. It is the poollet side of IEP-15.
+type MachinePoolHeartbeat struct {
+	// Client is used to read and write the Lease and the MachinePool.
+	Client client.Client
+	// MachinePoolName names both the MachinePool and its Lease.
+	MachinePoolName string
+	// MachineRuntime is the runtime whose Status call serves as health probe.
+	MachineRuntime machine.RuntimeService
+
+	// HeartbeatInterval is the period between two heartbeats.
+	HeartbeatInterval time.Duration
+	// LeaseDuration is published on the Lease as leaseDurationSeconds.
+	LeaseDuration time.Duration
+	// StatusProbeTimeout bounds a single runtime Status probe.
+	StatusProbeTimeout time.Duration
+
+	// holderIdentity is the value written to the Lease's holderIdentity.
+	holderIdentity string
+}
+
+// NewMachinePoolHeartbeat constructs a heartbeat runnable. The holderIdentity
+// is fixed for the process lifetime: the machine pool name and a freshly
+// generated UUID, joined by an underscore.
+func NewMachinePoolHeartbeat(
+	c client.Client,
+	machinePoolName string,
+	machineRuntime machine.RuntimeService,
+	heartbeatInterval, leaseDuration, statusProbeTimeout time.Duration,
+) *MachinePoolHeartbeat {
+	return &MachinePoolHeartbeat{
+		Client:             c,
+		MachinePoolName:    machinePoolName,
+		MachineRuntime:     machineRuntime,
+		HeartbeatInterval:  heartbeatInterval,
+		LeaseDuration:      leaseDuration,
+		StatusProbeTimeout: statusProbeTimeout,
+		holderIdentity:     fmt.Sprintf("%s_%s", machinePoolName, uuid.NewString()),
+	}
+}
+
+// Start runs the heartbeat loop until ctx is canceled. It satisfies
+// sigs.k8s.io/controller-runtime/pkg/manager.Runnable.
+//
+// One heartbeat is sent immediately, then one per HeartbeatInterval. Start
+// returns nil once ctx is done, and an error (without sending any heartbeat)
+// if HeartbeatInterval is not positive.
+func (h *MachinePoolHeartbeat) Start(ctx context.Context) error {
+	// time.NewTicker panics on a non-positive duration. The fields are exported,
+	// so do not rely on the caller having validated them: fail with an error
+	// instead of a panic.
+	if h.HeartbeatInterval <= 0 {
+		return fmt.Errorf("heartbeat interval must be > 0, got %s", h.HeartbeatInterval)
+	}
+
+	log := ctrl.LoggerFrom(ctx).WithName("machinepool-heartbeat")
+	log.Info("Starting machine pool heartbeat",
+		"interval", h.HeartbeatInterval,
+		"leaseDuration", h.LeaseDuration,
+		"holderIdentity", h.holderIdentity,
+	)
+
+	// Send the first heartbeat right away instead of waiting a full interval.
+	h.tick(ctx, log)
+
+	ticker := time.NewTicker(h.HeartbeatInterval)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			log.Info("Stopping machine pool heartbeat")
+			return nil
+		case <-ticker.C:
+			h.tick(ctx, log)
+		}
+	}
+}
+
+// tick performs a single heartbeat: it probes the machine runtime via Status
+// (bounded by StatusProbeTimeout), renews the lease, and reconciles the Ready
+// condition from the probe result. Failures are logged rather than returned so
+// that one bad tick does not end the loop; errors observed after ctx has been
+// canceled are not logged.
+func (h *MachinePoolHeartbeat) tick(ctx context.Context, log logr.Logger) {
+	probeCtx, cancel := context.WithTimeout(ctx, h.StatusProbeTimeout)
+	_, statusErr := h.MachineRuntime.Status(probeCtx, &iri.StatusRequest{})
+	cancel()
+
+	// The lease is renewed regardless of the probe result; the probe outcome
+	// only feeds the Ready condition.
+	if err := h.reconcileLease(ctx); err != nil && ctx.Err() == nil {
+		log.Error(err, "Failed to reconcile machine pool lease")
+	}
+	if err := h.reconcileReadyCondition(ctx, statusErr); err != nil && ctx.Err() == nil {
+		log.Error(err, "Failed to reconcile machine pool ready condition")
+	}
+}
+
+// reconcileLease creates the pool's Lease if it does not exist yet and patches
+// it otherwise. Every call refreshes RenewTime and LeaseDurationSeconds;
+// HolderIdentity and AcquireTime are rewritten only when the lease is held by
+// a different identity (or by none).
+func (h *MachinePoolHeartbeat) reconcileLease(ctx context.Context) error {
+	// The conversion truncates LeaseDuration to whole seconds.
+	leaseDurationSeconds := int32(h.LeaseDuration.Seconds())
+	now := metav1.NewMicroTime(time.Now())
+
+	lease := &coordinationv1.Lease{}
+	key := client.ObjectKey{Namespace: computev1alpha1.NamespaceMachinePoolLease, Name: h.MachinePoolName}
+	if err := h.Client.Get(ctx, key, lease); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return fmt.Errorf("getting lease: %w", err)
+		}
+		// No lease yet: create it with this process as the holder.
+		newLease := &coordinationv1.Lease{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: key.Namespace,
+				Name:      key.Name,
+			},
+			Spec: coordinationv1.LeaseSpec{
+				HolderIdentity:       ptr.To(h.holderIdentity),
+				LeaseDurationSeconds: ptr.To(leaseDurationSeconds),
+				AcquireTime:          ptr.To(now),
+				RenewTime:            ptr.To(now),
+			},
+		}
+		if err := h.Client.Create(ctx, newLease); err != nil {
+			return fmt.Errorf("creating lease: %w", err)
+		}
+		return nil
+	}
+
+	base := lease.DeepCopy()
+	if lease.Spec.HolderIdentity == nil || *lease.Spec.HolderIdentity != h.holderIdentity {
+		// Take ownership; the previous owner (likely a previous poollet process for
+		// this pool) is gone or stale.
+		log := ctrl.LoggerFrom(ctx)
+		previousHolder := ""
+		if lease.Spec.HolderIdentity != nil {
+			previousHolder = *lease.Spec.HolderIdentity
+		}
+		log.Info("Taking ownership of stale machine pool lease", "previousHolder", previousHolder, "newHolder", h.holderIdentity)
+		lease.Spec.HolderIdentity = ptr.To(h.holderIdentity)
+		lease.Spec.AcquireTime = ptr.To(now)
+	}
+	lease.Spec.LeaseDurationSeconds = ptr.To(leaseDurationSeconds)
+	lease.Spec.RenewTime = ptr.To(now)
+
+	if err := h.Client.Patch(ctx, lease, client.MergeFrom(base)); err != nil {
+		return fmt.Errorf("patching lease: %w", err)
+	}
+	return nil
+}
+
+// reconcileReadyCondition derives the desired Ready condition from statusErr
+// (the result of the latest runtime Status probe) and patches the pool's
+// status only when it differs from the stored condition in Status, Reason,
+// Message or ObservedGeneration. A heartbeat that changes none of these
+// therefore issues no status patch.
+func (h *MachinePoolHeartbeat) reconcileReadyCondition(ctx context.Context, statusErr error) error {
+	pool := &computev1alpha1.MachinePool{}
+	if err := h.Client.Get(ctx, client.ObjectKey{Name: h.MachinePoolName}, pool); err != nil {
+		return fmt.Errorf("getting machine pool: %w", err)
+	}
+
+	desired := ComputeReadyCondition(pool.Generation, statusErr)
+	existing := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady)
+	if !ReadyConditionsDiffer(existing, desired) {
+		return nil
+	}
+
+	base := pool.DeepCopy()
+	// MergeFrom sends the conditions list as a whole (JSON merge patch). That is
+	// sufficient while Ready is the only condition type maintained on the pool.
+	// NOTE(review): revisit if other writers start adding further condition types.
+	pool.Status.Conditions = computev1alpha1.SetMachinePoolCondition(pool.Status.Conditions, desired)
+	if err := h.Client.Status().Patch(ctx, pool, client.MergeFrom(base)); err != nil {
+		return fmt.Errorf("patching machine pool ready condition: %w", err)
+	}
+	return nil
+}
diff --git a/poollet/machinepoollet/controllers/machinepool_heartbeat_envtest_test.go b/poollet/machinepoollet/controllers/machinepool_heartbeat_envtest_test.go
new file mode 100644
index 000000000..bceff4ca0
--- /dev/null
+++ b/poollet/machinepoollet/controllers/machinepool_heartbeat_envtest_test.go
@@ -0,0 +1,184 @@
+// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package controllers_test
+
+import (
+	"context"
+	"errors"
+	"sync/atomic"
+	"time"
+
+	computev1alpha1 "github.com/ironcore-dev/ironcore/api/compute/v1alpha1"
+	"github.com/ironcore-dev/ironcore/iri/apis/machine"
+	iri "github.com/ironcore-dev/ironcore/iri/apis/machine/v1alpha1"
+	fakemachine "github.com/ironcore-dev/ironcore/iri/testing/machine"
+	"github.com/ironcore-dev/ironcore/poollet/machinepoollet/controllers"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	coordinationv1 "k8s.io/api/coordination/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/utils/ptr"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	ctrlconfig "sigs.k8s.io/controller-runtime/pkg/config"
+	metricserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+)
+
+// errorInjectingRuntime wraps a real fake runtime and lets a test toggle
+// errors on the Status method only. All other calls pass through.
+type errorInjectingRuntime struct { + machine.RuntimeService + statusErr atomic.Pointer[error] +} + +func (r *errorInjectingRuntime) Status(ctx context.Context, req *iri.StatusRequest) (*iri.StatusResponse, error) { + if errPtr := r.statusErr.Load(); errPtr != nil && *errPtr != nil { + return nil, *errPtr + } + return r.RuntimeService.Status(ctx, req) +} + +func (r *errorInjectingRuntime) setStatusErr(err error) { + if err == nil { + r.statusErr.Store(nil) + return + } + r.statusErr.Store(&err) +} + +var _ = Describe("MachinePoolHeartbeat", func() { + var ( + mp *computev1alpha1.MachinePool + runner *errorInjectingRuntime + ) + + BeforeEach(func(ctx SpecContext) { + By("creating a pool for this spec") + mp = &computev1alpha1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{GenerateName: "heartbeat-mp-"}, + } + Expect(k8sClient.Create(ctx, mp)).To(Succeed()) + DeferCleanup(k8sClient.Delete, mp) + + fake := fakemachine.NewFakeRuntimeService() + runner = &errorInjectingRuntime{RuntimeService: fake} + + // Stand up a manager just for this spec — keep it isolated from the + // shared SetupTest manager so we control which runnables are added. 
+ mgr, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + Metrics: metricserver.Options{BindAddress: "0"}, + Controller: ctrlconfig.Controller{SkipNameValidation: ptr.To(true)}, + }) + Expect(err).NotTo(HaveOccurred()) + + hb := controllers.NewMachinePoolHeartbeat( + mgr.GetClient(), mp.Name, runner, + 500*time.Millisecond, // interval + 3*time.Second, // lease duration + 200*time.Millisecond, // status probe timeout + ) + Expect(mgr.Add(hb)).To(Succeed()) + + mgrCtx, cancel := context.WithCancel(context.Background()) + DeferCleanup(cancel) + go func() { + defer GinkgoRecover() + Expect(mgr.Start(mgrCtx)).To(Succeed()) + }() + }) + + It("creates and renews the lease and sets Ready=True on success", func(ctx SpecContext) { + leaseKey := client.ObjectKey{ + Namespace: computev1alpha1.NamespaceMachinePoolLease, + Name: mp.Name, + } + + By("waiting for the lease to be created with the expected shape") + Eventually(func(g Gomega) { + lease := &coordinationv1.Lease{} + g.Expect(k8sClient.Get(ctx, leaseKey, lease)).To(Succeed()) + g.Expect(lease.Spec.HolderIdentity).NotTo(BeNil()) + g.Expect(*lease.Spec.HolderIdentity).To(HavePrefix(mp.Name + "_")) + g.Expect(lease.Spec.LeaseDurationSeconds).NotTo(BeNil()) + g.Expect(*lease.Spec.LeaseDurationSeconds).To(Equal(int32(3))) + g.Expect(lease.Spec.RenewTime).NotTo(BeNil()) + }).Should(Succeed()) + + By("waiting for the Ready condition to be set to True") + Eventually(func(g Gomega) { + pool := &computev1alpha1.MachinePool{} + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: mp.Name}, pool)).To(Succeed()) + cond := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(corev1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal("HeartbeatReceived")) + }).Should(Succeed()) + + By("capturing the current renewTime") + var first time.Time + Eventually(func(g Gomega) { + lease := &coordinationv1.Lease{} + 
g.Expect(k8sClient.Get(ctx, leaseKey, lease)).To(Succeed())
+			g.Expect(lease.Spec.RenewTime).NotTo(BeNil())
+			first = lease.Spec.RenewTime.Time
+		}).Should(Succeed())
+
+		By("observing that the lease gets renewed")
+		Eventually(func(g Gomega) {
+			lease := &coordinationv1.Lease{}
+			g.Expect(k8sClient.Get(ctx, leaseKey, lease)).To(Succeed())
+			g.Expect(lease.Spec.RenewTime).NotTo(BeNil())
+			g.Expect(lease.Spec.RenewTime.Time.After(first)).To(BeTrue(), "lease should have been renewed")
+		}).Should(Succeed())
+	})
+
+	It("does not bump the pool's resourceVersion on no-op heartbeats", func(ctx SpecContext) {
+		By("waiting for the first Ready=True patch and capturing the resourceVersion")
+		var initialRV string
+		Eventually(func(g Gomega) {
+			pool := &computev1alpha1.MachinePool{}
+			g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: mp.Name}, pool)).To(Succeed())
+			cond := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(Equal(corev1.ConditionTrue))
+			initialRV = pool.ResourceVersion
+		}).Should(Succeed())
+
+		By("verifying the resourceVersion does not change across subsequent ticks")
+		Consistently(func(g Gomega) {
+			pool := &computev1alpha1.MachinePool{}
+			g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: mp.Name}, pool)).To(Succeed())
+			g.Expect(pool.ResourceVersion).To(Equal(initialRV))
+		}, 2*time.Second, 200*time.Millisecond).Should(Succeed())
+	})
+
+	It("flips Ready to False when the runtime probe errors", func(ctx SpecContext) {
+		By("waiting for Ready=True before injecting failures")
+		Eventually(func(g Gomega) {
+			pool := &computev1alpha1.MachinePool{}
+			g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: mp.Name}, pool)).To(Succeed())
+			cond := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(Equal(corev1.ConditionTrue))
+		}).Should(Succeed())
+
+		By("injecting a Status error into the fake runtime")
+		runner.setStatusErr(errors.New("simulated runtime down"))
+
+		By("observing Ready flip to False with RuntimeUnreachable")
+		Eventually(func(g Gomega) {
+			pool := &computev1alpha1.MachinePool{}
+			g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: mp.Name}, pool)).To(Succeed())
+			cond := computev1alpha1.FindMachinePoolCondition(pool.Status.Conditions, computev1alpha1.MachinePoolReady)
+			g.Expect(cond).NotTo(BeNil())
+			g.Expect(cond.Status).To(Equal(corev1.ConditionFalse))
+			g.Expect(cond.Reason).To(Equal("RuntimeUnreachable"))
+			g.Expect(cond.Message).To(ContainSubstring("simulated runtime down"))
+		}).Should(Succeed())
+	})
+})
diff --git a/poollet/machinepoollet/controllers/machinepool_heartbeat_test.go b/poollet/machinepoollet/controllers/machinepool_heartbeat_test.go
new file mode 100644
index 000000000..46dd14a70
--- /dev/null
+++ b/poollet/machinepoollet/controllers/machinepool_heartbeat_test.go
@@ -0,0 +1,83 @@
+// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package controllers_test
+
+import (
+	"errors"
+	"time"
+
+	computev1alpha1 "github.com/ironcore-dev/ironcore/api/compute/v1alpha1"
+	"github.com/ironcore-dev/ironcore/poollet/machinepoollet/controllers"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+)
+
+// These specs check how ComputeReadyCondition turns the outcome of a runtime
+// probe (nil or an error) into the MachinePool Ready condition.
+var _ = Describe("ComputeReadyCondition", func() {
+	It("returns Ready=True with HeartbeatReceived when the probe succeeds", func() {
+		got := controllers.ComputeReadyCondition(int64(7), nil)
+		Expect(got.Type).To(Equal(computev1alpha1.MachinePoolReady))
+		Expect(got.Status).To(Equal(corev1.ConditionTrue))
+		Expect(got.Reason).To(Equal("HeartbeatReceived"))
+		Expect(got.ObservedGeneration).To(Equal(int64(7)))
+	})
+
+	It("returns Ready=False with RuntimeUnreachable when the probe errors", func() {
+		got := controllers.ComputeReadyCondition(int64(3), errors.New("boom"))
+		Expect(got.Status).To(Equal(corev1.ConditionFalse))
+		Expect(got.Reason).To(Equal("RuntimeUnreachable"))
+		Expect(got.Message).To(Equal("boom"))
+		Expect(got.ObservedGeneration).To(Equal(int64(3)))
+	})
+})
+
+// These specs check which differences between the stored and the desired Ready
+// condition are reported by ReadyConditionsDiffer.
+var _ = Describe("ReadyConditionsDiffer", func() {
+	// base is the shared fixture; both of its timestamps are left at the zero value.
+	base := computev1alpha1.MachinePoolCondition{
+		Type:               computev1alpha1.MachinePoolReady,
+		Status:             corev1.ConditionTrue,
+		Reason:             "HeartbeatReceived",
+		Message:            "ok",
+		ObservedGeneration: 5,
+	}
+
+	It("treats a nil existing as a diff", func() {
+		desired := computev1alpha1.MachinePoolCondition{
+			Type:   computev1alpha1.MachinePoolReady,
+			Status: corev1.ConditionTrue,
+		}
+		Expect(controllers.ReadyConditionsDiffer(nil, desired)).To(BeTrue())
+	})
+
+	It("returns false for identical conditions", func() {
+		Expect(controllers.ReadyConditionsDiffer(&base, base)).To(BeFalse())
+	})
+
+	It("ignores LastUpdateTime and LastTransitionTime", func() {
+		// Give the stored condition non-zero timestamps so the two sides really
+		// differ in the time fields; a plain copy of base has equal (zero)
+		// timestamps and would only repeat the identical-conditions spec.
+		existing := base
+		existing.LastUpdateTime.Time = time.Unix(1700000000, 0)
+		existing.LastTransitionTime.Time = time.Unix(1600000000, 0)
+		Expect(controllers.ReadyConditionsDiffer(&existing, base)).To(BeFalse())
+	})
+
+	DescribeTable("reports a diff when a meaningful field changes",
+		func(modify func(*computev1alpha1.MachinePoolCondition)) {
+			desired := base
+			modify(&desired)
+			Expect(controllers.ReadyConditionsDiffer(&base, desired)).To(BeTrue())
+		},
+		Entry("status", func(c *computev1alpha1.MachinePoolCondition) { c.Status = corev1.ConditionFalse }),
+		Entry("reason", func(c *computev1alpha1.MachinePoolCondition) { c.Reason = "Other" }),
+		Entry("message", func(c *computev1alpha1.MachinePoolCondition) { c.Message = "different" }),
+		Entry("observedGeneration", func(c *computev1alpha1.MachinePoolCondition) { c.ObservedGeneration = 6 }),
+	)
+})
diff --git a/poollet/machinepoollet/controllers/rbac.go b/poollet/machinepoollet/controllers/rbac.go
index 2810703f8..82d6a179b 100644
--- a/poollet/machinepoollet/controllers/rbac.go
+++ b/poollet/machinepoollet/controllers/rbac.go
@@ -13,3 +13,6 @@ package controllers
 // Rules required for machinepoollet delegated authentication
 //+kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create
 //+kubebuilder:rbac:groups=authorization.k8s.io,resources=subjectaccessreviews,verbs=create
+
+// Rules required for machine pool heartbeat
+//+kubebuilder:rbac:groups=coordination.k8s.io,namespace=ironcore-machinepool-lease,resources=leases,verbs=get;create;update;patch