-
Notifications
You must be signed in to change notification settings - Fork 73
🐛 fix: (boxcutter) Enable archival and spec changes for ProgressDeadlineExceeded revisions #2610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -785,7 +785,12 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te | |
| // the same inputs the runtime would see. | ||
| d.RevisionStatesGetter = &MockRevisionStatesGetter{ | ||
| RevisionStates: &controllers.RevisionStates{ | ||
| RollingOut: []*controllers.RevisionMetadata{{}}, | ||
| RollingOut: []*controllers.RevisionMetadata{{ | ||
| // Different digest from current spec - ensures we call the resolver | ||
| // (This simulates a rolling-out revision from a previous reconcile) | ||
| ResolutionDigest: "different-digest-from-previous-reconcile", | ||
| BundleMetadata: ocv1.BundleMetadata{Version: "1.0.0"}, | ||
| }}, | ||
| }, | ||
| } | ||
| d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { | ||
|
|
@@ -845,21 +850,18 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te | |
|
|
||
| deprecatedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeDeprecated) | ||
| require.NotNil(t, deprecatedCond) | ||
| require.Equal(t, metav1.ConditionUnknown, deprecatedCond.Status, "no catalog data during rollout, so Unknown") | ||
| require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, deprecatedCond.Reason) | ||
| require.Equal(t, "deprecation status unknown: catalog data unavailable", deprecatedCond.Message) | ||
| require.Equal(t, metav1.ConditionFalse, deprecatedCond.Status, "catalog data available from resolution, not deprecated") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth noting: this is a behavioral change beyond the bug fix. Previously, when a revision was rolling out, deprecation status was always Unknown ("no catalog data during rollout"). Now, when the spec changes and triggers re-resolution, deprecation is set based on actual catalog data. This is an improvement — the test updates are correct — but it's a side effect worth calling out in the PR description since it changes user-visible condition values. |
||
| require.Equal(t, ocv1.ReasonNotDeprecated, deprecatedCond.Reason) | ||
|
|
||
| packageCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypePackageDeprecated) | ||
| require.NotNil(t, packageCond) | ||
| require.Equal(t, metav1.ConditionUnknown, packageCond.Status, "no catalog data during rollout, so Unknown") | ||
| require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, packageCond.Reason) | ||
| require.Equal(t, "deprecation status unknown: catalog data unavailable", packageCond.Message) | ||
| require.Equal(t, metav1.ConditionFalse, packageCond.Status, "catalog data available from resolution, package not deprecated") | ||
| require.Equal(t, ocv1.ReasonNotDeprecated, packageCond.Reason) | ||
|
|
||
| channelCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeChannelDeprecated) | ||
| require.NotNil(t, channelCond) | ||
| require.Equal(t, metav1.ConditionUnknown, channelCond.Status, "no catalog data during rollout, so Unknown") | ||
| require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, channelCond.Reason) | ||
| require.Equal(t, "deprecation status unknown: catalog data unavailable", channelCond.Message) | ||
| require.Equal(t, metav1.ConditionFalse, channelCond.Status, "catalog data available from resolution, channel not deprecated") | ||
| require.Equal(t, ocv1.ReasonNotDeprecated, channelCond.Reason) | ||
|
|
||
| bundleCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeBundleDeprecated) | ||
| require.NotNil(t, bundleCond) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,8 +18,12 @@ package controllers | |
|
|
||
| import ( | ||
| "context" | ||
| "crypto/sha256" | ||
| "encoding/base64" | ||
| "encoding/json" | ||
| "errors" | ||
| "fmt" | ||
| "slices" | ||
|
|
||
| apierrors "k8s.io/apimachinery/pkg/api/errors" | ||
| apimeta "k8s.io/apimachinery/pkg/api/meta" | ||
|
|
@@ -37,6 +41,58 @@ import ( | |
| imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image" | ||
| ) | ||
|
|
||
| // resolutionInputs captures the catalog filter fields that affect bundle resolution. | ||
| // Changes to any of these fields require re-resolution. | ||
| type resolutionInputs struct { | ||
| PackageName string `json:"packageName"` | ||
| Version string `json:"version"` | ||
| Channels []string `json:"channels,omitempty"` | ||
| Selector string `json:"selector,omitempty"` | ||
| UpgradeConstraintPolicy string `json:"upgradeConstraintPolicy,omitempty"` | ||
| } | ||
camilamacedo86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // calculateResolutionDigest computes a SHA256 hash of the catalog filter inputs | ||
| // that affect bundle resolution. This digest enables detection of spec changes that | ||
| // require re-resolution versus when a rolling-out revision can be reused. | ||
| // | ||
| // The digest is base64 URL-safe encoded (43 characters) to fit within Kubernetes | ||
| // label value limits (63 characters max) when stored in Helm release metadata. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The doc comment says "to fit within Kubernetes label value limits (63 characters max) when stored in Helm release metadata" — but in the boxcutter path, It is stored as a Helm release label via |
||
| func calculateResolutionDigest(catalog *ocv1.CatalogFilter) (string, error) { | ||
| if catalog == nil { | ||
| return "", nil | ||
| } | ||
|
|
||
| // Sort channels for deterministic hashing | ||
| channels := slices.Clone(catalog.Channels) | ||
| slices.Sort(channels) | ||
|
|
||
| inputs := resolutionInputs{ | ||
| PackageName: catalog.PackageName, | ||
| Version: catalog.Version, | ||
| Channels: channels, | ||
| UpgradeConstraintPolicy: string(catalog.UpgradeConstraintPolicy), | ||
| } | ||
camilamacedo86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Convert selector to canonical string representation for deterministic hashing | ||
| if catalog.Selector != nil { | ||
| selector, err := metav1.LabelSelectorAsSelector(catalog.Selector) | ||
| if err != nil { | ||
| return "", fmt.Errorf("converting selector: %w", err) | ||
| } | ||
| inputs.Selector = selector.String() | ||
| } | ||
camilamacedo86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Marshal to JSON for consistent hashing | ||
| inputsBytes, err := json.Marshal(inputs) | ||
| if err != nil { | ||
| return "", fmt.Errorf("marshaling resolution inputs: %w", err) | ||
| } | ||
|
|
||
| // Compute SHA256 hash and encode as base64 URL-safe (43 chars, fits in label value limit) | ||
| hash := sha256.Sum256(inputsBytes) | ||
| return base64.RawURLEncoding.EncodeToString(hash[:]), nil | ||
| } | ||
|
|
||
| func HandleFinalizers(f finalizer.Finalizer) ReconcileStepFunc { | ||
| return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { | ||
| l := log.FromContext(ctx) | ||
|
|
@@ -140,15 +196,32 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { | |
| return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { | ||
| l := log.FromContext(ctx) | ||
|
|
||
| // If already rolling out, use existing revision and set deprecation to Unknown (no catalog check) | ||
| // If already rolling out, check if the latest rolling-out revision matches the current spec. | ||
| // If spec has changed (e.g., version, channels, selector, upgradeConstraintPolicy), we need | ||
| // to resolve a new bundle instead of reusing the rolling-out revision. | ||
| // | ||
| // Note: RollingOut slice is sorted oldest→newest, so use the last element (most recent). | ||
| // This avoids re-resolving when a newer revision already matches the spec, even if an | ||
| // older revision is still stuck rolling out. | ||
| if len(state.revisionStates.RollingOut) > 0 { | ||
| installedBundleName := "" | ||
| if state.revisionStates.Installed != nil { | ||
| installedBundleName = state.revisionStates.Installed.Name | ||
| latestRollingOutRevision := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1] | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This changes from However, it's also a behavioral change beyond the digest feature: even when digests match, the metadata propagated to |
||
| // Calculate digest of current spec's catalog filter | ||
| currentDigest, err := calculateResolutionDigest(ext.Spec.Source.Catalog) | ||
| if err != nil { | ||
| l.Error(err, "failed to calculate resolution digest from spec") | ||
| // On digest calculation error, fall through to re-resolve for safety | ||
| } else if currentDigest == latestRollingOutRevision.ResolutionDigest { | ||
| // Resolution inputs haven't changed - reuse the rolling-out revision | ||
| installedBundleName := "" | ||
| if state.revisionStates.Installed != nil { | ||
| installedBundleName = state.revisionStates.Installed.Name | ||
| } | ||
| SetDeprecationStatus(ext, installedBundleName, nil, false) | ||
| state.resolvedRevisionMetadata = latestRollingOutRevision | ||
| return nil, nil | ||
| } | ||
| SetDeprecationStatus(ext, installedBundleName, nil, false) | ||
| state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0] | ||
| return nil, nil | ||
| // Resolution inputs changed - fall through to resolve new bundle from catalog | ||
| } | ||
camilamacedo86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Resolve a new bundle from the catalog | ||
|
|
@@ -188,9 +261,17 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { | |
| return handleResolutionError(ctx, c, state, ext, err) | ||
| } | ||
|
|
||
| // Calculate digest of the resolution inputs for change detection | ||
| digest, err := calculateResolutionDigest(ext.Spec.Source.Catalog) | ||
| if err != nil { | ||
| l.Error(err, "failed to calculate resolution digest, continuing without it") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we can continue, perhaps this should be just info? |
||
| digest = "" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||
| } | ||
|
|
||
| state.resolvedRevisionMetadata = &RevisionMetadata{ | ||
| Package: resolvedBundle.Package, | ||
| Image: resolvedBundle.Image, | ||
| Package: resolvedBundle.Package, | ||
| Image: resolvedBundle.Image, | ||
| ResolutionDigest: digest, | ||
| // TODO: Right now, operator-controller only supports registry+v1 bundles and has no concept | ||
| // of a "release" field. If/when we add a release field concept or a new bundle format | ||
| // we need to re-evaluate use of `AsLegacyRegistryV1Version` so that we avoid propagating | ||
|
|
@@ -409,10 +490,11 @@ func ApplyBundle(a Applier) ReconcileStepFunc { | |
| return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { | ||
| l := log.FromContext(ctx) | ||
| revisionAnnotations := map[string]string{ | ||
| labels.BundleNameKey: state.resolvedRevisionMetadata.Name, | ||
| labels.PackageNameKey: state.resolvedRevisionMetadata.Package, | ||
| labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, | ||
| labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, | ||
| labels.BundleNameKey: state.resolvedRevisionMetadata.Name, | ||
| labels.PackageNameKey: state.resolvedRevisionMetadata.Package, | ||
| labels.BundleVersionKey: state.resolvedRevisionMetadata.Version, | ||
| labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image, | ||
| labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest, | ||
| } | ||
| objLbls := map[string]string{ | ||
| labels.OwnerKindKey: ocv1.ClusterExtensionKind, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,6 @@ import ( | |
| "sigs.k8s.io/controller-runtime/pkg/builder" | ||
| "sigs.k8s.io/controller-runtime/pkg/client" | ||
| "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" | ||
| "sigs.k8s.io/controller-runtime/pkg/event" | ||
| "sigs.k8s.io/controller-runtime/pkg/handler" | ||
| "sigs.k8s.io/controller-runtime/pkg/log" | ||
| "sigs.k8s.io/controller-runtime/pkg/predicate" | ||
|
|
@@ -81,7 +80,10 @@ func (c *ClusterObjectSetReconciler) Reconcile(ctx context.Context, req ctrl.Req | |
| reconciledRev := existingRev.DeepCopy() | ||
| res, reconcileErr := c.reconcile(ctx, reconciledRev) | ||
|
|
||
| if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 { | ||
| // Progress deadline enforcement only applies to Active revisions, not Archived ones. | ||
| // Archived revisions may need to retry teardown operations and should not have their | ||
| // Progressing condition or requeue behavior overridden by the deadline check. | ||
| if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 && reconciledRev.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived { | ||
| cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing) | ||
| isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded | ||
| succeeded := meta.IsStatusConditionTrue(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded) | ||
|
|
@@ -333,29 +335,12 @@ type Sourcoser interface { | |
| } | ||
|
|
||
| func (c *ClusterObjectSetReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||
| skipProgressDeadlineExceededPredicate := predicate.Funcs{ | ||
| UpdateFunc: func(e event.UpdateEvent) bool { | ||
| rev, ok := e.ObjectNew.(*ocv1.ClusterObjectSet) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would extract the whole update func into standalone function, not just the logic under
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pedjak |
||
| if !ok { | ||
| return true | ||
| } | ||
| // allow deletions to happen | ||
| if !rev.DeletionTimestamp.IsZero() { | ||
| return true | ||
| } | ||
| if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded { | ||
| return false | ||
| } | ||
| return true | ||
| }, | ||
| } | ||
| c.Clock = clock.RealClock{} | ||
| return ctrl.NewControllerManagedBy(mgr). | ||
| For( | ||
| &ocv1.ClusterObjectSet{}, | ||
| builder.WithPredicates( | ||
| predicate.ResourceVersionChangedPredicate{}, | ||
| skipProgressDeadlineExceededPredicate, | ||
| ), | ||
| ). | ||
| WatchesRawSource( | ||
|
|
@@ -638,7 +623,33 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) { | |
| } | ||
| } | ||
|
|
||
| // markAsProgressing sets the Progressing condition to True with the given reason. | ||
| // | ||
| // Once ProgressDeadlineExceeded has been set for the current generation, this function | ||
| // blocks only the RollingOut reason to prevent a reconcile loop for Active revisions. | ||
| // The loop would occur when reconcile() sees in-transition objects and sets | ||
| // Progressing=True/RollingOut, then the progress deadline check immediately resets it | ||
| // to ProgressDeadlineExceeded, causing a status-only update that triggers another reconcile. | ||
| // | ||
| // Archived revisions are exempt because they skip progress deadline enforcement entirely, | ||
| // so their status updates won't be overridden. | ||
| // | ||
| // Other reasons like Succeeded and Retrying are always allowed because: | ||
| // - Succeeded represents terminal success (rollout eventually completed) | ||
| // - Retrying represents transient errors that need to be visible in status | ||
| // | ||
| // If the generation changed since ProgressDeadlineExceeded was recorded, the guard | ||
| // allows all updates through so the condition reflects the new generation. | ||
| func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) { | ||
| if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && | ||
| cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded && | ||
| cnd.ObservedGeneration == cos.Generation && | ||
| reason == ocv1.ReasonRollingOut && | ||
| cos.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good design — the guard is narrow and well-targeted. Only One thought: if a new reason were added in the future without updating this guard, it could accidentally bypass the deadline. Consider adding a brief comment noting the assumption about the current set of possible reasons, so future authors know to check here. |
||
| log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for RollingOut on Active revisions", | ||
| "requestedReason", reason, "revision", cos.Name, "lifecycleState", cos.Spec.LifecycleState) | ||
| return | ||
| } | ||
camilamacedo86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| meta.SetStatusCondition(&cos.Status.Conditions, metav1.Condition{ | ||
| Type: ocv1.ClusterObjectSetTypeProgressing, | ||
| Status: metav1.ConditionTrue, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.