Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,11 @@ func (d *BoxcutterRevisionStatesGetter) GetRevisionStates(ctx context.Context, e
// is fairly decoupled from this code where we get the annotations back out. We may want to co-locate
// the set/get logic a bit better to make it more maintainable and less likely to get out of sync.
rm := &RevisionMetadata{
RevisionName: rev.Name,
Package: rev.Annotations[labels.PackageNameKey],
Image: rev.Annotations[labels.BundleReferenceKey],
Conditions: rev.Status.Conditions,
RevisionName: rev.Name,
Package: rev.Annotations[labels.PackageNameKey],
Image: rev.Annotations[labels.BundleReferenceKey],
ResolutionDigest: rev.Annotations[labels.ResolutionDigestKey],
Conditions: rev.Status.Conditions,
BundleMetadata: ocv1.BundleMetadata{
Name: rev.Annotations[labels.BundleNameKey],
Version: rev.Annotations[labels.BundleVersionKey],
Expand Down Expand Up @@ -100,10 +101,11 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
l := log.FromContext(ctx)
revisionAnnotations := map[string]string{
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest,
}
objLbls := map[string]string{
labels.OwnerKindKey: ocv1.ClusterExtensionKind,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,10 @@ func clusterExtensionRequestsForCatalog(c client.Reader, logger logr.Logger) crh
}

type RevisionMetadata struct {
RevisionName string
Package string
Image string
RevisionName string
Package string
Image string
ResolutionDigest string
ocv1.BundleMetadata
Conditions []metav1.Condition
}
Expand Down Expand Up @@ -535,8 +536,9 @@ func (d *HelmRevisionStatesGetter) GetRevisionStates(ctx context.Context, ext *o
for _, rel := range relhis {
if rel.Info != nil && rel.Info.Status == release.StatusDeployed {
rs.Installed = &RevisionMetadata{
Package: rel.Labels[labels.PackageNameKey],
Image: rel.Labels[labels.BundleReferenceKey],
Package: rel.Labels[labels.PackageNameKey],
Image: rel.Labels[labels.BundleReferenceKey],
ResolutionDigest: rel.Labels[labels.ResolutionDigestKey],
BundleMetadata: ocv1.BundleMetadata{
Name: rel.Labels[labels.BundleNameKey],
Version: rel.Labels[labels.BundleVersionKey],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,12 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te
// the same inputs the runtime would see.
d.RevisionStatesGetter = &MockRevisionStatesGetter{
RevisionStates: &controllers.RevisionStates{
RollingOut: []*controllers.RevisionMetadata{{}},
RollingOut: []*controllers.RevisionMetadata{{
// Different digest from current spec - ensures we call the resolver
// (This simulates a rolling-out revision from a previous reconcile)
ResolutionDigest: "different-digest-from-previous-reconcile",
BundleMetadata: ocv1.BundleMetadata{Version: "1.0.0"},
}},
},
}
d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) {
Expand Down Expand Up @@ -845,21 +850,18 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te

deprecatedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeDeprecated)
require.NotNil(t, deprecatedCond)
require.Equal(t, metav1.ConditionUnknown, deprecatedCond.Status, "no catalog data during rollout, so Unknown")
require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, deprecatedCond.Reason)
require.Equal(t, "deprecation status unknown: catalog data unavailable", deprecatedCond.Message)
require.Equal(t, metav1.ConditionFalse, deprecatedCond.Status, "catalog data available from resolution, not deprecated")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth noting: this is a behavioral change beyond the bug fix. Previously, when a revision was rolling out, deprecation status was always Unknown ("no catalog data during rollout"). Now, when the spec changes and triggers re-resolution, deprecation is set based on actual catalog data.

This is an improvement — the test updates are correct — but it's a side effect worth calling out in the PR description since it changes user-visible condition values.

require.Equal(t, ocv1.ReasonNotDeprecated, deprecatedCond.Reason)

packageCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypePackageDeprecated)
require.NotNil(t, packageCond)
require.Equal(t, metav1.ConditionUnknown, packageCond.Status, "no catalog data during rollout, so Unknown")
require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, packageCond.Reason)
require.Equal(t, "deprecation status unknown: catalog data unavailable", packageCond.Message)
require.Equal(t, metav1.ConditionFalse, packageCond.Status, "catalog data available from resolution, package not deprecated")
require.Equal(t, ocv1.ReasonNotDeprecated, packageCond.Reason)

channelCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeChannelDeprecated)
require.NotNil(t, channelCond)
require.Equal(t, metav1.ConditionUnknown, channelCond.Status, "no catalog data during rollout, so Unknown")
require.Equal(t, ocv1.ReasonDeprecationStatusUnknown, channelCond.Reason)
require.Equal(t, "deprecation status unknown: catalog data unavailable", channelCond.Message)
require.Equal(t, metav1.ConditionFalse, channelCond.Status, "catalog data available from resolution, channel not deprecated")
require.Equal(t, ocv1.ReasonNotDeprecated, channelCond.Reason)

bundleCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeBundleDeprecated)
require.NotNil(t, bundleCond)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ package controllers

import (
"context"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"slices"

apierrors "k8s.io/apimachinery/pkg/api/errors"
apimeta "k8s.io/apimachinery/pkg/api/meta"
Expand All @@ -37,6 +41,58 @@ import (
imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image"
)

// resolutionInputs captures the catalog filter fields that affect bundle resolution.
// Changes to any of these fields require re-resolution.
type resolutionInputs struct {
PackageName string `json:"packageName"`
Version string `json:"version"`
Channels []string `json:"channels,omitempty"`
Selector string `json:"selector,omitempty"`
UpgradeConstraintPolicy string `json:"upgradeConstraintPolicy,omitempty"`
}

// calculateResolutionDigest computes a SHA256 hash of the catalog filter inputs
// that affect bundle resolution. This digest enables detection of spec changes that
// require re-resolution versus when a rolling-out revision can be reused.
//
// The digest is base64 URL-safe encoded (43 characters) to fit within Kubernetes
// label value limits (63 characters max) when stored in Helm release metadata.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The doc comment says "to fit within Kubernetes label value limits (63 characters max) when stored in Helm release metadata" — but in the boxcutter path, ResolutionDigestKey is stored as an annotation (in revisionAnnotations maps), not a label. Annotations don't have the 63-char limit.

It is stored as a Helm release label via rel.Labels[labels.ResolutionDigestKey] in clusterextension_controller.go, so the 63-char constraint matters there. Consider clarifying that the size constraint comes from the Helm label path specifically.

func calculateResolutionDigest(catalog *ocv1.CatalogFilter) (string, error) {
if catalog == nil {
return "", nil
}

// Sort channels for deterministic hashing
channels := slices.Clone(catalog.Channels)
slices.Sort(channels)

inputs := resolutionInputs{
PackageName: catalog.PackageName,
Version: catalog.Version,
Channels: channels,
UpgradeConstraintPolicy: string(catalog.UpgradeConstraintPolicy),
}

// Convert selector to canonical string representation for deterministic hashing
if catalog.Selector != nil {
selector, err := metav1.LabelSelectorAsSelector(catalog.Selector)
if err != nil {
return "", fmt.Errorf("converting selector: %w", err)
}
inputs.Selector = selector.String()
}

// Marshal to JSON for consistent hashing
inputsBytes, err := json.Marshal(inputs)
if err != nil {
return "", fmt.Errorf("marshaling resolution inputs: %w", err)
}

// Compute SHA256 hash and encode as base64 URL-safe (43 chars, fits in label value limit)
hash := sha256.Sum256(inputsBytes)
return base64.RawURLEncoding.EncodeToString(hash[:]), nil
}

func HandleFinalizers(f finalizer.Finalizer) ReconcileStepFunc {
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
l := log.FromContext(ctx)
Expand Down Expand Up @@ -140,15 +196,32 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
l := log.FromContext(ctx)

// If already rolling out, use existing revision and set deprecation to Unknown (no catalog check)
// If already rolling out, check if the latest rolling-out revision matches the current spec.
// If spec has changed (e.g., version, channels, selector, upgradeConstraintPolicy), we need
// to resolve a new bundle instead of reusing the rolling-out revision.
//
// Note: RollingOut slice is sorted oldest→newest, so use the last element (most recent).
// This avoids re-resolving when a newer revision already matches the spec, even if an
// older revision is still stuck rolling out.
if len(state.revisionStates.RollingOut) > 0 {
installedBundleName := ""
if state.revisionStates.Installed != nil {
installedBundleName = state.revisionStates.Installed.Name
latestRollingOutRevision := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1]

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes from RollingOut[0] (oldest) to RollingOut[len-1] (newest). Since the slice is sorted ascending by Spec.Revision, this is correct for digest comparison — you want the most recent revision.

However, it's also a behavioral change beyond the digest feature: even when digests match, the metadata propagated to state.resolvedRevisionMetadata now comes from the newest rolling-out revision instead of the oldest. Worth calling out in the PR description.

// Calculate digest of current spec's catalog filter
currentDigest, err := calculateResolutionDigest(ext.Spec.Source.Catalog)
if err != nil {
l.Error(err, "failed to calculate resolution digest from spec")
// On digest calculation error, fall through to re-resolve for safety
} else if currentDigest == latestRollingOutRevision.ResolutionDigest {
// Resolution inputs haven't changed - reuse the rolling-out revision
installedBundleName := ""
if state.revisionStates.Installed != nil {
installedBundleName = state.revisionStates.Installed.Name
}
SetDeprecationStatus(ext, installedBundleName, nil, false)
state.resolvedRevisionMetadata = latestRollingOutRevision
return nil, nil
}
SetDeprecationStatus(ext, installedBundleName, nil, false)
state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0]
return nil, nil
// Resolution inputs changed - fall through to resolve new bundle from catalog
}

// Resolve a new bundle from the catalog
Expand Down Expand Up @@ -188,9 +261,17 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
return handleResolutionError(ctx, c, state, ext, err)
}

// Calculate digest of the resolution inputs for change detection
digest, err := calculateResolutionDigest(ext.Spec.Source.Catalog)
if err != nil {
l.Error(err, "failed to calculate resolution digest, continuing without it")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we can continue, perhaps this should be just info?

digest = ""
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If calculateResolutionDigest fails here, digest is set to "" and stored in the annotation. On the next reconcile, calculateResolutionDigest for the same (valid) spec would succeed, producing a non-empty digest that won't match "", causing an unnecessary re-resolution. This is safe (self-healing) but might be worth a brief comment noting the behavior.

}

state.resolvedRevisionMetadata = &RevisionMetadata{
Package: resolvedBundle.Package,
Image: resolvedBundle.Image,
Package: resolvedBundle.Package,
Image: resolvedBundle.Image,
ResolutionDigest: digest,
// TODO: Right now, operator-controller only supports registry+v1 bundles and has no concept
// of a "release" field. If/when we add a release field concept or a new bundle format
// we need to re-evaluate use of `AsLegacyRegistryV1Version` so that we avoid propagating
Expand Down Expand Up @@ -409,10 +490,11 @@ func ApplyBundle(a Applier) ReconcileStepFunc {
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
l := log.FromContext(ctx)
revisionAnnotations := map[string]string{
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
labels.BundleNameKey: state.resolvedRevisionMetadata.Name,
labels.PackageNameKey: state.resolvedRevisionMetadata.Package,
labels.BundleVersionKey: state.resolvedRevisionMetadata.Version,
labels.BundleReferenceKey: state.resolvedRevisionMetadata.Image,
labels.ResolutionDigestKey: state.resolvedRevisionMetadata.ResolutionDigest,
}
objLbls := map[string]string{
labels.OwnerKindKey: ocv1.ClusterExtensionKind,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
Expand Down Expand Up @@ -81,7 +80,10 @@ func (c *ClusterObjectSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
reconciledRev := existingRev.DeepCopy()
res, reconcileErr := c.reconcile(ctx, reconciledRev)

if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 {
// Progress deadline enforcement only applies to Active revisions, not Archived ones.
// Archived revisions may need to retry teardown operations and should not have their
// Progressing condition or requeue behavior overridden by the deadline check.
if pd := existingRev.Spec.ProgressDeadlineMinutes; pd > 0 && reconciledRev.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
cnd := meta.FindStatusCondition(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing)
isStillProgressing := cnd != nil && cnd.Status == metav1.ConditionTrue && cnd.Reason != ocv1.ReasonSucceeded
succeeded := meta.IsStatusConditionTrue(reconciledRev.Status.Conditions, ocv1.ClusterObjectSetTypeSucceeded)
Expand Down Expand Up @@ -333,29 +335,12 @@ type Sourcoser interface {
}

func (c *ClusterObjectSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
skipProgressDeadlineExceededPredicate := predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
rev, ok := e.ObjectNew.(*ocv1.ClusterObjectSet)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would extract the whole update func into standalone function, not just the logic under shouldAllowProgressDeadlineExceededUpdate

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pedjak
I think we might can remove the predicate.
See the current proposal now.
WDYT? I tried to update the PR desc as well

if !ok {
return true
}
// allow deletions to happen
if !rev.DeletionTimestamp.IsZero() {
return true
}
if cnd := meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded {
return false
}
return true
},
}
c.Clock = clock.RealClock{}
return ctrl.NewControllerManagedBy(mgr).
For(
&ocv1.ClusterObjectSet{},
builder.WithPredicates(
predicate.ResourceVersionChangedPredicate{},
skipProgressDeadlineExceededPredicate,
),
).
WatchesRawSource(
Expand Down Expand Up @@ -638,7 +623,33 @@ func setRetryingConditions(cos *ocv1.ClusterObjectSet, message string) {
}
}

// markAsProgressing sets the Progressing condition to True with the given reason.
//
// Once ProgressDeadlineExceeded has been set for the current generation, this function
// blocks only the RollingOut reason to prevent a reconcile loop for Active revisions.
// The loop would occur when reconcile() sees in-transition objects and sets
// Progressing=True/RollingOut, then the progress deadline check immediately resets it
// to ProgressDeadlineExceeded, causing a status-only update that triggers another reconcile.
//
// Archived revisions are exempt because they skip progress deadline enforcement entirely,
// so their status updates won't be overridden.
//
// Other reasons like Succeeded and Retrying are always allowed because:
// - Succeeded represents terminal success (rollout eventually completed)
// - Retrying represents transient errors that need to be visible in status
//
// If the generation changed since ProgressDeadlineExceeded was recorded, the guard
// allows all updates through so the condition reflects the new generation.
func markAsProgressing(cos *ocv1.ClusterObjectSet, reason, message string) {
if cnd := meta.FindStatusCondition(cos.Status.Conditions, ocv1.ClusterObjectSetTypeProgressing); cnd != nil &&
cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded &&
cnd.ObservedGeneration == cos.Generation &&
reason == ocv1.ReasonRollingOut &&
cos.Spec.LifecycleState != ocv1.ClusterObjectSetLifecycleStateArchived {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good design — the guard is narrow and well-targeted. Only RollingOut is blocked to prevent the specific flip-flop loop; Succeeded and Retrying pass through, and the ObservedGeneration check resets everything on new spec revisions.

One thought: if a new reason were added in the future without updating this guard, it could accidentally bypass the deadline. Consider adding a brief comment noting the assumption about the current set of possible reasons, so future authors know to check here.

log.Log.V(1).Info("skipping markAsProgressing: ProgressDeadlineExceeded is sticky for RollingOut on Active revisions",
"requestedReason", reason, "revision", cos.Name, "lifecycleState", cos.Spec.LifecycleState)
return
}
meta.SetStatusCondition(&cos.Status.Conditions, metav1.Condition{
Type: ocv1.ClusterObjectSetTypeProgressing,
Status: metav1.ConditionTrue,
Expand Down
Loading
Loading