@@ -12,13 +12,15 @@ import (
1212
1313 configv1 "github.com/openshift/api/config/v1"
1414 features "github.com/openshift/api/features"
15+ opv1 "github.com/openshift/api/operator/v1"
1516 cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
1617 corev1 "k8s.io/api/core/v1"
1718 "k8s.io/apimachinery/pkg/api/equality"
1819 apierrors "k8s.io/apimachinery/pkg/api/errors"
1920 "k8s.io/apimachinery/pkg/api/meta"
2021 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2122 "k8s.io/apimachinery/pkg/labels"
23+ k8sversion "k8s.io/apimachinery/pkg/util/version"
2224 "k8s.io/klog/v2"
2325 "k8s.io/utils/clock"
2426
@@ -269,6 +271,18 @@ func (optr *Operator) syncUpgradeableStatus(co *configv1.ClusterOperator) error
269271 Reason : asExpectedReason ,
270272 }
271273
274+ // Check boot image skew upgradeable guards
275+ skewErrorExists , skewErrorMessage , err := optr .checkBootImageSkewUpgradeableGuard ()
276+ if err != nil {
277+ return err
278+ }
279+
280+ if skewErrorExists {
281+ coStatusCondition .Status = configv1 .ConditionFalse
282+ coStatusCondition .Reason = "ClusterBootImageSkewError"
283+ coStatusCondition .Message = skewErrorMessage
284+ }
285+
272286 var degraded , interrupted bool
273287 for _ , pool := range pools {
274288 interrupted = isPoolStatusConditionTrue (pool , mcfgv1 .MachineConfigPoolBuildInterrupted )
@@ -600,3 +614,172 @@ func machineConfigPoolStatus(fgHandler ctrlcommon.FeatureGatesHandler, pool *mcf
// taskFailed builds the failure identifier for a task by appending the
// "Failed" suffix to the given task name.
func taskFailed(task string) string {
	const failedSuffix = "Failed"
	return task + failedSuffix
}
617+
618+ // checkBootImageSkewUpgradeableGuard checks if the boot image version is within acceptable limits.
619+ // It returns an error if there is no skew enforcement opinion specified. If one is specified,
620+ // it checks if boot image skew is within the expected limit.
621+ func (optr * Operator ) checkBootImageSkewUpgradeableGuard () (bool , string , error ) {
622+ // Check if feature gate is enabled
623+ if ! optr .fgHandler .Enabled (features .FeatureGateBootImageSkewEnforcement ) {
624+ return false , "" , nil
625+ }
626+
627+ // Fetch MachineConfiguration
628+ mcop , err := optr .mcopLister .Get (ctrlcommon .MCOOperatorKnobsObjectName )
629+ if err != nil {
630+ if apierrors .IsNotFound (err ) {
631+ klog .V (4 ).Infof ("MachineConfiguration not found, skipping boot image skew enforcement" )
632+ return false , "" , nil
633+ }
634+ return false , "" , fmt .Errorf ("failed to get MachineConfiguration: %w" , err )
635+ }
636+
637+ // Perform boot image skew enforcement based on mode
638+ skewLimitExceeded := false
639+ skewLimitExceededMessage := ""
640+
641+ switch mcop .Status .BootImageSkewEnforcementStatus .Mode {
642+ case opv1 .BootImageSkewEnforcementModeStatusAutomatic :
643+ skewLimitExceeded , skewLimitExceededMessage = checkBootImageSkew (
644+ mcop .Status .BootImageSkewEnforcementStatus .Automatic .OCPVersion ,
645+ mcop .Status .BootImageSkewEnforcementStatus .Automatic .RHCOSVersion ,
646+ )
647+ case opv1 .BootImageSkewEnforcementModeStatusManual :
648+ skewLimitExceeded , skewLimitExceededMessage = checkBootImageSkew (
649+ mcop .Status .BootImageSkewEnforcementStatus .Manual .OCPVersion ,
650+ mcop .Status .BootImageSkewEnforcementStatus .Manual .RHCOSVersion ,
651+ )
652+ case opv1 .BootImageSkewEnforcementModeStatusNone :
653+ // TODO: Set a low level prom alert to set scaling risk
654+ // Tracked in https://issues.redhat.com/browse/MCO-2035
655+ klog .V (4 ).Infof ("evaluating boot image skew enforcement: mode set to None" )
656+ return false , "" , nil
657+ default :
658+ // Sanity check, this should only be possible if status hasn't been populated yet.
659+ return false , "" , nil
660+ }
661+
662+ if skewLimitExceeded {
663+ // TODO: Update error message; tracked in https://issues.redhat.com/browse/MCO-2034
664+ return true , fmt .Sprintf ("Upgrades have been disabled because %s. To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]" , skewLimitExceededMessage ), nil
665+ }
666+
667+ return false , "" , nil
668+ }
669+
670+ // checkBootImageSkew determines if the cluster's boot images are within acceptable version skew.
671+ // It compares the oldest boot image version (currentOCPVersion, currentRHCOSVersion) against the minimum
672+ // supported version.
673+ // Returns true if the boot image version is older than the minimum, along with an error message.
674+ func checkBootImageSkew (currentOCPVersion , currentRHCOSVersion string ) (bool , string ) {
675+
676+ if currentOCPVersion != "" {
677+ return checkOCPVersionSkew (currentOCPVersion )
678+ }
679+
680+ if currentRHCOSVersion != "" {
681+ return checkRHCOSVersionSkew (currentRHCOSVersion )
682+ }
683+
684+ // This isn't possible due to API validations; more of a sanity check for safety
685+ klog .Warningf ("no boot image versions provided, skipping skew check" )
686+ return false , ""
687+ }
688+
689+ // checkOCPVersionSkew compares a version string against the minimum supported version.
690+ // Returns true if the version is below the minimum, along with an error message.
691+ func checkOCPVersionSkew (version string ) (bool , string ) {
692+ // Parse the boot image version
693+ bootImageVersion , err := k8sversion .ParseGeneric (version )
694+ if err != nil {
695+ klog .Warningf ("Failed to parse boot image version %q: %v" , version , err )
696+ return false , ""
697+ }
698+
699+ // Parse the minimum supported version
700+ minSupportedVersion , err := k8sversion .ParseGeneric (ctrlcommon .OCPVersionBootImageSkewLimit )
701+ if err != nil {
702+ klog .Errorf ("Failed to parse OCPVersionBootImageSkewLimit constant %q: %v" , ctrlcommon .OCPVersionBootImageSkewLimit , err )
703+ return false , ""
704+ }
705+
706+ // Check if boot image version is less than the minimum supported version
707+ if bootImageVersion .LessThan (minSupportedVersion ) {
708+ return true , fmt .Sprintf ("the cluster is using OCP boot image version %s, which is below the minimum required version %s" ,
709+ version , ctrlcommon .OCPVersionBootImageSkewLimit )
710+ }
711+
712+ klog .V (4 ).Infof ("Boot image version %s meets minimum version requirement (>= %s)" ,
713+ version , ctrlcommon .OCPVersionBootImageSkewLimit )
714+ return false , ""
715+ }
716+
717+ // checkRHCOSVersionSkew compares an RHCOS version string against the minimum supported version.
718+ // Returns true if the version is below the minimum, along with an error message.
719+ //
720+ // Note: RHCOS versions can either have formatting of [major].[minor].[datestamp(YYYYMMDD)]-[buildnumber] (example:9.6.20251023-0) or the legacy
721+ // format of [major].[minor].[timestamp(YYYYMMDDHHmm)]-[buildnumber] (example: 48.84.202208021106-0). In the modern(or RHEL) formatting, we just
722+ // need to compare [major.minor] against the RHCOS skew limit. In the legacy format, the minor version includes the whole RHEL major/minor
723+ // and only that bit should be used to compare against the RHCOS skew limit.
724+ func checkRHCOSVersionSkew (version string ) (bool , string ) {
725+ // Split version to extract components
726+ parts := strings .Split (version , "." )
727+ if len (parts ) < 3 {
728+ klog .Warningf ("Failed to parse RHCOS version %q: expected at least 3 parts" , version )
729+ return false , ""
730+ }
731+
732+ major := parts [0 ]
733+ minor := parts [1 ]
734+
735+ // Extract timestamp (remove build number suffix if present)
736+ timestampPart := parts [2 ]
737+ if idx := strings .Index (timestampPart , "-" ); idx != - 1 {
738+ timestampPart = timestampPart [:idx ]
739+ }
740+
741+ var versionToCompare string
742+
743+ // Determine format based on timestamp length
744+ switch len (timestampPart ) {
745+ case 8 :
746+ // Modern format (YYYYMMDD): compare major.minor directly
747+ versionToCompare = fmt .Sprintf ("%s.%s" , major , minor )
748+ case 12 :
749+ // Legacy format (YYYYMMDDHHmm): minor contains RHEL version (e.g., 84 = RHEL 8.4, 810 = RHEL 8.10)
750+ // First digit is RHEL major, remaining digits are RHEL minor.
751+ if len (minor ) >= 2 {
752+ versionToCompare = fmt .Sprintf ("%s.%s" , minor [:1 ], minor [1 :])
753+ } else {
754+ klog .Warningf ("Failed to parse RHCOS legacy version %q: minor version too short" , version )
755+ return false , ""
756+ }
757+ default :
758+ klog .Warningf ("Failed to parse RHCOS version %q: unexpected timestamp format (length %d)" , version , len (timestampPart ))
759+ return false , ""
760+ }
761+
762+ // Parse the version to compare
763+ bootImageVersion , err := k8sversion .ParseGeneric (versionToCompare )
764+ if err != nil {
765+ klog .Warningf ("Failed to parse RHCOS version %q (extracted %q): %v" , version , versionToCompare , err )
766+ return false , ""
767+ }
768+
769+ // Parse the minimum supported version
770+ minSupportedVersion , err := k8sversion .ParseGeneric (ctrlcommon .RHCOSVersionBootImageSkewLimit )
771+ if err != nil {
772+ klog .Errorf ("Failed to parse RHCOSVersionBootImageSkewLimit constant %q: %v" , ctrlcommon .RHCOSVersionBootImageSkewLimit , err )
773+ return false , ""
774+ }
775+
776+ // Check if boot image version is less than the minimum supported version
777+ if bootImageVersion .LessThan (minSupportedVersion ) {
778+ return true , fmt .Sprintf ("the cluster is using RHCOS boot image version %s(RHEL version: %s), which is below the minimum required RHEL version %s" ,
779+ version , versionToCompare , ctrlcommon .RHCOSVersionBootImageSkewLimit )
780+ }
781+
782+ klog .V (4 ).Infof ("RHCOS boot image version %s meets minimum version requirement (>= %s)" ,
783+ version , ctrlcommon .RHCOSVersionBootImageSkewLimit )
784+ return false , ""
785+ }
0 commit comments