Skip to content

Commit 76ef5eb

Browse files
Merge pull request #5612 from isabella-janssen/ocpbugs-74692
OCPBUGS-74692: Write all units when forcefile exists to recover from systemd unit config drift
2 parents 908fca9 + 24a0730 commit 76ef5eb

1 file changed

Lines changed: 26 additions & 6 deletions

File tree

pkg/daemon/update.go

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,9 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
10261026

10271027
var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction
10281028
var actions []string
1029+
// Check for forcefile before calculatePostConfigChange* functions delete it.
1030+
// This is needed for updateFiles to know whether to write all units (OCPBUGS-74692).
1031+
forceFilePresent := forceFileExists()
10291032
// Node Disruption Policies cannot be used during firstboot as API is not accessible.
10301033
if !firstBoot {
10311034
nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, allChangedUnitNames)
@@ -1134,13 +1137,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
11341137
}
11351138

11361139
// update files on disk that need updating
1137-
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite); err != nil {
1140+
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil {
11381141
return err
11391142
}
11401143

11411144
defer func() {
11421145
if retErr != nil {
1143-
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite); err != nil {
1146+
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite, false); err != nil {
11441147
errs := kubeErrs.NewAggregate([]error{err, retErr})
11451148
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
11461149
return
@@ -1278,15 +1281,18 @@ func (dn *Daemon) updateHypershift(oldConfig, newConfig *mcfgv1.MachineConfig, d
12781281
unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig)
12791282
addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated)
12801283

1284+
// Check for forcefile to support config drift recovery (OCPBUGS-74692)
1285+
forceFilePresent := forceFileExists()
1286+
12811287
// update files on disk that need updating
12821288
// We should't skip the certificate write in HyperShift since it does not run the extra daemon process
1283-
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false); err != nil {
1289+
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false, forceFilePresent); err != nil {
12841290
return err
12851291
}
12861292

12871293
defer func() {
12881294
if retErr != nil {
1289-
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false); err != nil {
1295+
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false, false); err != nil {
12901296
errs := kubeErrs.NewAggregate([]error{err, retErr})
12911297
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
12921298
return
@@ -1811,12 +1817,26 @@ func (dn *CoreOSDaemon) getKernelPackagesForTargetRelease() (releaseKernelPackag
18111817
// whatever has been written is picked up by the appropriate daemons, if
18121818
// required. in particular, a daemon-reload and restart for any unit files
18131819
// touched.
1814-
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite bool) error {
1820+
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite, forceFilePresent bool) error {
18151821
klog.Info("Updating files")
18161822
if err := dn.writeFiles(newIgnConfig.Storage.Files, skipCertificateWrite); err != nil {
18171823
return err
18181824
}
1819-
if err := dn.writeUnits(addedOrChangedUnits); err != nil {
1825+
1826+
// With OCPBUGS-58023, we updated this flow to only write units that were either added or
1827+
// updated. As can be seen in OCPBUGS-74692, this impacted the traditional method to recover
1828+
// from config drifts with systemd units. It makes the `touch /run/machine-config-daemon-force`
1829+
// command useless since the new flow does not rewrite all files, only the ones that have been
1830+
// added or changed with the latest MC. To keep the fix for OCPBUGS-58023 and allow continue
1831+
// supporting the traditional config drift recovery for systemd units, all units should be
1832+
// written when a forcefile exists.
1833+
unitsToWrite := addedOrChangedUnits
1834+
if forceFilePresent {
1835+
klog.Info("Forcefile exists, writing all units")
1836+
unitsToWrite = newIgnConfig.Systemd.Units
1837+
}
1838+
1839+
if err := dn.writeUnits(unitsToWrite); err != nil {
18201840
return err
18211841
}
18221842
return dn.deleteStaleData(oldIgnConfig, newIgnConfig)

0 commit comments

Comments
 (0)