Skip to content

Commit 24a0730

Browse files
daemon: rewrite all systemd units when forcefile exists
1 parent 33ed444 commit 24a0730

1 file changed

Lines changed: 26 additions & 6 deletions

File tree

pkg/daemon/update.go

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,9 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
10251025

10261026
var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction
10271027
var actions []string
1028+
// Check for forcefile before calculatePostConfigChange* functions delete it.
1029+
// This is needed for updateFiles to know whether to write all units (OCPBUGS-74692).
1030+
forceFilePresent := forceFileExists()
10281031
// Node Disruption Policies cannot be used during firstboot as API is not accessible.
10291032
if !firstBoot {
10301033
nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, allChangedUnitNames)
@@ -1133,13 +1136,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi
11331136
}
11341137

11351138
// update files on disk that need updating
1136-
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite); err != nil {
1139+
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil {
11371140
return err
11381141
}
11391142

11401143
defer func() {
11411144
if retErr != nil {
1142-
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite); err != nil {
1145+
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite, false); err != nil {
11431146
errs := kubeErrs.NewAggregate([]error{err, retErr})
11441147
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
11451148
return
@@ -1277,15 +1280,18 @@ func (dn *Daemon) updateHypershift(oldConfig, newConfig *mcfgv1.MachineConfig, d
12771280
unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig)
12781281
addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated)
12791282

1283+
// Check for forcefile to support config drift recovery (OCPBUGS-74692)
1284+
forceFilePresent := forceFileExists()
1285+
12801286
// update files on disk that need updating
12811287
// We should't skip the certificate write in HyperShift since it does not run the extra daemon process
1282-
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false); err != nil {
1288+
if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false, forceFilePresent); err != nil {
12831289
return err
12841290
}
12851291

12861292
defer func() {
12871293
if retErr != nil {
1288-
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false); err != nil {
1294+
if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false, false); err != nil {
12891295
errs := kubeErrs.NewAggregate([]error{err, retErr})
12901296
retErr = fmt.Errorf("error rolling back files writes: %w", errs)
12911297
return
@@ -1810,12 +1816,26 @@ func (dn *CoreOSDaemon) getKernelPackagesForTargetRelease() (releaseKernelPackag
18101816
// whatever has been written is picked up by the appropriate daemons, if
18111817
// required. in particular, a daemon-reload and restart for any unit files
18121818
// touched.
1813-
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite bool) error {
1819+
func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite, forceFilePresent bool) error {
18141820
klog.Info("Updating files")
18151821
if err := dn.writeFiles(newIgnConfig.Storage.Files, skipCertificateWrite); err != nil {
18161822
return err
18171823
}
1818-
if err := dn.writeUnits(addedOrChangedUnits); err != nil {
1824+
1825+
// With OCPBUGS-58023, we updated this flow to only write units that were either added or
1826+
// updated. As can be seen in OCPBUGS-74692, this impacted the traditional method to recover
1827+
// from config drifts with systemd units. It makes the `touch /run/machine-config-daemon-force`
1828+
// command useless since the new flow does not rewrite all files, only the ones that have been
1829+
// added or changed with the latest MC. To keep the fix for OCPBUGS-58023 and allow continue
1830+
// supporting the traditional config drift recovery for systemd units, all units should be
1831+
// written when a forcefile exists.
1832+
unitsToWrite := addedOrChangedUnits
1833+
if forceFilePresent {
1834+
klog.Info("Forcefile exists, writing all units")
1835+
unitsToWrite = newIgnConfig.Systemd.Units
1836+
}
1837+
1838+
if err := dn.writeUnits(unitsToWrite); err != nil {
18191839
return err
18201840
}
18211841
return dn.deleteStaleData(oldIgnConfig, newIgnConfig)

0 commit comments

Comments
 (0)