Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ Other values are ignored. If you wish to use use a third-party network provider


### Configuring OVNKubernetes
OVNKubernetes supports the following configuration options, all of which are optional and once set at cluster creation, they can't be changed except for `gatewayConfig` and `IPsec` which can be changed at runtime:
OVNKubernetes supports the following configuration options, all of which are optional and once set at cluster creation, they can't be changed except for `gatewayConfig`, `IPsec` and `reachabilityTotalTimeoutSeconds` which can be changed at runtime:
* `MTU`: The MTU to use for the geneve overlay. The default is the MTU of the node that the cluster-network-operator is first run on, minus 100 bytes for geneve overhead. If the nodes in your cluster don't all have the same MTU then you may need to set this explicitly.
* `genevePort`: The UDP port to use for the Geneve overlay. The default is 6081.
* `hybridOverlayConfig`: hybrid linux/windows cluster (see below).
Expand Down
14 changes: 11 additions & 3 deletions pkg/network/ovn_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,10 +501,17 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
}
data.Data["OVNKubeConfigHash"] = hex.EncodeToString(h.Sum(nil))

var reachabilityTimeoutStr string
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure hash is consistent when values do not change.

if timeout, ok := data.Data["ReachabilityTotalTimeoutSeconds"].(*uint32); ok && timeout != nil {
reachabilityTimeoutStr = strconv.FormatUint(uint64(*timeout), 10)
}

// Compute a separate hash for no-overlay node config (outboundSNAT).
// This allows ovnkube-node to restart when outboundSNAT changes
nodeNoOverlayHash := sha1.New()
nodeNoOverlayHashData := fmt.Sprintf("outboundSNAT=%v", data.Data["NoOverlayOutboundSNAT"])
nodeNoOverlayHashData := fmt.Sprintf("outboundSNAT=%v,reachabilityTimeout=%s",
data.Data["NoOverlayOutboundSNAT"],
reachabilityTimeoutStr)
if _, err := nodeNoOverlayHash.Write([]byte(nodeNoOverlayHashData)); err != nil {
return nil, progressing, errors.Wrap(err, "failed to hash node no-overlay config")
}
Expand All @@ -514,9 +521,10 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
// This allows ovnkube-control-plane to restart when bgpManagedConfig changes,
// without restarting ovnkube-node pods.
cpHash := sha1.New()
cpHashData := fmt.Sprintf("asNumber=%v,topology=%v",
cpHashData := fmt.Sprintf("asNumber=%v,topology=%v,reachabilityTimeout=%s",
data.Data["NoOverlayManagedASNumber"],
data.Data["NoOverlayManagedTopology"])
data.Data["NoOverlayManagedTopology"],
reachabilityTimeoutStr)
if _, err := cpHash.Write([]byte(cpHashData)); err != nil {
return nil, progressing, errors.Wrap(err, "failed to hash control-plane config")
}
Expand Down
164 changes: 164 additions & 0 deletions pkg/network/ovn_kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3944,6 +3944,170 @@ func TestRenderOVNKubernetesEnablePersistentIPs(t *testing.T) {
g.Expect(objs).To(ContainElement(HaveKubernetesID("CustomResourceDefinition", "", "ipamclaims.k8s.cni.cncf.io")))
}

// TestRenderOVNKubernetesReachability tests egress IP reachability timeout rendering
func TestRenderOVNKubernetesReachability(t *testing.T) {
g := NewGomegaWithT(t)

testCases := []struct {
name string
reachabilityTimeout *uint32
expectHashChanged bool
expectKubernetesFeatureReachability bool
expectErr bool
}{
{
name: "No reachability timeout (nil)",
reachabilityTimeout: nil,
expectHashChanged: false,
expectKubernetesFeatureReachability: false,
expectErr: false,
},
{
name: "Reachability timeout set to 0",
reachabilityTimeout: ptrToUint32(0),
expectHashChanged: true,
expectKubernetesFeatureReachability: true,
expectErr: false,
},
{
name: "Reachability timeout changed to 10",
reachabilityTimeout: ptrToUint32(10),
expectHashChanged: true,
expectKubernetesFeatureReachability: true,
expectErr: false,
},
{
name: "Reachability timeout unchanged to 10",
reachabilityTimeout: ptrToUint32(10),
expectHashChanged: false,
expectKubernetesFeatureReachability: true,
expectErr: false,
},
{
name: "Reachability timeout changed to 5",
reachabilityTimeout: ptrToUint32(5),
expectHashChanged: true,
expectKubernetesFeatureReachability: true,
expectErr: false,
},
{
name: "Reachability timeout disabled",
reachabilityTimeout: nil,
expectHashChanged: true,
expectKubernetesFeatureReachability: false,
expectErr: false,
},
}

// Track hashes across test iterations
var prevNodeHash, prevControlHash string

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
crd := OVNKubernetesConfig.DeepCopy()
config := &crd.Spec
config.DefaultNetwork.OVNKubernetesConfig.EgressIPConfig.ReachabilityTotalTimeoutSeconds = tc.reachabilityTimeout

errs := validateOVNKubernetes(config)
g.Expect(errs).To(HaveLen(0))
fillDefaults(config, nil)

// at the same time we have an upgrade
t.Setenv("RELEASE_VERSION", "2.0.0")

bootstrapResult := fakeBootstrapResult()
bootstrapResult.OVN = bootstrap.OVNBootstrapResult{
ControlPlaneReplicaCount: 3,
OVNKubernetesConfig: &bootstrap.OVNConfigBoostrapResult{
DpuHostModeLabel: OVN_NODE_SELECTOR_DEFAULT_DPU_HOST,
DpuModeLabel: OVN_NODE_SELECTOR_DEFAULT_DPU,
SmartNicModeLabel: OVN_NODE_SELECTOR_DEFAULT_SMART_NIC,
MgmtPortResourceName: "",
HyperShiftConfig: &bootstrap.OVNHyperShiftBootstrapResult{
Enabled: false,
},
},
}

featureGatesCNO := getDefaultFeatureGates()
fakeClient := cnofake.NewFakeClient()
// Set is as Hypershift hosted control plane.
bootstrapResult.Infra = bootstrap.InfraStatus{}
bootstrapResult.Infra.HostedControlPlane = &hypershift.HostedControlPlane{}
objs, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient, featureGatesCNO)
if tc.expectErr {
g.Expect(err).To(HaveOccurred())
return
}
g.Expect(err).NotTo(HaveOccurred())

// Find the ovnkube-config ConfigMap and check the template data
var configMap *uns.Unstructured
for _, obj := range objs {
if obj.GetKind() == "ConfigMap" && obj.GetName() == "ovnkube-config" {
configMap = obj
break
}
}
g.Expect(configMap).NotTo(BeNil(), "ovnkube-config ConfigMap should exist")

// Check the transport value in the rendered ConfigMap
configMapData, found, err := uns.NestedStringMap(configMap.Object, "data")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(found).To(BeTrue(), "ConfigMap should have data field")
ovnkubeConf := configMapData["ovnkube.conf"]
if tc.expectKubernetesFeatureReachability {
g.Expect(ovnkubeConf).To(ContainSubstring("[ovnkubernetesfeature]"),
"ConfigMap should contain [ovnkubernetesfeature] section when enabled")
g.Expect(ovnkubeConf).To(ContainSubstring("egressip-reachability-total-timeout="),
"ConfigMap should contain egressip-reachability-total-timeout=")
} else {
g.Expect(ovnkubeConf).NotTo(ContainSubstring("egressip-reachability-total-timeout="),
"ConfigMap should not contain egressip-reachability-total-timeout= section when disabled")
}

// Verify that hash annotations exist and change when expected
renderedNode := findInObjs("apps", "DaemonSet", "ovnkube-node", "openshift-ovn-kubernetes", objs)
g.Expect(renderedNode).NotTo(BeNil(), "ovnkube-node DaemonSet should exist")

// Check pod template annotations (this is what triggers restarts)
podTemplateAnnotations, found, err := uns.NestedStringMap(renderedNode.Object, "spec", "template", "metadata", "annotations")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(found).To(BeTrue(), "ovnkube-node DaemonSet pod template should have annotations")

nodeHash, ok := podTemplateAnnotations["network.operator.openshift.io/ovnkube-node-config-hash"]
g.Expect(ok).To(BeTrue(), "ovnkube-node DaemonSet pod template should have node-config-hash annotation")
g.Expect(nodeHash).NotTo(BeEmpty(), "node-config-hash should not be empty")

if tc.expectHashChanged && prevNodeHash != "" {
g.Expect(nodeHash).NotTo(Equal(prevNodeHash), "node hash should change when reachability timeout changes")
} else if !tc.expectHashChanged && prevNodeHash != "" {
g.Expect(nodeHash).To(Equal(prevNodeHash), "node hash should not change when reachability timeout stays the same")
}
prevNodeHash = nodeHash

renderedControlPlane := findInObjs("apps", "Deployment", "ovnkube-control-plane", "openshift-ovn-kubernetes", objs)
g.Expect(renderedControlPlane).NotTo(BeNil(), "ovnkube-control-plane Deployment should exist")

// Check pod template annotations (this is what triggers restarts)
cpPodTemplateAnnotations, found, err := uns.NestedStringMap(renderedControlPlane.Object, "spec", "template", "metadata", "annotations")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(found).To(BeTrue(), "ovnkube-control-plane Deployment pod template should have annotations")

controlHash, ok := cpPodTemplateAnnotations["network.operator.openshift.io/ovnkube-control-plane-config-hash"]
g.Expect(ok).To(BeTrue(), "ovnkube-control-plane Deployment pod template should have control-plane-config-hash annotation")
g.Expect(controlHash).NotTo(BeEmpty(), "control-plane-config-hash should not be empty")

if tc.expectHashChanged && prevControlHash != "" {
g.Expect(controlHash).NotTo(Equal(prevControlHash), "control-plane hash should change when reachability timeout changes")
} else if !tc.expectHashChanged && prevControlHash != "" {
g.Expect(controlHash).To(Equal(prevControlHash), "control-plane hash should not change when reachability timeout stays the same")
}
prevControlHash = controlHash
})
}
}

type fakeClientReader struct {
configMap *v1.ConfigMap
}
Expand Down