diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml index 9369e659a2..7f691f6b3b 100644 --- a/.github/workflows/performance-test.yml +++ b/.github/workflows/performance-test.yml @@ -209,6 +209,20 @@ jobs: run: | sudo ufw disable + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Download test-image-pr uses: actions/download-artifact@v4 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4955bfffc2..e70c2d9f5d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -355,9 +355,24 @@ jobs: run: | sudo ufw disable + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. 
+ {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_BASE_TAR} && rm -rf ${CI_IMAGE_BASE_TAR} + docker images || true - name: kind setup run: | @@ -634,9 +649,24 @@ jobs: with: name: test-image-pr + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. + {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_PR_TAR} && rm -rf ${CI_IMAGE_PR_TAR} + docker images || true - name: kind setup timeout-minutes: 30 @@ -791,9 +821,25 @@ jobs: with: name: test-image-pr + - name: Disable containerd image store + # Workaround for https://github.com/kubernetes-sigs/kind/issues/3795 + run: | + sudo mkdir -p /etc/docker + docker --version || true + containerd --version || true + [ -s "/etc/docker/daemon.json" ] && { + cat "/etc/docker/daemon.json" | jq '. 
+ {"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } || { + echo '{"features":{"containerd-snapshotter": false}}' | sudo tee /etc/docker/daemon.$$ + } + sudo mv -f /etc/docker/daemon.$$ /etc/docker/daemon.json + sudo systemctl restart docker + - name: Load docker image run: | docker load --input ${CI_IMAGE_PR_TAR} && rm -rf ${CI_IMAGE_PR_TAR} + docker images || true + - name: kind IPv4 setup run: | diff --git a/ADOPTERS.md b/ADOPTERS.md index 0254f998c3..784dde2030 100644 --- a/ADOPTERS.md +++ b/ADOPTERS.md @@ -5,6 +5,8 @@ 1. Red Hat, Inc. (Uses OVN-Kubernetes as their default CNI in OpenShift product) 2. NVIDIA (Uses OVN-Kubernetes in their production environments) 3. Internet Initiative Japan Inc. (Uses OVN-Kubernetes in their on-premise Kubernetes platform) +4. SAIC Motor Corp. Ltd (Uses OVN-Kubernetes as a networking solution to build a multi-tenant private cloud) +5. Nutanix (Builds Flow CNI on OVN-Kubernetes, integrated with Nutanix Flow and VPC networking) ## Projects diff --git a/contrib/kind-common.sh b/contrib/kind-common.sh index 12ed53d7c2..763c63e3f0 100644 --- a/contrib/kind-common.sh +++ b/contrib/kind-common.sh @@ -103,6 +103,7 @@ set_common_default_params() { OVN_ENABLE_DNSNAMERESOLVER=${OVN_ENABLE_DNSNAMERESOLVER:-false} ENABLE_COREDUMPS=${ENABLE_COREDUMPS:-false} METRICS_IP=${METRICS_IP:-""} + OVN_ALLOW_ICMP_NETPOL=${OVN_ALLOW_ICMP_NETPOL:-false} OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} if [ "$OVN_COMPACT_MODE" == true ]; then KIND_NUM_WORKER=0 diff --git a/contrib/kind-helm.sh b/contrib/kind-helm.sh index 85764d6d91..285f8e6c0c 100755 --- a/contrib/kind-helm.sh +++ b/contrib/kind-helm.sh @@ -96,6 +96,7 @@ usage() { echo "-ce | --enable-central [DEPRECATED] Deploy with OVN Central (Legacy Architecture)" echo "-npz | --nodes-per-zone Specify number of nodes per zone (Default 0, which means global zone; >0 means interconnect zone, where 1 for single-node zone, >1 for multi-node zone). 
If this value > 1, then (total k8s nodes (workers + 1) / num of nodes per zone) should be zero." echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" + echo "--allow-icmp-netpol Allows ICMP and ICMPv6 traffic globally, regardless of network policy rules" echo "" } @@ -196,6 +197,8 @@ parse_args() { OVN_ENABLE_INTERCONNECT=false CENTRAL_ARG_PROVIDED=true ;; + --allow-icmp-netpol ) OVN_ALLOW_ICMP_NETPOL=true + ;; -ic | --enable-interconnect ) OVN_ENABLE_INTERCONNECT=true IC_ARG_PROVIDED=true ;; @@ -264,6 +267,7 @@ print_params() { echo "KIND_NUM_WORKER = $KIND_NUM_WORKER" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" echo "MULTI_POD_SUBNET= $MULTI_POD_SUBNET" + echo "OVN_ALLOW_ICMP_NETPOL= $OVN_ALLOW_ICMP_NETPOL" echo "OVN_ENABLE_INTERCONNECT = $OVN_ENABLE_INTERCONNECT" echo "DYNAMIC_UDN_ALLOCATION = $DYNAMIC_UDN_ALLOCATION" echo "DYNAMIC_UDN_GRACE_PERIOD = $DYNAMIC_UDN_GRACE_PERIOD" @@ -371,6 +375,7 @@ helm install ovn-kubernetes . -f "${value_file}" \ --set global.enableNetworkQos=$(if [ "${OVN_NETWORK_QOS_ENABLE}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableNoOverlay=$(if [ "${ENABLE_NO_OVERLAY}" == "true" ]; then echo "true"; else echo "false"; fi) \ --set global.enableCoredumps=$(if [ "${ENABLE_COREDUMPS}" == "true" ]; then echo "true"; else echo "false"; fi) \ + --set global.allowICMPNetworkPolicy=$(if [ "${OVN_ALLOW_ICMP_NETPOL}" == "true" ]; then echo "true"; else echo "false"; fi) \ ${ovnkube_db_options} EOF ) diff --git a/contrib/kind.sh b/contrib/kind.sh index ea8ce6fc7c..a68b922d95 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -126,6 +126,7 @@ echo "-adv | --advertise-default-network Applies a RouteAdvertisement echo "-rud | --routed-udn-isolation-disable Disable isolation across BGP-advertised UDNs (sets advertised-udn-isolation-mode=loose). DEFAULT: strict." 
echo "-mps | --multi-pod-subnet Use multiple subnets for the default cluster network" echo "-noe | --no-overlay-enable Enable no overlay" +echo "--allow-icmp-netpol Allows ICMP and ICMPv6 traffic globally, regardless of network policy rules" echo "" } @@ -377,6 +378,8 @@ parse_args() { ;; -mps| --multi-pod-subnet ) MULTI_POD_SUBNET=true ;; + --allow-icmp-netpol ) OVN_ALLOW_ICMP_NETPOL=true + ;; -h | --help ) usage exit ;; @@ -481,6 +484,7 @@ print_params() { echo "OVN_MTU= $OVN_MTU" echo "OVN_ENABLE_DNSNAMERESOLVER= $OVN_ENABLE_DNSNAMERESOLVER" echo "MULTI_POD_SUBNET= $MULTI_POD_SUBNET" + echo "OVN_ALLOW_ICMP_NETPOL= $OVN_ALLOW_ICMP_NETPOL" echo "" } @@ -761,7 +765,8 @@ create_ovn_kube_manifests() { --network-qos-enable="${OVN_NETWORK_QOS_ENABLE}" \ --mtu="${OVN_MTU}" \ --enable-dnsnameresolver="${OVN_ENABLE_DNSNAMERESOLVER}" \ - --enable-observ="${OVN_OBSERV_ENABLE}" + --enable-observ="${OVN_OBSERV_ENABLE}" \ + --allow-icmp-netpol="${OVN_ALLOW_ICMP_NETPOL}" popd } diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index 4430d29143..15ad3b1e25 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -106,6 +106,8 @@ OVN_NETWORK_QOS_ENABLE= OVN_ENABLE_DNSNAMERESOLVER="false" OVN_NOHOSTSUBNET_LABEL="" OVN_DISABLE_REQUESTEDCHASSIS="false" +OVN_ALLOW_ICMP_NETPOL="false" + # IN_UPGRADE is true only if called by upgrade-ovn.sh during the upgrade test, # it will render only the parts in ovn-setup.yaml related to RBAC permissions. 
IN_UPGRADE= @@ -402,6 +404,9 @@ while [ "$1" != "" ]; do --enable-dnsnameresolver) OVN_ENABLE_DNSNAMERESOLVER=$VALUE ;; + --allow-icmp-netpol) + OVN_ALLOW_ICMP_NETPOL=$VALUE + ;; --enable-observ) OVN_OBSERV_ENABLE=$VALUE ;; @@ -653,6 +658,9 @@ echo "ovn_network_qos_enable: ${ovn_network_qos_enable}" ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER} echo "ovn_enable_dnsnameresolver: ${ovn_enable_dnsnameresolver}" +ovn_allow_icmp_netpol=${OVN_ALLOW_ICMP_NETPOL} +echo "ovn_allow_icmp_netpol: ${ovn_allow_icmp_netpol}" + ovn_observ_enable=${OVN_OBSERV_ENABLE} echo "ovn_observ_enable: ${ovn_observ_enable}" @@ -892,6 +900,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ ovn_nohostsubnet_label=${ovn_nohostsubnet_label} \ ovn_disable_requestedchassis=${ovn_disable_requestedchassis} \ @@ -948,6 +957,7 @@ ovn_image=${ovnkube_image} \ ovn_v6_transit_subnet=${ovn_v6_transit_subnet} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ metrics_ip=${metrics_ip} \ @@ -1056,6 +1066,7 @@ ovn_image=${ovnkube_image} \ ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ jinjanate ../templates/ovnkube-single-node-zone.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone.yaml @@ -1226,6 +1237,7 @@ ovn_image=${ovnkube_image} \ 
ovn_enable_persistent_ips=${ovn_enable_persistent_ips} \ ovn_enable_svc_template_support=${ovn_enable_svc_template_support} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ + ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_observ_enable=${ovn_observ_enable} \ enable_coredumps=${enable_coredumps} \ metrics_ip=${metrics_ip} \ @@ -1292,11 +1304,13 @@ net_cidr=${net_cidr} svc_cidr=${svc_cidr} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ ovn_enable_ovnkube_identity=${ovn_enable_ovnkube_identity} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ jinjanate ../templates/rbac-ovnkube-node.yaml.j2 -o ${output_dir}/rbac-ovnkube-node.yaml ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_evpn_enable=${ovn_evpn_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ @@ -1304,6 +1318,7 @@ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ ovn_network_segmentation_enable=${ovn_network_segmentation_enable} \ ovn_enable_dnsnameresolver=${ovn_enable_dnsnameresolver} \ +ovn_allow_icmp_netpol=${ovn_allow_icmp_netpol} \ ovn_route_advertisements_enable=${ovn_route_advertisements_enable} \ ovn_pre_conf_udn_addr_enable=${ovn_pre_conf_udn_addr_enable} \ ovn_advertised_udn_isolation_mode=${ovn_advertised_udn_isolation_mode} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 8f01b2f458..50751aa3da 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -98,6 +98,7 @@ fi # OVN_NORTHD_BACKOFF_INTERVAL - ovn northd backoff interval in ms (default 300) # OVN_ENABLE_SVC_TEMPLATE_SUPPORT - enable svc template support # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver 
support +# OVN_ALLOW_ICMP_NETPOL - allow ICMP and ICMPv6 regardless of network policy # OVN_OBSERV_ENABLE - enable observability for ovnkube # The argument to the command is the operation to be performed @@ -328,6 +329,8 @@ ovn_enable_svc_template_support=${OVN_ENABLE_SVC_TEMPLATE_SUPPORT:-true} ovn_network_qos_enable=${OVN_NETWORK_QOS_ENABLE:-false} # OVN_ENABLE_DNSNAMERESOLVER - enable dns name resolver support ovn_enable_dnsnameresolver=${OVN_ENABLE_DNSNAMERESOLVER:-false} +# OVN_ALLOW_ICMP_NETPOL - allow ICMP/ICMPv6 with network policy +ovn_allow_icmp_netpol=${OVN_ALLOW_ICMP_NETPOL:-false} # OVN_OBSERV_ENABLE - enable observability for ovnkube ovn_observ_enable=${OVN_OBSERV_ENABLE:-false} # OVN_NOHOSTSUBNET_LABEL - node label indicating nodes managing their own network @@ -1501,6 +1504,12 @@ ovn-master() { fi echo "ovn_enable_dnsnameresolver_flag=${ovn_enable_dnsnameresolver_flag}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + /usr/bin/ovnkube --init-master ${K8S_NODE} \ ${anp_enabled_flag} \ ${disable_forwarding_flag} \ @@ -1537,6 +1546,7 @@ ovn-master() { ${persistent_ips_enabled_flag} \ ${network_qos_enabled_flag} \ ${ovn_enable_dnsnameresolver_flag} \ + ${ovn_allow_icmp_netpol_flag} \ ${nohostsubnet_label_option} \ ${ovn_stateless_netpol_enable_flag} \ ${ovn_disable_requestedchassis_flag} \ @@ -1844,6 +1854,12 @@ ovnkube-controller() { fi echo "ovn_enable_dnsnameresolver_flag=${ovn_enable_dnsnameresolver_flag}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + ovn_observ_enable_flag= if [[ ${ovn_observ_enable} == "true" ]]; then ovn_observ_enable_flag="--enable-observability" @@ -1898,6 +1914,7 @@ ovnkube-controller() { 
${ovn_enable_dnsnameresolver_flag} \ ${dynamic_udn_allocation_flag} \ ${dynamic_udn_grace_period} \ + ${ovn_allow_icmp_netpol_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --gateway-mode=${ovn_gateway_mode} \ --host-network-namespace ${ovn_host_network_namespace} \ @@ -2334,6 +2351,12 @@ ovnkube-controller-with-node() { fi echo "ovn_enable_dnsnameresolver_flag=${ovn_enable_dnsnameresolver_flag}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + ovn_observ_enable_flag= if [[ ${ovn_observ_enable} == "true" ]]; then ovn_observ_enable_flag="--enable-observability" @@ -2433,6 +2456,7 @@ ovnkube-controller-with-node() { ${ovn_enable_dnsnameresolver_flag} \ ${ovn_disable_requestedchassis_flag} \ ${cluster_access_opts} \ + ${ovn_allow_icmp_netpol_flag} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --export-ovs-metrics \ --gateway-mode=${ovn_gateway_mode} ${ovn_gateway_opts} \ @@ -2664,6 +2688,12 @@ ovn-cluster-manager() { fi echo "dynamic_udn_grace_period=${dynamic_udn_grace_period}" + ovn_allow_icmp_netpol_flag= + if [[ ${ovn_allow_icmp_netpol} == "true" ]]; then + ovn_allow_icmp_netpol_flag="--allow-icmp-network-policy" + fi + echo "ovn_allow_icmp_netpol_flag=${ovn_allow_icmp_netpol_flag}" + echo "=============== ovn-cluster-manager ========== MASTER ONLY" /usr/bin/ovnkube --init-cluster-manager ${K8S_NODE} \ ${anp_enabled_flag} \ @@ -2698,6 +2728,7 @@ ovn-cluster-manager() { ${dynamic_udn_allocation_flag} \ ${dynamic_udn_grace_period} \ ${ovn_enable_dnsnameresolver_flag} \ + ${ovn_allow_icmp_netpol_flag} \ --gateway-mode=${ovn_gateway_mode} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --host-network-namespace ${ovn_host_network_namespace} \ diff --git a/dist/templates/ovnkube-control-plane.yaml.j2 b/dist/templates/ovnkube-control-plane.yaml.j2 
index c74c6f400a..4faa67748a 100644 --- a/dist/templates/ovnkube-control-plane.yaml.j2 +++ b/dist/templates/ovnkube-control-plane.yaml.j2 @@ -208,6 +208,8 @@ spec: value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" # end of container volumes: diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index b4fee83afa..84cd608239 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -331,6 +331,8 @@ spec: value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" # end of container volumes: diff --git a/dist/templates/ovnkube-single-node-zone.yaml.j2 b/dist/templates/ovnkube-single-node-zone.yaml.j2 index 258b448e16..3936d90e3a 100644 --- a/dist/templates/ovnkube-single-node-zone.yaml.j2 +++ b/dist/templates/ovnkube-single-node-zone.yaml.j2 @@ -498,6 +498,8 @@ spec: value: "{{ ovn_network_qos_enable }}" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" readinessProbe: exec: diff --git a/dist/templates/ovnkube-zone-controller.yaml.j2 b/dist/templates/ovnkube-zone-controller.yaml.j2 index c984eab781..2ab0293729 100644 --- a/dist/templates/ovnkube-zone-controller.yaml.j2 +++ b/dist/templates/ovnkube-zone-controller.yaml.j2 @@ -419,6 +419,8 @@ spec: value: "local" - name: OVN_ENABLE_DNSNAMERESOLVER value: "{{ ovn_enable_dnsnameresolver }}" + - name: OVN_ALLOW_ICMP_NETPOL + value: "{{ ovn_allow_icmp_netpol }}" - name: OVN_OBSERV_ENABLE value: "{{ ovn_observ_enable }}" # end of container diff --git a/dist/templates/rbac-ovnkube-node.yaml.j2 b/dist/templates/rbac-ovnkube-node.yaml.j2 index 40cebd2294..e74c99ce29 100644 --- 
a/dist/templates/rbac-ovnkube-node.yaml.j2 +++ b/dist/templates/rbac-ovnkube-node.yaml.j2 @@ -235,3 +235,36 @@ rules: {% if ovn_enable_interconnect == "true" -%} - create {%- endif %} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +roleRef: + name: ovnkube-node-dpu-leases + kind: Role + apiGroup: rbac.authorization.k8s.io +subjects: + {% if ovn_enable_ovnkube_identity == "true" -%} + - kind: Group + name: system:ovn-nodes + apiGroup: rbac.authorization.k8s.io + {% else %} + - kind: ServiceAccount + name: ovnkube-node + namespace: ovn-kubernetes + {%- endif %} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +rules: + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: [ "get", "create", "update" ] diff --git a/docs/blog/dpu-acceleration.md b/docs/blog/dpu-acceleration.md new file mode 100644 index 0000000000..9de7f0daac --- /dev/null +++ b/docs/blog/dpu-acceleration.md @@ -0,0 +1,396 @@ +# Accelerating and Offloading Kubernetes Networking: Leveraging DPUs with OVN-Kubernetes + +## Introduction + +This blog post provides a comprehensive guide on deploying OVN-Kubernetes in an environment leveraging Data Processing Units (DPUs). +This setup is crucial for high-performance networking in cloud-native applications, offloading network processing from the host CPU to the +DPU, and providing better security for the networking control-plane. Additionally, OVN-Kubernetes brings in robust features like User Defined +Networks (UDNs) that enable per tenant network isolation into the Kubernetes environment and integrates with the DPU solution. + +In this guide, **offloading** means moving OVN-Kubernetes SDN control and data plane work from the host into the DPU to free host CPU and memory +resources. 
Note, this is different from **OVS offloading**, where datapath processing is offloaded from kernel to hardware +ASICs inside a DPU or SmartNIC. +**Acceleration** means using hardware acceleration capabilities inside the DPU (SmartNIC/OVS offload paths) to deliver higher network +throughput and lower latency. + +## Architecture Overview + +Within a typical Kubernetes worker node, the CNI typically runs as a host networked pod alongside other processes in the host. +In an unaccelerated+non-offloaded environment, OVN-Kubernetes behaves the same way, with its stack composed of OVN-Kubernetes, +Open Virtual Network (OVN), and Open vSwitch (OVS). OVN-Kubernetes listens for KAPI events, configures a logical topology in OVN, +and then OVN translates that into OpenFlow which is programmed into the OVS datapath. Here is an overview of a typical setup: + +![Regular OVN-Kubernetes Worker Node](../images/ovnk-unaccelerated.svg) + +--- + +In this environment there are several potential issues to highlight: + +* The SDN control and data planes are consuming CPU/memory resources on the host itself. +* The SDN control and data planes are able to be compromised if an attacker breaks out of a pod into the host namespace. +* Pods are attached with veth interfaces. Although fast path traffic is handled within the kernel using the OVS kernel module, + there are realistic throughput and latency limitations. + +--- + +A DPU is a system on a chip that is pluggable into the server via a PCIe slot. It provides a specialized environment with +its own CPU, memory, storage and OS. It also includes a SmartNIC architecture that provides packet processing acceleration and offload capabilities. +Integrating the DPU into the OVN-Kubernetes CNI provides the ability to move the SDN control and data plane down into the DPU. +This solves the aforementioned issues by: + +* Relieving the Host of resource consumption. 
+* Adding security so that if a pod breaks out on the host, it cannot access/compromise the SDN control and data plane. +* Pods are accelerated with Virtual Function (VF) interfaces which combined with specialized smart NIC hardware to offload + the OVS datapath results in best in class throughput and latency. + +--- + +Here is a diagram of a DPU accelerated worker node with OVN-Kubernetes: + +![Accelerated OVN-Kubernetes Worker Node](../images/ovnk-accelerated.svg) + +--- + +In this kind of deployment the DPU is not part of the *Host Kubernetes Cluster*. This is typically referred to as an “*off-host-cluster*” +type of deployment. OVN-Kubernetes running in the DPU is typically managed as a secondary Kubernetes cluster, but it does not have to be. +The kubeconfig of the host cluster is provided to OVN-Kube running in the DPU in order to configure the SDN. For the purpose of this guide, +we will treat the DPU as if it is in its own secondary Kubernetes cluster, referred to as the *DPU Kubernetes Cluster*. +OVN-Kube on the DPU is responsible for configuring OVN and wiring up networking within the DPU, while OVN-Kube on the Host is a lightweight +container used to plug in the network interfaces for pods in the Host. + +## Getting Started + +Before starting the deployment, ensure the following prerequisites are met: + +* A Host Kubernetes cluster with nodes equipped with compatible DPUs. +* A secondary DPU Kubernetes cluster that contains the DPUs. +* Kube-proxy should be disabled in both clusters. +* Familiarity with OVN-Kubernetes and general Kubernetes networking concepts. +* Ensure the versions of Kubernetes are compatible with the version of OVN-Kubernetes to be used in the cluster. An OVN-Kubernetes version of at least 1.3 Alpha (latest master as of Feb 2026) is required, which maps to Kubernetes 1.34 as of this writing. 
+ +Note, for setting up Kubernetes clusters quickly for a test environment, see [kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/). + +For this guide, we will focus on the setup using an NVIDIA BlueField-3 DPU, which has been tested to be +compatible with OVN-Kubernetes. +The OVN-Kubernetes DPU architecture is not specific to NVIDIA DPUs, and should be compatible with any DPU vendor. +Currently only NVIDIA BlueField DPUs have been tested and considered supported by upstream. +For the BlueField-3 Linux OS, ensure that it matches the version supported by the DOCA drivers. At the time of this writing, +the current DOCA drivers for BlueField-3 are version 3.2.1, which are compatible with Ubuntu 24.04 on the DPU. +Note, the Host Linux OS also needs DOCA software installed, but there are many different flavors and versions of Linux +supported for 3.2.1. +For the full list check out the [NVIDIA DOCA downloads page](https://developer.nvidia.com/doca-downloads?deployment_platform=Host-Server&deployment_package=DOCA-Host&target_os=Linux&Architecture=x86_64&Profile=doca-all). + +## Limitations + +A pod whose network interface is provided by OVN-Kubernetes on the DPU is considered to be "DPU accelerated". +When using a DPU with OVN-Kubernetes, all OVN-Kubernetes networked pod interfaces on the Host must be DPU accelerated. +There is no support for a mix of unaccelerated and accelerated workloads within the same node. However, it is possible to +have a node with accelerated workloads with a DPU, and then another node with unaccelerated pods. +It is also possible to have a mix of some nodes with DPU and other nodes with other types of SmartNICs. + +Furthermore, it is also not currently possible in OVN-Kubernetes to use more than one SmartNIC or DPU within a node. +There are plans in the future to lift these limitations. 
+ +## OVS Offload Considerations + +OVS provides two methods of datapath hardware offload: + +* **Kernel Hardware Offload** - Using Linux Traffic Control to configure the DPU. +* **OVS DOCA** - A userspace OVS control plane that programs a fully hardware-resident datapath on the DPU using DOCA, with no kernel OVS or TC involvement. + +Either option is supported by OVN-Kubernetes, and either option may be used in this guide. + +## Setting up the DPU and Host + +The DPU must be configured to handle networking functions for the host. The following configuration for the DPU will be done on the Host itself. + +1. Install DOCA-Host drivers on Host if not already present. Note, these are required even if not using OVS DOCA in order to access the rshim interface to manage the DPU. Be sure to use at least version 3.2.1. + * Go to [developer.nvidia.com/doca-downloads](https://developer.nvidia.com/doca-downloads), in the “Select” form, choose **Host-Server** -> **DOCA-Host** -> **Linux** -> **x86_64** -> **doca-ofed** -> {*Your-OS-Distribution*} -> {*Your-OS-Version*} -> {*Preferred installation type*}, then follow the instructions displayed below the form to install the package. + + Note: Some extra packages may be required depending on your distribution. + +2. Make sure that **bfb-install** exists after the above step. +3. Start **rshim** by running either `systemctl enable --now rshim` or simply `rshim`. +4. Make sure that `/dev/rshim*` device file shows up after the above step. + +5. Update BFB and firmware for Mellanox BlueField DPUs. + * Go to [developer.nvidia.com/doca-downloads](https://developer.nvidia.com/doca-downloads), in the “Select” form, choose **BlueField** -> **BF-Bundle** -> **Ubuntu** -> {*Version*} -> **BFB**, then click “Download” to start downloading. + * Upload the BFB package to the Host. + * On the Host, follow the instructions to install the BFB package. 
You may provide a config file to set up a password for the **ubuntu** account, in the following format: “**ubuntu_PASSWORD='$1……'**” + * An encrypted password can be generated by command “**openssl passwd -1**”. + * Please power cycle (off and then on) the Host to reboot and run the newly installed software and firmware. + +6. The DPU has two modes: DPU and NIC. DPU mode is required for this solution. + * Run the following commands on the Host to identify the current mode or update it to enable DPU mode: + + ```bash + user@fedora: mlxconfig -d /dev/mst/ q INTERNAL_CPU_OFFLOAD_ENGINE + ENABLED(0) + + # to configure BlueField 3: + user@fedora: mlxconfig -d /dev/mst/ s INTERNAL_CPU_OFFLOAD_ENGINE=0 + + # to configure BlueField 2: + user@fedora: mlxconfig -d /dev/mst/ s INTERNAL_CPU_PAGE_SUPPLIER=0 INTERNAL_CPU_ESWITCH_MANAGER=0 INTERNAL_CPU_IB_VPORT0=0 INTERNAL_CPU_OFFLOAD_ENGINE=0 + ``` + +7. Reboot. + +8. Optional: At this point you may follow this guide in order to enable OVS DOCA offload support: [docs.nvidia.com/doca/sdk/ovs-doca-hardware-acceleration/index.html](https://docs.nvidia.com/doca/sdk/ovs-doca-hardware-acceleration/index.html) + +9. On the Host, configure the desired number of VFs, then rename the first VF device so that it can be dedicated to and referenced by OVN-Kubernetes as the OVN-Kubernetes management port: + + ```bash + user@fedora: echo ${num_of_desired_vfs} > /sys/class/net/${interface}/device/sriov_numvfs + user@fedora: ip link set ens1f0v0 down + user@fedora: ip link set ens1f0v0 name forOVN0 + user@fedora: ip link set forOVN0 up + ``` + +10. The BFB package installed earlier includes Open vSwitch (OVS). OVS will be installed as a systemd service, and the service is enabled by default. By default DPU will come up with 2 bridges, ovsbr1 and ovsbr2, regardless of whether the port is cabled or not. You may delete them and create your own bridges, just remember to add uplink **p0/p1** and x86 representor **pf0hpf/pf1hpf** to the new bridge. 
We will use a tool called minicom to get into the DPU from the host and configure OVS. + + ```bash + #### run minicom on x86 host to login to the DPU via rshim interface + # minicom -D /dev/rshim0/console + #### login to DPU + user@ubuntu: ovs-vsctl show + c41c98ac-0159-4874-97d5-17a4d2647d70 + Bridge ovsbr2 + Port en3f1pf1sf0 + Interface en3f1pf1sf0 + Port p1 + Interface p1 + Port pf1hpf + Interface pf1hpf + Port ovsbr2 + Interface ovsbr2 + type: internal + Bridge ovsbr1 + Port p0 + Interface p0 + Port ovsbr1 + Interface ovsbr1 + type: internal + Port pf0hpf + Interface pf0hpf + Port en3f0pf0sf0 + Interface en3f0pf0sf0 + ovs_version: "3.2.1005" + user@ubuntu: ovs-vsctl del-br ovsbr1 + user@ubuntu: ovs-vsctl del-br ovsbr2 + user@ubuntu: ovs-vsctl add-br brp0 + user@ubuntu: ovs-vsctl add-port brp0 p0 + user@ubuntu: ovs-vsctl add-port brp0 pf0hpf + ``` + +11. Now that the OVS bridge is created with the proper port configuration, we need to configure the IP address of the bridge. Typically this involves moving the IP address that was already configured on the **en3f0pf0sf0** interface to the **brp0** bridge. This IP address will be used for Geneve encapsulation (ovn-encap-ip), and therefore we must configure the OVS bridge so that OVN is aware of it. Additionally, take note of the default gateway route on the Host (10.1.65.1 in this example). We will need to configure this as well in the OVS bridge so OVN will use it as its default gateway. + + ```bash + #### run minicom on x86 host to login to the DPU via rshim interface + # minicom -D /dev/rshim0/console + #### login to DPU + user@ubuntu: ip addr del 10.1.65.155/24 dev en3f0pf0sf0 + user@ubuntu: ip addr add 10.1.65.155/24 dev brp0 + #### make brp0 as the default route interface + user@ubuntu: ip r add default via 10.1.65.1 dev brp0 + #### configure OVS + user@ubuntu: ovs-vsctl set Open_vSwitch . other_config:hw-offload=true + user@ubuntu: ovs-vsctl set Open_vSwitch . 
external_ids:ovn-encap-ip="10.1.65.155" + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-gw-interface="brp0" + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:ovn-gw-nexthop="10.1.65.1" + #### configure the hostname of the Host as it will appear in the Host Kubernetes Cluster + user@ubuntu: ovs-vsctl set Open_vSwitch . external_ids:host-k8s-nodename="host-worker-1" + ``` + +## Deploying OVN-Kubernetes + +A version of OVN-Kubernetes at least with 1.3 is required for DPUs. At the time of this writing, 1.3 is in Alpha state. The following steps should be done from a jumphost that has Kubeconfig access to both the Host and DPU cluster. + +1. Build or download the OVN-Kubernetes container images. Refer to this [image build guide](../developer-guide/image-build.md) on how to build/obtain the artifacts. +2. Upload the images to a container registry that is reachable by both clusters. +3. Label all Host nodes with DPU with **k8s.ovn.org/dpu-host=""** +4. Label all DPU nodes with **k8s.ovn.org/dpu=""** +5. `git clone https://github.com/ovn-kubernetes/ovn-kubernetes` to obtain the helm charts. +6. Follow the [upstream installation guide](../installation/launching-ovn-kubernetes-with-dpu.md) to configure the helm charts correctly and install OVN-Kubernetes to the Host and DPU. + +## Install SR-IOV Device Plugin + +OVN-Kubernetes relies on SR-IOV Plugin to provision VFs for the pods. Once allocated, OVN-Kubernetes will plug the VF for the pod on the Host into the pod network namespace. Then, on the DPU side, it will plug in the VF representor into OVS. From the jumphost follow these steps and use the kubeconfig of the Host Kubernetes cluster. + +1. `git clone https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin`. Use at least tag v3.11.0. +2. Configure the SR-IOV resource that OVN-Kubernetes will use. 
Replace the content of `deployments/configMap.yaml` with: + + ```yaml + apiVersion: v1 + kind: ConfigMap + metadata: + name: sriovdp-config + namespace: kube-system + data: + config.json: | + { + "resourceList": [ + { + "resourceName": "asap2_vf", + "resourcePrefix": "nvidia.com", + "excludeTopology": true, + "selectors": { + "vendors": [ "15b3" ], + "devices": [ "101e" ], + "drivers": [ "mlx5_core" ], + "pfNames": [ "ens1f0np0#1-7" ] + } + } + ] + } + ``` + +3. `kubectl create -f deployments/configMap.yaml` +4. `kubectl create -f deployments/sriovdp-daemonset.yaml` + +## Install Multus + +Multus is needed in order to pass the VF allocated by SR-IOV Plugin to OVN-Kubernetes as the DeviceID. Furthermore, in addition to the primary network, OVN-Kubernetes supports Secondary Networks using Secondary Network Attachment Definitions (NADs) or Secondary User Defined Networks (UDNs). In simpler terms, a pod can have a VF for its default gateway interface, as well as one or more VFs for secondary networks. To leverage this capability, Multus needs to be installed. Follow these steps on the jumphost while using the Host kubeconfig. + +1. Download the deployment spec for Multus. Use at least tag v4.2.3: + + ```bash + user@jumphost: curl -LO https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/master/deployments/multus-daemonset.yml + ``` + +2. Create the Multus cni-conf file that will be used with OVN-Kubernetes: + + ```bash + user@jumphost: cat > cni-conf.json << 'EOF' + { + "name": "multus-cni-network", + "type": "multus", + "logLevel": "verbose", + "logFile": "/var/log/multus.log", + "namespaceIsolation": false, + "multusNamespace": "default", + "clusterNetwork": "ovn-primary", + "confDir": "/etc/cni/net.d", + "readinessindicatorfile": "/etc/cni/net.d/10-ovn-kubernetes.conf", + "kubeconfig": "/etc/cni/net.d/multus.d/multus.kubeconfig" + } + EOF + ``` + +3. 
Create the configMap using the cni-conf file: + + ```bash + user@jumphost: kubectl -n kube-system delete configmap multus-cni-config --ignore-not-found=true + user@jumphost: kubectl -n kube-system create configmap multus-cni-config --from-file=cni-conf.json + ``` + +4. Edit the `multus-daemonset.yml` previously downloaded. + + ```yaml + ... + spec: + ... + template: + ... + spec: + ... + containers: + - name: kube-multus + image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot + command: ["/thin_entrypoint"] + args: + - "/tmp/multus-conf/00-multus.conf" # Modify multus-conf-file + ... + volumes: + ... + - name: multus-cfg + configMap: + name: multus-cni-config + items: + - key: cni-conf.json + path: 00-multus.conf # Modify to 00-multus.conf + ``` + +5. Create the Multus Daemonset. + + ```bash + kubectl apply -f multus-daemonset.yml + ``` + +## Validating the Setup + +Now that we have configured everything it is time to create a pod and verify that it is properly offloaded. The last step to do before we can start a pod is to create the Network Attachment Definition (NAD) so that OVN-Kubernetes will be invoked as the CNI and use VFs for the default network. Additionally, Primary or Secondary User Defined Networks (UDNs) could also be created, but for the purpose of this guide we will focus on the Cluster Default Network (CDN). Follow these steps from the jumphost with the Host kubeconfig to create the NAD and a pod to test with. + +1. Configure the primary default NAD. Notice the `resourceName` annotation is set to the SR-IOV device plugin resource we previously configured: + + ```bash + cat < # Print variable value ``` +### Debugging C Binaries with GDB (e.g. FRR) + +Some coredumps come from C binaries such as FRR's `bgpd` or `zebra`, not from Go +binaries. These require GDB instead of Delve. + +The key challenge is matching the exact container image that produced the coredump, +since GDB needs the same binary and shared libraries to resolve symbols. + +1. 
**Identify the image that produced the coredump.** Check the CI job logs for the + `docker run` command that started the crashed process. For example, the external + FRR container may use `quay.io/frrouting/frr:9.1.0` (deployed via + `contrib/kind-common.sh`). + +2. **Run the same image with the coredumps mounted:** + + ```bash + docker run --platform linux/amd64 -it \ + -v /path/to/coredumps:/coredumps \ + quay.io/frrouting/frr:9.1.0 sh + ``` + + Using `--platform linux/amd64` is important if the coredump was generated on + x86_64 and you are on a different architecture (e.g. Apple Silicon). + +3. **Install GDB and debug symbols inside the container:** + + ```bash + apk add gdb frr-dbg musl-dbg + ``` + + The exact package names depend on the base distro. Alpine uses the `-dbg` suffix. + +4. **Run GDB:** + + ```bash + gdb /usr/lib/frr/bgpd /coredumps/core.38907.bgpd.ovn-control-plane.11 + ``` + +5. **Explore the crash:** + + ``` + (gdb) bt # Show backtrace + (gdb) thread apply all bt # Backtraces for all threads + (gdb) frame N # Select stack frame number N + (gdb) info locals # Show local variables + (gdb) info args # Show function arguments + (gdb) print *some_ptr # Dereference and print a pointer + (gdb) info sharedlibrary # Check if all shared libraries are resolved + ``` + +6. **Troubleshooting missing symbols.** If the backtrace shows `??` for most frames: + - Run `info sharedlibrary` in GDB. Lines marked `(*)` are missing debug info. + - Verify you are using the exact same image tag that produced the coredump. + Floating tags (like `latest` or even `9.1.0`) may have been rebuilt with updated + packages. If the shared library versions don't match (GDB will print warnings + about missing `.so` files), you need the exact image digest from CI. + - Install additional `-dbg` packages for libraries that appear in the backtrace. 
+ ### Local Development To enable coredump collection in a local KIND cluster: diff --git a/docs/features/hardware-offload/dpu-support.md b/docs/features/hardware-offload/dpu-support.md index 2c5c23e028..2aac8e6965 100644 --- a/docs/features/hardware-offload/dpu-support.md +++ b/docs/features/hardware-offload/dpu-support.md @@ -55,3 +55,15 @@ For detailed configuration of gateway interfaces in DPU host mode, see [DPU Gate - ovnkube-controller-with-node - ovn-controller - ovs-metrics + +## DPU health monitoring + +OVN-Kubernetes uses a custom Kubernetes `Lease` in the `ovn-kubernetes` namespace to track the health of the DPU side of a trusted deployment. +The DPU host creates the lease and sets an owner reference to the Kubernetes `Node`, while ovnkube running on the DPU renews the lease at a regular interval. + +Two ovnkube-node options control this behavior: +- `--dpu-node-lease-renew-interval` (seconds, default 10). Set to `0` to disable the health check. +- `--dpu-node-lease-duration` (seconds, default 40). + +If the lease expires, the DPU host CNI server fails `ADD` requests immediately with `DPU Not Ready`, and the `STATUS` command returns a CNI error with code `50` (The plugin is not available). +This causes the container runtime to report `NetworkReady=false`, preventing new workloads from landing on the affected host until the DPU becomes healthy again. diff --git a/docs/features/user-defined-networks/user-defined-networks.md b/docs/features/user-defined-networks/user-defined-networks.md index f2e7348eb7..ac94e519f3 100644 --- a/docs/features/user-defined-networks/user-defined-networks.md +++ b/docs/features/user-defined-networks/user-defined-networks.md @@ -124,6 +124,7 @@ of end users. Currently supported topology types for a given network include: `Layer3`: is a topology type wherein the pods or VMs are connected to their node’s local router and all these routers are then connected to the distributed switch across nodes. 
+ * Each pod would hence get an IP from the node's subnet segment * When in doubt which topology to use go with layer3 which is the same topology as the cluster default network @@ -142,6 +143,7 @@ network (grey color) which is only used for kubelet healthchecks. `Layer2`: is a topology type wherein the pods or VMs are all connected to the same layer2 flat switch. + * Usually used when the applications deployed expect a layer2 type network connection (Perhaps applications want a single broadcast domain, latency sensitive, use proprietary L2 protocols) * Common in Virtualization world for seamless migration of the VM since @@ -149,7 +151,7 @@ same layer2 flat switch. during live migration * Can be of type `primary` or `secondary` -![l2-UDN](images/L2DeepDive-2segments.png) +![l2-UDN](images/L2DeepDive-2segments.jpg) Here we can see a blue and green P-UDN. On node1, pod1 is part of green UDN and pod2 is part of blue UDN. They each have a udn-0 interface that is attached to @@ -160,6 +162,7 @@ network (grey color) which is only used for kubelet healthchecks. 
`Localnet`: is a topology type wherein the pods or VMs attached to a localnet network on the overlay can egress to the provider’s physical network + * without SNATing to nodeIPs… preserves the podIPs * podIPs can be on the same subnet as the provider’s VLAN * VLAN IDs can be used to mark the traffic coming from the localnet for diff --git a/docs/images/ovnk-accelerated.excalidraw b/docs/images/ovnk-accelerated.excalidraw new file mode 100644 index 0000000000..044d3720cd --- /dev/null +++ b/docs/images/ovnk-accelerated.excalidraw @@ -0,0 +1,1149 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "3PSV2IpmimdHV3TGHwCnL", + "type": "rectangle", + "x": 469.71484375, + "y": 230.94921875, + "width": 975.9257812499999, + "height": 370.6484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 3 + }, + "seed": 1323383732, + "version": 590, + "versionNonce": 1650696134, + "isDeleted": false, + "boundElements": [], + "updated": 1769213052357, + "link": null, + "locked": false + }, + { + "id": "Hhj9Yei2KIDBGtqDrnLuE", + "type": "text", + "x": 494.09765625, + "y": 233.42578125000003, + "width": 676.264217346056, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 234782220, + "version": 297, + "versionNonce": 613304933, + "isDeleted": false, + "boundElements": [], + "updated": 1769807221667, + "link": null, + "locked": false, + "text": "Kubernetes Worker Node", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + 
"originalText": "Kubernetes Worker Node", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "AFBhMx_FMLkpvpoVM55si", + "type": "diamond", + "x": 647.9921875, + "y": 447.890625, + "width": 186.53515625000003, + "height": 124.37109375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + "roundness": { + "type": 2 + }, + "seed": 1109773108, + "version": 938, + "versionNonce": 195622373, + "isDeleted": true, + "boundElements": [], + "updated": 1769806380801, + "link": null, + "locked": false + }, + { + "id": "d21SDy11vytpGym8ZRv9Z", + "type": "text", + "x": 699.8398056030273, + "y": 492.4833984375, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 265828788, + "version": 857, + "versionNonce": 401966827, + "isDeleted": true, + "boundElements": [], + "updated": 1769806380801, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "AFBhMx_FMLkpvpoVM55si", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MMRCgxZ2SkPQRTbazh5Mt", + "type": "diamond", + "x": 826.091796875, + "y": 417.40234375, + "width": 186.71093749999997, + "height": 160.96484375000003, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": { + "type": 2 + }, + "seed": 1941094156, + "version": 1260, + 
"versionNonce": 249372837, + "isDeleted": true, + "boundElements": [], + "updated": 1769806362195, + "link": null, + "locked": false + }, + { + "id": "fBzHO81d67o5rHJ7nhi37", + "type": "text", + "x": 893.0474243164062, + "y": 462.6435546875, + "width": 52.4442138671875, + "height": 70, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 262077836, + "version": 1221, + "versionNonce": 830367787, + "isDeleted": true, + "boundElements": [], + "updated": 1769806362195, + "link": null, + "locked": false, + "text": "Pod\nB", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MMRCgxZ2SkPQRTbazh5Mt", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "tYzSvRmwfQQNkfrVTe1P5", + "type": "rectangle", + "x": 710.88671875, + "y": 671.09375, + "width": 234.94921875, + "height": 144.21484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0C", + "roundness": { + "type": 3 + }, + "seed": 198185882, + "version": 708, + "versionNonce": 962154202, + "isDeleted": false, + "boundElements": [ + { + "id": "6eS7HxMZOU17RQmhuRTEc", + "type": "arrow" + } + ], + "updated": 1769213059064, + "link": null, + "locked": false + }, + { + "id": "3d5gOfYF1IUhTLFt9bgVo", + "type": "rectangle", + "x": 1124.23046875, + "y": 645.26171875, + "width": 263.3984375, + "height": 76.671875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": 
"b0G", + "roundness": { + "type": 3 + }, + "seed": 160618182, + "version": 241, + "versionNonce": 507884954, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + } + ], + "updated": 1769212914995, + "link": null, + "locked": false + }, + { + "id": "otyemJxFJECPTvDVhd1hb", + "type": "text", + "x": 1153.78125, + "y": 666.8203125, + "width": 202.52484130859375, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0H", + "roundness": null, + "seed": 400406918, + "version": 296, + "versionNonce": 1094596075, + "isDeleted": false, + "boundElements": [], + "updated": 1769806431368, + "link": null, + "locked": false, + "text": "OVN-Kube DPU", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN-Kube DPU", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1chdpPBLx0VEf6sBwnyrT", + "type": "rectangle", + "x": 542.08203125, + "y": 300.78515625, + "width": 179.29296875, + "height": 75.30859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0I", + "roundness": { + "type": 3 + }, + "seed": 1050229210, + "version": 33, + "versionNonce": 549854406, + "isDeleted": false, + "boundElements": [], + "updated": 1769211856008, + "link": null, + "locked": false + }, + { + "id": "E1qyG2mrQnzUtFZRnlzon", + "type": "text", + "x": 577.8984375, + "y": 320.80078125, + "width": 99.06443786621094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + 
"opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0J", + "roundness": null, + "seed": 1111918810, + "version": 12, + "versionNonce": 888734571, + "isDeleted": false, + "boundElements": [], + "updated": 1769806292888, + "link": null, + "locked": false, + "text": "Kubelet", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubelet", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "YPI4K5hgJbgHj0hYUQy7U", + "type": "rectangle", + "x": 1124.60546875, + "y": 764.3125, + "width": 260.94921875, + "height": 99.75390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0K", + "roundness": { + "type": 3 + }, + "seed": 2135829062, + "version": 407, + "versionNonce": 874908314, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + }, + { + "id": "6eS7HxMZOU17RQmhuRTEc", + "type": "arrow" + } + ], + "updated": 1769213056152, + "link": null, + "locked": false + }, + { + "id": "xdskfV2GHpQr09QrrldPB", + "type": "text", + "x": 1223.94921875, + "y": 795.8984375, + "width": 55.74822998046875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0L", + "roundness": null, + "seed": 1687121690, + "version": 302, + "versionNonce": 597504357, + "isDeleted": false, + "boundElements": [], + "updated": 1769806428054, + "link": null, + "locked": false, + "text": "OVN", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": 
"SO75S5gL5bCji88YbBsKf", + "type": "arrow", + "x": 1251.1328125, + "y": 721.33984375, + "width": 0.85546875, + "height": 43.734375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0M", + "roundness": { + "type": 2 + }, + "seed": 1411280134, + "version": 307, + "versionNonce": 1938263430, + "isDeleted": false, + "boundElements": [], + "updated": 1769212944324, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.85546875, + 43.734375 + ] + ], + "startBinding": { + "elementId": "3d5gOfYF1IUhTLFt9bgVo", + "mode": "inside", + "fixedPoint": [ + 0.48178852142963075, + 0.9922559608722233 + ] + }, + "endBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.48815172971273746, + 0.007635979167482476 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false, + "moveMidPointsWithElement": false + }, + { + "id": "nOHqcMRCCN5CvsIk9L7HK", + "type": "text", + "x": 798, + "y": 723.234375, + "width": 55.468231201171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0O", + "roundness": null, + "seed": 260809434, + "version": 338, + "versionNonce": 995951973, + "isDeleted": false, + "boundElements": [], + "updated": 1769806415498, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "lv5dwAOUTpgj7UyAdI9C8", + "type": "rectangle", + "x": 469.86556773558385, + "y": 612.6299835602476, + "width": 983.9485520288324, + "height": 
291.42362662950524, + "angle": 0.00044584313856965707, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0S", + "roundness": { + "type": 3 + }, + "seed": 995117318, + "version": 1088, + "versionNonce": 1696375878, + "isDeleted": false, + "boundElements": [], + "updated": 1769213048339, + "link": null, + "locked": false + }, + { + "id": "nXwCdHqSV7QcCaC0Erx7X", + "type": "text", + "x": 498.4374982030654, + "y": 623.1721233376917, + "width": 61.824249267578125, + "height": 35, + "angle": 0.00044584313856965707, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0T", + "roundness": null, + "seed": 383758106, + "version": 10, + "versionNonce": 1631039019, + "isDeleted": false, + "boundElements": [], + "updated": 1769808323507, + "link": null, + "locked": false, + "text": "DPU", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "DPU", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "IfUjPJctzeOIeVT3m_NKp", + "type": "line", + "x": 745.578125, + "y": 567.96484375, + "width": 36.9140625, + "height": 104.56640625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0U", + "roundness": { + "type": 2 + }, + "seed": 1418297306, + "version": 72, + "versionNonce": 1158579290, + "isDeleted": false, + "boundElements": [], + "updated": 1769213000233, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 36.9140625, + 104.56640625 + ] + ], + "startBinding": null, + 
"endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "PrirCcAPzRM9oZsZiPQfu", + "type": "line", + "x": 923.26171875, + "y": 569.6640625, + "width": 61.421875, + "height": 101.24609375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0V", + "roundness": { + "type": 2 + }, + "seed": 424258758, + "version": 73, + "versionNonce": 882050117, + "isDeleted": false, + "boundElements": [], + "updated": 1769806405260, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -61.421875, + 101.24609375 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "p81TteZyn3j3h51DDmQTH", + "type": "line", + "x": 825.7578125, + "y": 815.96484375, + "width": 1.26953125, + "height": 64.45703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0W", + "roundness": { + "type": 2 + }, + "seed": 1162031450, + "version": 132, + "versionNonce": 949266138, + "isDeleted": false, + "boundElements": [], + "updated": 1769213029964, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 1.26953125, + 64.45703125 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "5tWnGTpPXbqm73Qbq0o0q", + "type": "rectangle", + "x": 781.54296875, + "y": 882.6015625, + "width": 81.39453125, + "height": 45, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + 
"frameId": null, + "index": "b0X", + "roundness": { + "type": 3 + }, + "seed": 33982854, + "version": 71, + "versionNonce": 1656813829, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "sTQv7IwvRCIrTkGyDXdOe" + } + ], + "updated": 1769806419424, + "link": null, + "locked": false + }, + { + "id": "sTQv7IwvRCIrTkGyDXdOe", + "type": "text", + "x": 789.7461013793945, + "y": 887.6015625, + "width": 64.98826599121094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Y", + "roundness": null, + "seed": 515413210, + "version": 9, + "versionNonce": 1755680869, + "isDeleted": false, + "boundElements": [], + "updated": 1769806419425, + "link": null, + "locked": false, + "text": "eth0", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "5tWnGTpPXbqm73Qbq0o0q", + "originalText": "eth0", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "6eS7HxMZOU17RQmhuRTEc", + "type": "arrow", + "x": 1125.1015625, + "y": 813.7265625, + "width": 181.09375, + "height": 54.76953125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Z", + "roundness": { + "type": 2 + }, + "seed": 991405146, + "version": 229, + "versionNonce": 1472329882, + "isDeleted": false, + "boundElements": [], + "updated": 1769213071066, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -181.09375, + -54.76953125 + ] + ], + "startBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.0019011122254988548, + 0.4953596741982222 + ] + }, + "endBinding": { + "elementId": "tYzSvRmwfQQNkfrVTe1P5", + "mode": 
"inside", + "fixedPoint": [ + 0.9922190632949274, + 0.6092526883176684 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "MqPB_VDtU5rMycT-a935q", + "type": "rectangle", + "x": 1081.32421875, + "y": 447.765625, + "width": 315.91796875, + "height": 93.0859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0a", + "roundness": { + "type": 3 + }, + "seed": 1303795158, + "version": 444, + "versionNonce": 1847224491, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "7u9OSdgdx8ADuPRU4aG-I" + } + ], + "updated": 1769806402993, + "link": null, + "locked": false + }, + { + "id": "7u9OSdgdx8ADuPRU4aG-I", + "type": "text", + "x": 1100.5006408691406, + "y": 476.80859375, + "width": 277.56512451171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0b", + "roundness": null, + "seed": 1295666890, + "version": 92, + "versionNonce": 1404967755, + "isDeleted": false, + "boundElements": [], + "updated": 1769806402993, + "link": null, + "locked": false, + "text": "OVN-Kube DPU-Host", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MqPB_VDtU5rMycT-a935q", + "originalText": "OVN-Kube DPU-Host", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "uA4KWyM_Ie7myxT2uvmCP", + "type": "diamond", + "x": 829.755859375, + "y": 412.166015625, + "width": 190.07812499999997, + "height": 161.12109375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, 
+ "groupIds": [], + "frameId": null, + "index": "b0c", + "roundness": { + "type": 2 + }, + "seed": 1507403301, + "version": 1119, + "versionNonce": 445224427, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "xVvf92bga4Yjg2V49SLlB" + } + ], + "updated": 1769806392104, + "link": null, + "locked": false + }, + { + "id": "xVvf92bga4Yjg2V49SLlB", + "type": "text", + "x": 882.2992172241211, + "y": 475.4462890625, + "width": 84.95234680175781, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0d", + "roundness": null, + "seed": 1652234629, + "version": 1040, + "versionNonce": 475373707, + "isDeleted": false, + "boundElements": [], + "updated": 1769806392104, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "uA4KWyM_Ie7myxT2uvmCP", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "onVWXBwWSHPutTw8Ddqy2", + "type": "diamond", + "x": 646, + "y": 413.212890625, + "width": 190.07812499999997, + "height": 161.12109375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0e", + "roundness": { + "type": 2 + }, + "seed": 183245483, + "version": 1130, + "versionNonce": 1930976357, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "qg1fbVwEByLpag6WgrzZj" + } + ], + "updated": 1769806393479, + "link": null, + "locked": false + }, + { + "id": "qg1fbVwEByLpag6WgrzZj", + "type": "text", + "x": 699.7333602905273, + "y": 476.4931640625, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0f", + "roundness": null, + "seed": 260672843, + "version": 1052, + "versionNonce": 95689349, + "isDeleted": false, + "boundElements": [], + "updated": 1769806395323, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "onVWXBwWSHPutTw8Ddqy2", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "19Uo1FixsTScuIkccHk0h", + "type": "text", + "x": 492.79887739537054, + "y": 549.5296776267693, + "width": 63.532257080078125, + "height": 35, + "angle": 0.00044584313856965707, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0g", + "roundness": null, + "seed": 1559443979, + "version": 28, + "versionNonce": 492903403, + "isDeleted": false, + "boundElements": [], + "updated": 1769808327521, + "link": null, + "locked": false, + "text": "Host", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Host", + "autoResize": true, + "lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff", + "lockedMultiSelections": {} + }, + "files": {} +} \ No newline at end of file diff --git a/docs/images/ovnk-accelerated.svg b/docs/images/ovnk-accelerated.svg new file mode 100644 index 0000000000..5d65dccc9d --- /dev/null +++ b/docs/images/ovnk-accelerated.svg @@ -0,0 +1,4 @@ + + +Kubernetes Worker NodeOVN-Kube DPUKubeletOVNOVSDPUeth0OVN-Kube DPU-HostPod BPod AHost \ No newline at end of file diff --git 
a/docs/images/ovnk-unaccelerated.excalidraw b/docs/images/ovnk-unaccelerated.excalidraw new file mode 100644 index 0000000000..d373705c25 --- /dev/null +++ b/docs/images/ovnk-unaccelerated.excalidraw @@ -0,0 +1,2131 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "3PSV2IpmimdHV3TGHwCnL", + "type": "rectangle", + "x": 467.4296875, + "y": 221.05078125, + "width": 1031.4375, + "height": 575.5703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aD", + "roundness": { + "type": 3 + }, + "seed": 1323383732, + "version": 437, + "versionNonce": 1307439322, + "isDeleted": false, + "boundElements": [], + "updated": 1769211652514, + "link": null, + "locked": false + }, + { + "id": "Hhj9Yei2KIDBGtqDrnLuE", + "type": "text", + "x": 494.56640624999994, + "y": 233.42578125, + "width": 371.5190825257982, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aE", + "roundness": null, + "seed": 234782220, + "version": 215, + "versionNonce": 956850501, + "isDeleted": false, + "boundElements": [], + "updated": 1769807070243, + "link": null, + "locked": false, + "text": "Kubernetes Worker Node", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubernetes Worker Node", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "AFBhMx_FMLkpvpoVM55si", + "type": "diamond", + "x": 743.5546875, + "y": 445.03125, + "width": 191.01171874999997, + "height": 175.77734374999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": 
"solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aF", + "roundness": { + "type": 2 + }, + "seed": 1109773108, + "version": 903, + "versionNonce": 1838605157, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "d21SDy11vytpGym8ZRv9Z" + } + ], + "updated": 1769807111952, + "link": null, + "locked": false + }, + { + "id": "d21SDy11vytpGym8ZRv9Z", + "type": "text", + "x": 798.0214462280273, + "y": 515.4755859375, + "width": 82.57234191894531, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aG", + "roundness": null, + "seed": 265828788, + "version": 853, + "versionNonce": 1242508997, + "isDeleted": false, + "boundElements": [], + "updated": 1769807111952, + "link": null, + "locked": false, + "text": "Pod A", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "AFBhMx_FMLkpvpoVM55si", + "originalText": "Pod A", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "MMRCgxZ2SkPQRTbazh5Mt", + "type": "diamond", + "x": 775.771484375, + "y": 637.09375, + "width": 167.17578125000003, + "height": 123.34765625000003, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aH", + "roundness": { + "type": 2 + }, + "seed": 1941094156, + "version": 1015, + "versionNonce": 1658634021, + "isDeleted": true, + "boundElements": [ + { + "type": "text", + "id": "fBzHO81d67o5rHJ7nhi37" + } + ], + "updated": 1769807106331, + "link": null, + "locked": false + }, + { + "id": "fBzHO81d67o5rHJ7nhi37", + "type": "text", + "x": 829.225456237793, + "y": 
686.4306640625, + "width": 60.67994689941406, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aI", + "roundness": null, + "seed": 262077836, + "version": 972, + "versionNonce": 1437462955, + "isDeleted": true, + "boundElements": [], + "updated": 1769807106331, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "MMRCgxZ2SkPQRTbazh5Mt", + "originalText": "Pod B", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "o-BS3wjgvpBdXfEGxS4zJ", + "type": "rectangle", + "x": 653.71484375, + "y": 517.03515625, + "width": 135.08984375, + "height": 63.8671875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJ", + "roundness": { + "type": 3 + }, + "seed": 1828499764, + "version": 508, + "versionNonce": 1836541978, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "CI86pCLaDcJplMW-JEioJ", + "type": "text", + "x": 675.0098114013672, + "y": 523.96875, + "width": 92.49990844726562, + "height": 50, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aJV", + "roundness": null, + "seed": 583086900, + "version": 473, + "versionNonce": 1445346950, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVN-Kube\nK2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + 
"verticalAlign": "middle", + "containerId": "o-BS3wjgvpBdXfEGxS4zJ", + "originalText": "OVN-Kube\nK2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "uXVXhzqnTRn3VJnZ7vo3K", + "type": "rectangle", + "x": 837.640625, + "y": 549.546875, + "width": 342.1953125, + "height": 172.0546875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aO", + "roundness": { + "type": 3 + }, + "seed": 1521260428, + "version": 984, + "versionNonce": 893867226, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "EMcQCTn12gbouIGsMbCWR", + "type": "text", + "x": 855, + "y": 565.53515625, + "width": 39.619964599609375, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aQ", + "roundness": null, + "seed": 322143028, + "version": 709, + "versionNonce": 623410630, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "FGWbnjuJMPPm2--6Clo68", + "type": "rectangle", + "x": 1209.94921875, + "y": 505.71875, + "width": 104.7421875, + "height": 80.87109375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aS", + "roundness": { + "type": 3 + }, + "seed": 581768628, + "version": 738, + "versionNonce": 
2113644954, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "MHVGjmp21WFrjCeVuw6_M", + "type": "text", + "x": 1232.8403396606445, + "y": 533.654296875, + "width": 58.95994567871094, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aT", + "roundness": null, + "seed": 962236812, + "version": 713, + "versionNonce": 807458054, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "L7 FW", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "FGWbnjuJMPPm2--6Clo68", + "originalText": "L7 FW", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "par5rlqYTD0bht0WGS4oz", + "type": "rectangle", + "x": 1218.1171875, + "y": 636.783203125, + "width": 104.7421875, + "height": 80.87109375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aU", + "roundness": { + "type": 3 + }, + "seed": 246895884, + "version": 723, + "versionNonce": 2126997082, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "VJCFsIBfciMT2Hg4opZrX", + "type": "text", + "x": 1242.4383087158203, + "y": 664.71875, + "width": 56.099945068359375, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aV", + "roundness": null, + "seed": 1235276684, + "version": 724, + 
"versionNonce": 1082934342, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "L7 LB", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "par5rlqYTD0bht0WGS4oz", + "originalText": "L7 LB", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "ZHSHwJ-WMxVntYGdHzqAS", + "type": "rectangle", + "x": 875.52734375, + "y": 612.0859375, + "width": 131.03125, + "height": 42.83984375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aW", + "roundness": { + "type": 3 + }, + "seed": 436416012, + "version": 786, + "versionNonce": 276663066, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "yhhtdkwmarFaKUI6H9eVm", + "type": "text", + "x": 893.5230178833008, + "y": 621.005859375, + "width": 95.03990173339844, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aWV", + "roundness": null, + "seed": 1538533684, + "version": 713, + "versionNonce": 1749535622, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "worker LS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ZHSHwJ-WMxVntYGdHzqAS", + "originalText": "worker LS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "bIAAQozA4blO6kgvckP3S", + "type": "rectangle", + "x": 1028.86328125, + "y": 611.146484375, + "width": 131.03125, + "height": 42.83984375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": 
"transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ab", + "roundness": { + "type": 3 + }, + "seed": 330947340, + "version": 845, + "versionNonce": 2039551962, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "O88cyEH5yk5c4puqvqvCF", + "type": "text", + "x": 1059.6089401245117, + "y": 620.06640625, + "width": 69.53993225097656, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ac", + "roundness": null, + "seed": 1482856844, + "version": 776, + "versionNonce": 1972398790, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "SFC LS", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "bIAAQozA4blO6kgvckP3S", + "originalText": "SFC LS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "pVasypJX2u2AP43S1rJ-H", + "type": "rectangle", + "x": 480.009765625, + "y": 450.455078125, + "width": 891.2070312499999, + "height": 289.640625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ah", + "roundness": { + "type": 3 + }, + "seed": 1392129420, + "version": 528, + "versionNonce": 62172442, + "isDeleted": true, + "boundElements": [], + "updated": 1769211638753, + "link": null, + "locked": false + }, + { + "id": "oA5KoMQjshRzJUNQcjaiC", + "type": "text", + "x": 500.03125, + "y": 461.015625, + "width": 333.6326293945313, + "height": 25, + "angle": 0, + "strokeColor": "#1971c2", + 
"backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aj", + "roundness": null, + "seed": 1841999628, + "version": 121, + "versionNonce": 974458010, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "DPU - K2 Kubernetes Cluster", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "DPU - K2 Kubernetes Cluster", + "autoResize": false, + "lineHeight": 1.25 + }, + { + "id": "DXSgUtwjW1M5OtOfM2oOj", + "type": "text", + "x": 897.46875, + "y": 400.98828125, + "width": 35.91996765136719, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aq", + "roundness": null, + "seed": 1235853748, + "version": 93, + "versionNonce": 865510426, + "isDeleted": true, + "boundElements": [], + "updated": 1769211693537, + "link": null, + "locked": false, + "text": "VFs", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "VFs", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "jTuqSQnd1xqE5jzyNgo_V", + "type": "text", + "x": 922.2275161743164, + "y": 553.3203125, + "width": 80.11993408203125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ar", + "roundness": null, + "seed": 257701772, + "version": 685, + "versionNonce": 1728258566, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + 
"text": "VF Reps", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "VF Reps", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "kUpychMCsIeTRZMalnMDE", + "type": "rectangle", + "x": 504.18359375, + "y": 517.0546875, + "width": 140.1484375, + "height": 61.43359375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "as", + "roundness": { + "type": 3 + }, + "seed": 250474741, + "version": 420, + "versionNonce": 1471965530, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "KypHgViW6BpvPS6NP1wXN", + "type": "text", + "x": 528.0078582763672, + "y": 522.771484375, + "width": 92.49990844726562, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "at", + "roundness": null, + "seed": 1179519573, + "version": 388, + "versionNonce": 1604120902, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "OVN-Kube\nK1", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "kUpychMCsIeTRZMalnMDE", + "originalText": "OVN-Kube\nK1", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "R8DOjTmolTEKd9pKdGNyl", + "type": "rectangle", + "x": 563.0703125, + "y": 608.494140625, + "width": 176.24218750000003, + "height": 104.79296875000006, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], 
+ "frameId": null, + "index": "au", + "roundness": { + "type": 3 + }, + "seed": 2033483131, + "version": 692, + "versionNonce": 702164506, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false + }, + { + "id": "1A3hQ4xYut0uu_3QxzAE5", + "type": "text", + "x": 594.2914505004883, + "y": 635.890625, + "width": 113.79991149902344, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "av", + "roundness": null, + "seed": 1918703131, + "version": 671, + "versionNonce": 448452742, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "text": "Shared OVN\nK1 and K2", + "fontSize": 20, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "R8DOjTmolTEKd9pKdGNyl", + "originalText": "Shared OVN\nK1 and K2", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "3kqb-K8O9f08ArUDv9Cjf", + "type": "line", + "x": 821.24609375, + "y": 401.33984375, + "width": 113.5859375, + "height": 209.3515625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "aw", + "roundness": { + "type": 2 + }, + "seed": 2063502261, + "version": 139, + "versionNonce": 1596119770, + "isDeleted": true, + "boundElements": [], + "updated": 1769211662686, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 113.5859375, + 209.3515625 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "4QSR0lJotZNZzQEMYnuKY", + "type": "line", + "x": 1010.19140625, + "y": 
400.0546875, + "width": 21.296875, + "height": 211.91796875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "ax", + "roundness": { + "type": 2 + }, + "seed": 1579370107, + "version": 196, + "versionNonce": 1847422726, + "isDeleted": true, + "boundElements": [], + "updated": 1769211665400, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -21.296875, + 211.91796875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "5_Xt3FUxVeazyDS7r-Ucr", + "type": "arrow", + "x": 595.2326807094685, + "y": 579.48828125, + "width": 14.427475540531532, + "height": 25.95703125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b02", + "roundness": { + "type": 2 + }, + "seed": 1308316949, + "version": 81, + "versionNonce": 627137242, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 14.427475540531532, + 25.95703125 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "kUpychMCsIeTRZMalnMDE", + "mode": "orbit", + "fixedPoint": [ + 0.5315652620380965, + 0.531565262038097 + ] + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "QqSmIOXcdN0n9q_f3P72z", + "type": "arrow", + "x": 744.84765625, + "y": 648.58203125, + "width": 124.9464277396745, + "height": 20.664800962877393, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": 
"solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b03", + "roundness": { + "type": 2 + }, + "seed": 2041478555, + "version": 384, + "versionNonce": 1612306374, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 124.9464277396745, + -20.664800962877393 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.5237502641723047, + 0.5237502641723036 + ] + }, + "endBinding": { + "elementId": "ZHSHwJ-WMxVntYGdHzqAS", + "mode": "orbit", + "fixedPoint": [ + 0.23070594032628622, + 0.23070594032628763 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "Xo9luBmfagxGHywoTzuWJ", + "type": "line", + "x": 1090.875, + "y": 612.74609375, + "width": 118.109375, + "height": 61.48046875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b06", + "roundness": { + "type": 2 + }, + "seed": 1795601781, + "version": 77, + "versionNonce": 686092186, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 118.109375, + -61.48046875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "FrZUA_nXF7jrENeAm_iuN", + "type": "line", + "x": 1122.578125, + "y": 611.7421875, + "width": 87.8984375, + "height": 48.03125, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b07", + 
"roundness": { + "type": 2 + }, + "seed": 140615867, + "version": 65, + "versionNonce": 2010574598, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 87.8984375, + -48.03125 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "h0gFSUv-MCabHAwjdzstW", + "type": "line", + "x": 1140.62890625, + "y": 655.7109375, + "width": 76.375, + "height": 37.7421875, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b08", + "roundness": { + "type": 2 + }, + "seed": 735261141, + "version": 47, + "versionNonce": 1217915994, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 76.375, + 37.7421875 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "uyp2yTlDgYoUCgRf1t2Mw", + "type": "line", + "x": 1160.61328125, + "y": 646.52734375, + "width": 57.1875, + "height": 25.95703125, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b09", + "roundness": { + "type": 2 + }, + "seed": 209223931, + "version": 31, + "versionNonce": 510598726, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 57.1875, + 25.95703125 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + 
"endArrowhead": null, + "polygon": false + }, + { + "id": "8RZ9vzUTGLaqkOXekPLDQ", + "type": "arrow", + "x": 747.41015625, + "y": 697.359375, + "width": 286.4296875, + "height": 37.015625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0A", + "roundness": { + "type": 2 + }, + "seed": 1520074645, + "version": 64, + "versionNonce": 441274650, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 286.4296875, + -37.015625 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.8833472697193855, + 0.8833472697193837 + ] + }, + "endBinding": { + "elementId": "bIAAQozA4blO6kgvckP3S", + "mode": "orbit", + "fixedPoint": [ + 0.8338256639609369, + 0.8338256639609338 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "5hOE-WQdQ6EGLVGOoP1I2", + "type": "arrow", + "x": 717.5696974855308, + "y": 581.90234375, + "width": 21.679994111348492, + "height": 25.3359375, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0B", + "roundness": { + "type": 2 + }, + "seed": 1498907035, + "version": 49, + "versionNonce": 2025809286, + "isDeleted": true, + "boundElements": [], + "updated": 1769211644106, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -21.679994111348492, + 25.3359375 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "o-BS3wjgvpBdXfEGxS4zJ", + "mode": "orbit", + "fixedPoint": [ + 0.6290772163270132, + 0.6290772163270139 + ] + }, + "endBinding": { + 
"elementId": "R8DOjTmolTEKd9pKdGNyl", + "mode": "orbit", + "fixedPoint": [ + 0.4954418212421667, + 0.49544182124216624 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "tYzSvRmwfQQNkfrVTe1P5", + "type": "rectangle", + "x": 992.984375, + "y": 572.734375, + "width": 234.94921875, + "height": 144.21484375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0C", + "roundness": { + "type": 3 + }, + "seed": 198185882, + "version": 375, + "versionNonce": 411814042, + "isDeleted": false, + "boundElements": [ + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow" + } + ], + "updated": 1769211901272, + "link": null, + "locked": false + }, + { + "id": "lYB2Gm5YLXtUdQ0eGqYgo", + "type": "freedraw", + "x": 1208.921875, + "y": 367.125, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#1971c2", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0D", + "roundness": null, + "seed": 901415366, + "version": 4, + "versionNonce": 110577818, + "isDeleted": true, + "boundElements": [], + "updated": 1769211701877, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true + }, + { + "id": "tsRvMJO_5sEYyzqg0Hh7c", + "type": "line", + "x": 929.01171875, + "y": 535.390625, + "width": 65.09375, + "height": 64.51171875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0E", + "roundness": { + "type": 2 + }, + "seed": 1228634182, + "version": 
113, + "versionNonce": 628743877, + "isDeleted": false, + "boundElements": [], + "updated": 1769807129093, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 65.09375, + 64.51171875 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "oACgbs4cMqbhu8HtPebFx", + "type": "line", + "x": 932.6796875, + "y": 709.16015625, + "width": 61.11328125, + "height": 36.8828125, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0F", + "roundness": { + "type": 2 + }, + "seed": 983491994, + "version": 68, + "versionNonce": 1634049963, + "isDeleted": false, + "boundElements": [], + "updated": 1769807122256, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 61.11328125, + -36.8828125 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "3d5gOfYF1IUhTLFt9bgVo", + "type": "rectangle", + "x": 981.53125, + "y": 261.81640625, + "width": 263.3984375, + "height": 76.671875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0G", + "roundness": { + "type": 3 + }, + "seed": 160618182, + "version": 76, + "versionNonce": 374670022, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + } + ], + "updated": 1769211893845, + "link": null, + "locked": false + }, + { + "id": "otyemJxFJECPTvDVhd1hb", + "type": "text", + "x": 1047.35546875, + "y": 283.36328125, + "width": 129.50054931640625, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": 
"transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0H", + "roundness": null, + "seed": 400406918, + "version": 91, + "versionNonce": 1488222219, + "isDeleted": false, + "boundElements": [], + "updated": 1769807077202, + "link": null, + "locked": false, + "text": "OVN-Kube", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN-Kube", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "1chdpPBLx0VEf6sBwnyrT", + "type": "rectangle", + "x": 542.08203125, + "y": 300.78515625, + "width": 179.29296875, + "height": 75.30859375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0I", + "roundness": { + "type": 3 + }, + "seed": 1050229210, + "version": 33, + "versionNonce": 549854406, + "isDeleted": false, + "boundElements": [], + "updated": 1769211856008, + "link": null, + "locked": false + }, + { + "id": "E1qyG2mrQnzUtFZRnlzon", + "type": "text", + "x": 577.8984375, + "y": 320.80078125, + "width": 99.06443786621094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0J", + "roundness": null, + "seed": 1111918810, + "version": 12, + "versionNonce": 1083119301, + "isDeleted": false, + "boundElements": [], + "updated": 1769807072474, + "link": null, + "locked": false, + "text": "Kubelet", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Kubelet", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": 
"YPI4K5hgJbgHj0hYUQy7U", + "type": "rectangle", + "x": 983.12890625, + "y": 410.5234375, + "width": 260.94921875, + "height": 99.75390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0K", + "roundness": { + "type": 3 + }, + "seed": 2135829062, + "version": 161, + "versionNonce": 1398047066, + "isDeleted": false, + "boundElements": [ + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow" + }, + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow" + } + ], + "updated": 1769211899581, + "link": null, + "locked": false + }, + { + "id": "xdskfV2GHpQr09QrrldPB", + "type": "text", + "x": 1081.9375, + "y": 441.5078125, + "width": 55.74822998046875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0L", + "roundness": null, + "seed": 1687121690, + "version": 128, + "versionNonce": 1673910021, + "isDeleted": false, + "boundElements": [], + "updated": 1769807080854, + "link": null, + "locked": false, + "text": "OVN", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVN", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "SO75S5gL5bCji88YbBsKf", + "type": "arrow", + "x": 1108.43359375, + "y": 337.89453125, + "width": 2.078125, + "height": 73.390625, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0M", + "roundness": { + "type": 2 + }, + "seed": 1411280134, + "version": 63, + "versionNonce": 800499334, + "isDeleted": false, + 
"boundElements": [], + "updated": 1769211896239, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 2.078125, + 73.390625 + ] + ], + "startBinding": { + "elementId": "3d5gOfYF1IUhTLFt9bgVo", + "mode": "inside", + "fixedPoint": [ + 0.48178852142963075, + 0.9922559608722233 + ] + }, + "endBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.48815172971273746, + 0.007635979167482476 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "u9ZjgE4NtlRvghUr5XIG0", + "type": "arrow", + "x": 1112.98828125, + "y": 508.2265625, + "width": 0.453125, + "height": 66.83984375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0N", + "roundness": { + "type": 2 + }, + "seed": 1453237018, + "version": 64, + "versionNonce": 374911322, + "isDeleted": false, + "boundElements": [], + "updated": 1769211905520, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -0.453125, + 66.83984375 + ] + ], + "startBinding": { + "elementId": "YPI4K5hgJbgHj0hYUQy7U", + "mode": "inside", + "fixedPoint": [ + 0.4976423214526294, + 0.9794415945490856 + ] + }, + "endBinding": { + "elementId": "tYzSvRmwfQQNkfrVTe1P5", + "mode": "inside", + "fixedPoint": [ + 0.5088366834588591, + 0.016170535496627753 + ] + }, + "startArrowhead": null, + "endArrowhead": "arrow", + "elbowed": false + }, + { + "id": "nOHqcMRCCN5CvsIk9L7HK", + "type": "text", + "x": 1083.96875, + "y": 624.0546875, + "width": 55.468231201171875, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0O", + "roundness": null, + "seed": 260809434, + 
"version": 68, + "versionNonce": 1104957285, + "isDeleted": false, + "boundElements": [], + "updated": 1769807084822, + "link": null, + "locked": false, + "text": "OVS", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "OVS", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "4IG9MXRe5ijYB-oIc24C6", + "type": "rectangle", + "x": 1067.27734375, + "y": 782.578125, + "width": 97.47265625, + "height": 31.69921875, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0P", + "roundness": { + "type": 3 + }, + "seed": 1419364058, + "version": 56, + "versionNonce": 1950880902, + "isDeleted": false, + "boundElements": [], + "updated": 1769211952988, + "link": null, + "locked": false + }, + { + "id": "nIejWKUhYhhAgOj88nUB7", + "type": "text", + "x": 1086.921875, + "y": 786.23828125, + "width": 64.98826599121094, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0Q", + "roundness": null, + "seed": 1034482586, + "version": 33, + "versionNonce": 1547842859, + "isDeleted": false, + "boundElements": [], + "updated": 1769807134429, + "link": null, + "locked": false, + "text": "eth0", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "eth0", + "autoResize": true, + "lineHeight": 1.25 + }, + { + "id": "xkJunDpSamP7rfEtN7oH1", + "type": "line", + "x": 1115.1484375, + "y": 783.14453125, + "width": 1.234375, + "height": 65.4609375, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + 
"strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0R", + "roundness": { + "type": 2 + }, + "seed": 1791101850, + "version": 51, + "versionNonce": 500083802, + "isDeleted": false, + "boundElements": [], + "updated": 1769211973078, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -1.234375, + -65.4609375 + ] + ], + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null, + "polygon": false + }, + { + "id": "lzux7Pz5JYywiZFFYn3c9", + "type": "diamond", + "x": 746.962890625, + "y": 619.037109375, + "width": 191.01171874999997, + "height": 175.77734374999997, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0S", + "roundness": { + "type": 2 + }, + "seed": 1138066597, + "version": 917, + "versionNonce": 364881579, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "ZIHVxeeORZWrsAYttnEg3" + } + ], + "updated": 1769807115636, + "link": null, + "locked": false + }, + { + "id": "ZIHVxeeORZWrsAYttnEg3", + "type": "text", + "x": 800.2396469116211, + "y": 689.4814453125, + "width": 84.95234680175781, + "height": 35, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "index": "b0T", + "roundness": null, + "seed": 979871749, + "version": 870, + "versionNonce": 1945999973, + "isDeleted": false, + "boundElements": [], + "updated": 1769807118450, + "link": null, + "locked": false, + "text": "Pod B", + "fontSize": 28, + "fontFamily": 5, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "lzux7Pz5JYywiZFFYn3c9", + "originalText": "Pod B", + "autoResize": true, + 
"lineHeight": 1.25 + } + ], + "appState": { + "gridSize": 20, + "gridStep": 5, + "gridModeEnabled": false, + "viewBackgroundColor": "#ffffff", + "lockedMultiSelections": {} + }, + "files": {} +} \ No newline at end of file diff --git a/docs/images/ovnk-unaccelerated.svg b/docs/images/ovnk-unaccelerated.svg new file mode 100644 index 0000000000..a2bb8222cf --- /dev/null +++ b/docs/images/ovnk-unaccelerated.svg @@ -0,0 +1,4 @@ + + +Kubernetes Worker NodePod AOVN-KubeKubeletOVNOVSeth0Pod B \ No newline at end of file diff --git a/docs/observability/ovn-observability.md b/docs/observability/ovn-observability.md index 5810fea58a..b79cfa6e39 100644 --- a/docs/observability/ovn-observability.md +++ b/docs/observability/ovn-observability.md @@ -7,6 +7,7 @@ specific OVS flows are matched. To see the generated samples, a binary called `o This binary allows printing the samples to stdout or writing them to a file. Currently, supports observability for: + - Network Policy - (Baseline) Admin Network Policy - Egress firewall @@ -37,28 +38,44 @@ insights of what ovn-kubernetes is doing with a packet and why. To enable this feature, use `--observability` flag with `kind.sh` script or `--enable-observability` flag with `ovnkube` binary. -To see the samples, use `ovnkube-observ` binary, use `-h` to see allowed flags. +To see the samples, use `ovnkube-observ` binary, with `-h` to see allowed flags. `ovnkube-observ` is installed on the ovnkube pods. For example: -This feature requires OVS 3.4 and linux kernel 6.11. +``` +kubectl -n ovn-kubernetes exec -it -c ovnkube-controller -- ovnkube-observ -h +Usage of ovnkube-observ: + -add-ovs-collector + Add ovs collector to enable sampling. Use with caution. Make sure no one else is using observability. + -enable-enrichment + Enrich samples with nbdb data. (default true) + -filter-dst-ip string + Filter in only packets to a given destination ip. + -filter-src-ip string + Filter in only packets from a given source ip. 
+ -log-cookie + Print raw sample cookie with psample group_id. + -output-file string + Output file to write the samples to. + -print-full-packet + Print full received packet. When false, only src and dst ips are printed with every sample. +``` -As of Aug 2024, the kernel need to be built from the source, therefore to try this feature you need to: -- rebuild the kernel with the current master branch from [Linus' tree](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git) - - to rebuild on fedora: https://docs.fedoraproject.org/en-US/quick-docs/kernel-build-custom/#_building_a_vanilla_upstream_kernel -- Build an ovn-kubernetes image that uses the latest OVS/OVN code: -`OVS_BRANCH=main make -C dist/images fedora-dev-local-gw-deployment` -- Start kind with that image, use `-ov localhost/ovn-daemonset-fedora:latest` flag with `kind.sh` script. +This feature requires OVS 3.4 and linux kernel 6.11. ## Workflow Description - Observability is enabled by setting the `--enable-observability` flag in the `ovnkube` binary. - For now all mentioned features are enabled by this flag at the same time. -- `ovnkube-observ` binary is used to see the samples. Samples are only generated when the real traffic matching the ACLs -is sent through the OVS. An example output is: +- To start observing and display the samples, run `ovnkube-observ -add-ovs-collector`. Samples are only generated when the real traffic matching the ACLs is sent through the OVS. An example output is: + ``` OVN-K message: Allowed by default allow from local node policy, direction ingress src=10.129.2.2, dst=10.129.2.5 ``` +## Support in observability tools + +- [NetObserv](https://github.com/netobserv/network-observability-operator): through the `NetworkEvents` agent feature. 
+ ## Implementation Details ### User facing API Changes diff --git a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go index 88d94faeb5..c3cb365244 100644 --- a/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go +++ b/go-controller/cmd/ovn-k8s-cni-overlay/ovn-k8s-cni-overlay.go @@ -22,9 +22,11 @@ func main() { c.Action = func(_ *cli.Context) error { skel.PluginMainFuncs( skel.CNIFuncs{ - Add: p.CmdAdd, - Check: p.CmdCheck, - Del: p.CmdDel, + Add: p.CmdAdd, + Check: p.CmdCheck, + Del: p.CmdDel, + GC: p.CmdGC, + Status: p.CmdStatus, }, version.All, bv.BuildString("ovn-k8s-cni-overlay")) diff --git a/go-controller/cmd/ovnkube/ovnkube.go b/go-controller/cmd/ovnkube/ovnkube.go index 7a4ab3bc9a..826c6ffc8f 100644 --- a/go-controller/cmd/ovnkube/ovnkube.go +++ b/go-controller/cmd/ovnkube/ovnkube.go @@ -14,6 +14,7 @@ import ( "text/template" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/urfave/cli/v2" "k8s.io/apimachinery/pkg/util/sets" @@ -269,6 +270,15 @@ func determineOvnkubeRunMode(ctx *cli.Context) (*ovnkubeRunMode, error) { return mode, nil } +// Determine if we should serve both ovnkube-node and OVN/OVS metrics on a single endpoint. +func combineMetricsEndpoints(runMode *ovnkubeRunMode) bool { + return runMode != nil && + runMode.node && + config.Metrics.BindAddress != "" && + config.Metrics.BindAddress == config.Metrics.OVNMetricsBindAddress && + config.OvnKubeNode.Mode != types.NodeModeDPUHost +} + func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { pidfile := ctx.String("pidfile") if pidfile != "" { @@ -319,9 +329,9 @@ func startOvnKube(ctx *cli.Context, cancel context.CancelFunc) error { eventRecorder := util.EventRecorder(ovnClientset.KubeClient) - // Start metric server for master and node. Expose the metrics HTTP endpoint if configured. + // Start the general metrics server only when not combined. 
// Non LE master instances also are required to expose the metrics server. - if config.Metrics.BindAddress != "" { + if config.Metrics.BindAddress != "" && !combineMetricsEndpoints(runMode) { metrics.StartMetricsServer(config.Metrics.BindAddress, config.Metrics.EnablePprof, config.Metrics.NodeServerCert, config.Metrics.NodeServerPrivKey, ctx.Done(), ovnKubeStartWg) } @@ -611,7 +621,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util // start the prometheus server to serve OVS and OVN Metrics (default port: 9476) // Note: for ovnkube node mode dpu-host no metrics is required as ovs/ovn is not running on the node. - if config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.Metrics.OVNMetricsBindAddress != "" { + if runMode.node && config.OvnKubeNode.Mode != types.NodeModeDPUHost && config.Metrics.OVNMetricsBindAddress != "" { if ovsClient == nil { ovsClient, err = libovsdb.NewOVSClient(ctx.Done()) @@ -631,6 +641,12 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util EnableOVNDBMetrics: true, } + if combineMetricsEndpoints(runMode) { + // Reuse the default registry (and its gatherer) so ovnkube-node metrics and OVN metrics share one endpoint. + opts.Registerer = prometheus.DefaultRegisterer + opts.EnablePprof = config.Metrics.EnablePprof + } + if !config.OVNKubernetesFeature.EnableInterconnect { // In Central mode, OVNKube Node doesn't need to register OVN Northd and DB metrics unless // OVNKube Master Pod is running on this node. 
diff --git a/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go b/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go index 1bb5593609..ad9f5e2b50 100644 --- a/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/ho_node_linux.go @@ -89,8 +89,9 @@ func (n *HONodeController) AddPod(pod *corev1.Pod) error { _, ok := pod.Annotations[util.OvnPodAnnotationName] if ok { klog.Infof("Remove the ovnkube pod annotation from pod %s", pod.Name) - delete(pod.Annotations, util.OvnPodAnnotationName) - if err := n.kube.UpdatePodStatus(pod); err != nil { + podToUpdate := pod.DeepCopy() + delete(podToUpdate.Annotations, util.OvnPodAnnotationName) + if err := n.kube.UpdatePodStatus(podToUpdate); err != nil { return fmt.Errorf("failed to remove ovnkube pod annotation from pod %s: %v", pod.Name, err) } return nil diff --git a/go-controller/pkg/clustermanager/clustermanager_test.go b/go-controller/pkg/clustermanager/clustermanager_test.go index 58e234e67f..eba62f731e 100644 --- a/go-controller/pkg/clustermanager/clustermanager_test.go +++ b/go-controller/pkg/clustermanager/clustermanager_test.go @@ -1649,7 +1649,7 @@ var _ = ginkgo.Describe("Cluster Manager", func() { ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - const expectedNADContents = `{"cniVersion": "0.4.0", "name": "ovn-kubernetes", "type": "ovn-k8s-cni-overlay"}` + const expectedNADContents = `{"cniVersion": "1.1.0", "name": "ovn-kubernetes", "type": "ovn-k8s-cni-overlay"}` gomega.Expect(nad.Spec.Config).To(gomega.Equal(expectedNADContents)) return nil diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index ded7851375..891ec2c0ce 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -1242,7 +1242,7 @@ func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs [] 
eIPC.recorder.Eventf(&eIPRef, corev1.EventTypeWarning, "EgressIPConflict", "Egress IP %s with IP "+ "%v is conflicting with a host (%s) IP address and will not be assigned", name, eIP, conflictedHost) klog.Errorf("Egress IP: %v address is already assigned on an interface on node %s", eIP, conflictedHost) - return assignments + continue } if status, exists := existingAllocations[eIP.String()]; exists { // On public clouds we will re-process assignments for the same IP @@ -1294,7 +1294,7 @@ func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs [] "IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node, ) klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node) - return assignments + continue } } // Egress IP for secondary host networks is only available on baremetal environments diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go index 593671f7b8..95d06bc0b1 100644 --- a/go-controller/pkg/clustermanager/egressip_controller_test.go +++ b/go-controller/pkg/clustermanager/egressip_controller_test.go @@ -1367,12 +1367,23 @@ var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { I0212 20:22:37.643187 1837759 egressip_controller.go:1173] Current assignments are: map[] I0212 20:22:37.643205 1837759 egressip_controller.go:1175] Will attempt assignment for egress IP: 192.168.126.51 E0212 20:22:37.643254 1837759 egressip_controller.go:1190] Egress IP: 192.168.126.51 address is already assigned on an interface on node node2*/ - gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(4)) - for i := 0; i < 4; i++ { + gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(8)) + conflictCount := 0 + noMatchingCount := 0 + for i := 0; i < 8; i++ { recordedEvent := 
<-fakeClusterManagerOVN.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring( - "EgressIPConflict Egress IP egressip with IP 192.168.126.51 is conflicting with a host (node2) IP address and will not be assigned")) + gomega.Expect(recordedEvent).To(gomega.SatisfyAny( + gomega.ContainSubstring("EgressIPConflict Egress IP egressip with IP 192.168.126.51 is conflicting with a host (node2) IP address and will not be assigned"), + gomega.ContainSubstring("NoMatchingNodeFound No matching nodes found, which can host any of the egress IPs: [192.168.126.51] for object EgressIP: egressip"))) + if strings.Contains(recordedEvent, "EgressIPConflict") { + conflictCount++ + } + if strings.Contains(recordedEvent, "NoMatchingNodeFound") { + noMatchingCount++ + } } + gomega.Expect(conflictCount).To(gomega.Equal(4)) + gomega.Expect(noMatchingCount).To(gomega.Equal(4)) return nil } diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go index 66f95f1e83..6e97f0a7d0 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go @@ -252,7 +252,7 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e return err } - if namespacePrimaryNetwork.IsDefault() || !namespacePrimaryNetwork.IsPrimaryNetwork() { + if namespacePrimaryNetwork == nil || namespacePrimaryNetwork.IsDefault() || !namespacePrimaryNetwork.IsPrimaryNetwork() { return nil } diff --git a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go index 89e0eb2b8b..a48036e6ab 100644 --- a/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go +++ 
b/go-controller/pkg/clustermanager/networkconnect/cluster_network_connect.go @@ -38,8 +38,8 @@ var ( func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceName string, nadLister nadlisters.NetworkAttachmentDefinitionLister) (nadKey string, network util.NetInfo, err error) { namespacePrimaryNetwork, err := networkMgr.GetActiveNetworkForNamespace(namespaceName) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { - // We intentionally ignore the unprocessed active network error because + if util.IsInvalidPrimaryNetworkError(err) { + // We intentionally ignore the invalid primary network error because // UDN Controller hasn't created the NAD yet, OR NAD doesn't exist in a // namespace that has the required UDN label. It could also be that the // UDN was deleted and the NAD is also gone. @@ -47,13 +47,13 @@ func getPrimaryNADForNamespace(networkMgr networkmanager.Interface, namespaceNam } return "", nil, err } - if namespacePrimaryNetwork.IsDefault() { + if namespacePrimaryNetwork == nil || namespacePrimaryNetwork.IsDefault() { // No primary UDN in this namespace return "", nil, nil } primaryNADKey, err := networkMgr.GetPrimaryNADForNamespace(namespaceName) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) || util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { return "", nil, nil } return "", nil, err diff --git a/go-controller/pkg/clustermanager/networkconnect/controller.go b/go-controller/pkg/clustermanager/networkconnect/controller.go index 8bec1787ee..654f69d33d 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller.go @@ -407,16 +407,19 @@ func (c *Controller) mustProcessCNCForNAD(nad *nadv1.NetworkAttachmentDefinition continue } for _, namespace := range namespaces { - primaryNAD, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) + 
nsPrimaryNetwork, err := c.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) || util.IsInvalidPrimaryNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { continue } klog.Errorf("Failed to get active network for namespace %s: %v", namespace.Name, err) continue } + if nsPrimaryNetwork == nil { + continue + } networkName := c.networkManager.GetNetworkNameForNADKey(nadKey) - if networkName != "" && networkName == primaryNAD.GetNetworkName() { + if networkName != "" && networkName == nsPrimaryNetwork.GetNetworkName() { isSelected = true break selectorLoop } diff --git a/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go index 461eda3c40..23cf247401 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller_components_test.go @@ -1259,11 +1259,11 @@ func TestNADNeedsUpdate(t *testing.T) { udnOwner := makeUDNOwnerRef("test-udn") makePrimaryNADConfig := func(name string) string { - return fmt.Sprintf(`{"cniVersion": "0.4.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "role": "primary", "netAttachDefName": "test/%s"}`, name, name) + return fmt.Sprintf(`{"cniVersion": "1.1.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "role": "primary", "netAttachDefName": "test/%s"}`, name, name) } makeSecondaryNADConfig := func(name string) string { - return fmt.Sprintf(`{"cniVersion": "0.4.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "netAttachDefName": "test/%s"}`, name, name) + return fmt.Sprintf(`{"cniVersion": "1.1.0", "name": "%s", "type": "ovn-k8s-cni-overlay", "topology": "layer3", "netAttachDefName": "test/%s"}`, name, name) } tests := []struct { diff --git 
a/go-controller/pkg/clustermanager/networkconnect/controller_test.go b/go-controller/pkg/clustermanager/networkconnect/controller_test.go index a712abfe34..8598988e5f 100644 --- a/go-controller/pkg/clustermanager/networkconnect/controller_test.go +++ b/go-controller/pkg/clustermanager/networkconnect/controller_test.go @@ -73,7 +73,7 @@ func newTestUDNNAD(name, namespace, network string, networkID string) *nadv1.Net }, Spec: nadv1.NetworkAttachmentDefinitionSpec{ Config: fmt.Sprintf( - `{"cniVersion": "0.4.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "%s/%s", "role": "primary", "subnets": "10.0.0.0/16/24"}`, + `{"cniVersion": "1.1.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "%s/%s", "role": "primary", "subnets": "10.0.0.0/16/24"}`, network, config.CNI.Plugin, namespace, @@ -601,7 +601,7 @@ var _ = ginkgo.Describe("NetworkConnect ClusterManager Controller Integration Te }, Spec: nadv1.NetworkAttachmentDefinitionSpec{ Config: fmt.Sprintf( - `{"cniVersion": "0.4.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "secondary-ns/cudn-secondary", "subnets": "10.0.0.0/16/24"}`, + `{"cniVersion": "1.1.0", "name": "%s", "type": "%s", "topology": "layer3", "netAttachDefName": "secondary-ns/cudn-secondary", "subnets": "10.0.0.0/16/24"}`, network, config.CNI.Plugin, ), @@ -1204,7 +1204,7 @@ var _ = ginkgo.Describe("NetworkConnect ClusterManager Controller Integration Te }, Spec: nadv1.NetworkAttachmentDefinitionSpec{ // Invalid JSON config - missing required fields, will fail ParseNADInfo - Config: `{"cniVersion": "0.4.0", "name": "malformed", "type": "invalid-type"}`, + Config: `{"cniVersion": "1.1.0", "name": "malformed", "type": "invalid-type"}`, }, } _, err := fakeClientset.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions("malformed-ns").Create( diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 
5e5e65f25d..ab377aa759 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -113,11 +113,17 @@ func (a *PodAllocator) Init() error { func (a *PodAllocator) getActiveNetworkForPod(pod *corev1.Pod) (util.NetInfo, error) { activeNetwork, err := a.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { a.recordPodErrorEvent(pod, err) } return nil, err } + // Cluster manager pod allocation should always have an active network + if activeNetwork == nil { + newErr := fmt.Errorf("no active network found for pod %s/%s", pod.Namespace, pod.Name) + a.recordPodErrorEvent(pod, newErr) + return nil, newErr + } return activeNetwork, nil } @@ -131,7 +137,7 @@ func (a *PodAllocator) GetNetworkRole(pod *corev1.Pod) (string, error) { pod, ) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { a.recordPodErrorEvent(pod, err) } return "", err diff --git a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go index 1bad4f1ad5..44a29a4545 100644 --- a/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go +++ b/go-controller/pkg/clustermanager/routeadvertisements/controller_test.go @@ -332,7 +332,7 @@ func (tn testNAD) NAD() *nadtypes.NetworkAttachmentDefinition { // Build the config as a map to properly marshal EVPN config cniConfig := map[string]interface{}{ - "cniVersion": "0.4.0", + "cniVersion": "1.1.0", "name": tn.Network, "type": config.CNI.Plugin, "netAttachDefName": tn.Namespace + "/" + tn.Name, diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go index f1ffd0dcc3..3bd6ca3086 100644 --- 
a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go @@ -504,14 +504,14 @@ func (c *Controller) ReconcileNetAttachDef(key string) error { // ReconcileNamespace enqueue relevant Cluster UDN CR requests following namespace events. func (c *Controller) ReconcileNamespace(key string) error { namespace, err := c.namespaceInformer.Lister().Get(key) - if err != nil { - // Ignore removed namespaces - if apierrors.IsNotFound(err) { - return nil - } + if err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("failed to get namespace %q from cache: %w", key, err) } - namespaceLabels := labels.Set(namespace.Labels) + + var namespaceLabels labels.Set + if namespace != nil { + namespaceLabels = namespace.Labels + } c.namespaceTrackerLock.RLock() defer c.namespaceTrackerLock.RUnlock() @@ -519,8 +519,16 @@ func (c *Controller) ReconcileNamespace(key string) error { for cudnName, affectedNamespaces := range c.namespaceTracker { affectedNamespace := affectedNamespaces.Has(key) - selectedNamespace := false + // For deleted namespaces, only reconcile if tracked + if namespace == nil { + if affectedNamespace { + klog.Errorf("BUG: namespace %q was deleted but still tracked by ClusterUDN %q, forcing reconcile to cleanup", key, cudnName) + c.cudnController.Reconcile(cudnName) + } + continue + } + selectedNamespace := false if !affectedNamespace { cudn, err := c.cudnLister.Get(cudnName) if err != nil { @@ -912,6 +920,10 @@ func (c *Controller) getSelectedNamespaces(sel metav1.LabelSelector) (sets.Set[s return nil, fmt.Errorf("failed to list namespaces: %w", err) } for _, selectedNs := range selectedNamespacesList { + if !selectedNs.DeletionTimestamp.IsZero() { + klog.V(5).Infof("Namespace %s is being deleted, skipping", selectedNs.Name) + continue + } selectedNamespaces.Insert(selectedNs.Name) } return selectedNamespaces, nil diff --git 
a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 166931625d..dbc5e7769e 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -458,7 +458,7 @@ var _ = Describe("User Defined Network Controller", func() { nad := testClusterUdnNAD(cudn.Name, nsName) networkName := ovntypes.CUDNPrefix + cudn.Name nadName := nsName + "/" + cudn.Name - nad.Spec.Config = `{"cniVersion":"1.0.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` + nad.Spec.Config = `{"cniVersion":"1.1.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` expectedNsNADs[nsName] = nad } @@ -1319,7 +1319,7 @@ var _ = Describe("User Defined Network Controller", func() { for _, nsName := range testNamespaces { nad := testClusterUdnNAD(cudn.Name, nsName) nadName := nsName + "/" + cudn.Name - nad.Spec.Config = `{"cniVersion":"1.0.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` + nad.Spec.Config = `{"cniVersion":"1.1.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` nad.Annotations = map[string]string{ "foo": "bar", ovntypes.OvnNetworkNameAnnotation: networkName, @@ -2058,6 +2058,32 @@ var _ = Describe("User Defined Network Controller", func() { Expect(err).To(MatchError(expectedErr)) }) + It("when namespace without pods is being deleted, should delete NAD in that namespace", func() { + const cudnName = "test-network" + testNs := testNamespace("blue") + cudn := testClusterUDN(cudnName, 
testNs.Name) + expectedNAD := testClusterUdnNAD(cudnName, testNs.Name) + c := newTestController(renderNadStub(expectedNAD), cudn, testNs) + Expect(c.Run()).To(Succeed()) + + By("verify NAD is created in namespace") + Eventually(func() error { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudnName, metav1.GetOptions{}) + return err + }).Should(Succeed()) + + By("mark namespace as terminating") + testNs.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err := cs.KubeClient.CoreV1().Namespaces().Update(context.Background(), testNs, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + By("verify NAD is deleted") + Eventually(func() bool { + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(testNs.Name).Get(context.Background(), cudnName, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }).Should(BeTrue(), "NAD should be deleted when namespace is terminating") + }) + It("when CR is deleted, CR has no finalizer, should succeed", func() { deletedCUDN := testClusterUDN("test", "blue") deletedCUDN.Finalizers = []string{} @@ -2505,10 +2531,10 @@ func testEVPNClusterUdnNADWithVIDs(name, namespace, vtepName string, macVID, ipV nad := testClusterUdnNAD(name, namespace) if ipVID > 0 { // Symmetric IRB (both MAC-VRF and IP-VRF) - nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d},"ipVRF":{"vni":200,"vid":%d}}}`, name, namespace, name, vtepName, macVID, ipVID) + nad.Spec.Config = 
fmt.Sprintf(`{"cniVersion":"1.1.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d},"ipVRF":{"vni":200,"vid":%d}}}`, name, namespace, name, vtepName, macVID, ipVID) } else { // MAC-VRF only - nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.0.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d}}}`, name, namespace, name, vtepName, macVID) + nad.Spec.Config = fmt.Sprintf(`{"cniVersion":"1.1.0","name":"cluster_udn_%s","type":"ovn-k8s-cni-overlay","netAttachDefName":"%s/%s","topology":"layer2","role":"primary","subnets":"10.10.0.0/16","transport":"evpn","evpn":{"vtep":"%s","macVRF":{"vni":100,"vid":%d}}}`, name, namespace, name, vtepName, macVID) } return nad } diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go index 77a03b529d..d841956c91 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/nad_test.go @@ -91,15 +91,15 @@ var _ = Describe("PrimaryNetAttachDefNotExist", func() { nads := []*netv1.NetworkAttachmentDefinition{ { ObjectMeta: metav1.ObjectMeta{Name: "test-net1", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net2", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, + Spec: 
netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net3", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "fake-ovn-cni","role": "primary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "fake-ovn-cni","role": "primary"}`}, }, } Expect(PrimaryNetAttachDefNotExist(nads)).To(Succeed()) @@ -108,15 +108,15 @@ var _ = Describe("PrimaryNetAttachDefNotExist", func() { nads := []*netv1.NetworkAttachmentDefinition{ { ObjectMeta: metav1.ObjectMeta{Name: "test-net1", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "primary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "primary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net2", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "ovn-k8s-cni-overlay","role": "secondary"}`}, }, { ObjectMeta: metav1.ObjectMeta{Name: "test-net3", Namespace: "blue"}, - Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.0.0","type": "fake-ovn-cni","role": "primary"}`}, + Spec: netv1.NetworkAttachmentDefinitionSpec{Config: `{"cniVersion": "1.1.0","type": "fake-ovn-cni","role": "primary"}`}, }, } Expect(PrimaryNetAttachDefNotExist(nads)).ToNot(Succeed()) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go index 90ff81befc..d6dbf634f2 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go +++ 
b/go-controller/pkg/clustermanager/userdefinednetwork/notifier/namespace.go @@ -46,10 +46,11 @@ func NewNamespaceNotifier(nsInformer corev1informer.NamespaceInformer, subscribe func (c *NamespaceNotifier) needUpdate(old, new *corev1.Namespace) bool { nsCreated := old == nil && new != nil nsDeleted := old != nil && new == nil + nsDeleting := new != nil && !new.DeletionTimestamp.IsZero() nsLabelsChanged := old != nil && new != nil && !reflect.DeepEqual(old.Labels, new.Labels) - return nsCreated || nsDeleted || nsLabelsChanged + return nsCreated || nsDeleted || nsDeleting || nsLabelsChanged } // reconcile notify subscribers with the request namespace key following namespace events. diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index 62850d4a23..72ee497fa6 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -24,8 +24,6 @@ const ( FinalizerUserDefinedNetwork = "k8s.ovn.org/user-defined-network-protection" LabelUserDefinedNetwork = "k8s.ovn.org/user-defined-network" - - cniVersion = "1.0.0" ) type SpecGetter interface { @@ -139,7 +137,7 @@ func validateTopology(spec SpecGetter) error { func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter, opts *RenderOptions) (map[string]interface{}, error) { netConfSpec := &ovncnitypes.NetConf{ NetConf: cnitypes.NetConf{ - CNIVersion: cniVersion, + CNIVersion: config.CNISpecVersion, Type: OvnK8sCNIOverlay, Name: networkName, }, @@ -224,7 +222,7 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter, opts * // Generating the net-conf JSON string using 'map[string]struct{}' provide the // expected result. 
cniNetConf := map[string]interface{}{ - "cniVersion": cniVersion, + "cniVersion": config.CNISpecVersion, "type": OvnK8sCNIOverlay, "name": networkName, "netAttachDefName": nadName, diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go index 5881617c6b..0967125e02 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go @@ -352,7 +352,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -376,7 +376,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -403,7 +403,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -429,7 +429,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "mynamespace_test-net", "netAttachDefName": "mynamespace/test-net", @@ -490,7 +490,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -514,7 +514,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", 
"netAttachDefName": "mynamespace/test-net", @@ -541,7 +541,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -567,7 +567,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -594,7 +594,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -623,7 +623,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -655,7 +655,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -695,7 +695,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -736,7 +736,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", @@ -778,7 +778,7 @@ var _ = Describe("NetAttachDefTemplate", func() { }, }, `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "type": "ovn-k8s-cni-overlay", "name": "cluster_udn_test-net", "netAttachDefName": "mynamespace/test-net", diff --git a/go-controller/pkg/cni/cni.go 
b/go-controller/pkg/cni/cni.go index 2a7b71b77d..98151304bd 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -35,6 +35,8 @@ var ( BandwidthNotFound = &notFoundError{} ) +const dpuNotReadyMsg = "DPU Not Ready" + type direction int func (d direction) String() string { @@ -457,7 +459,12 @@ func HandlePodRequest( response, err = request.cmdDel(clientset) case CNICheck: err = request.cmdCheck() + case CNIUpdate: + // No-op update path today + case CNIStatus: + // handled by DPU health check gating before reaching here default: + err = fmt.Errorf("unsupported CNI command %s", request.Command) } if response != nil { diff --git a/go-controller/pkg/cni/cniserver.go b/go-controller/pkg/cni/cniserver.go index 19378f4483..c2474f0b97 100644 --- a/go-controller/pkg/cni/cniserver.go +++ b/go-controller/pkg/cni/cniserver.go @@ -4,12 +4,14 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "io" "net/http" "strings" "time" + cnitypes "github.com/containernetworking/cni/pkg/types" "github.com/gorilla/mux" nadv1Listers "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" @@ -27,6 +29,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) +const kubeletDefaultCRIOperationTimeout = 2 * time.Minute + // *** The Server is PRIVATE API between OVN components and may be // changed at any time. It is in no way a supported interface or API. 
*** // @@ -58,6 +62,7 @@ func NewCNIServer( kclient kubernetes.Interface, networkManager networkmanager.Interface, ovsClient client.Client, + dpuHealth DPUStatusProvider, ) (*Server, error) { var nadLister nadv1Listers.NetworkAttachmentDefinitionLister @@ -88,6 +93,7 @@ func NewCNIServer( handlePodRequestFunc: HandlePodRequest, networkManager: networkManager, ovsClient: ovsClient, + dpuHealth: dpuHealth, } if len(config.Kubernetes.CAData) > 0 { @@ -99,6 +105,15 @@ func NewCNIServer( router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { result, err := s.handleCNIRequest(r) if err != nil { + var cniErr *cnitypes.Error + if errors.As(err, &cniErr) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusBadRequest) + if encodeErr := json.NewEncoder(w).Encode(cniErr); encodeErr != nil { + klog.Warningf("Failed to write CNI error response: %v", encodeErr) + } + return + } http.Error(w, fmt.Sprintf("%v", err), http.StatusBadRequest) return } @@ -141,7 +156,22 @@ func cniRequestToPodRequest(cr *Request) (*PodRequest, error) { } req := &PodRequest{ - Command: command(cmd), + Command: command(cmd), + timestamp: time.Now(), + } + + conf, err := config.ReadCNIConfig(cr.Config) + if err != nil { + return nil, fmt.Errorf("broken stdin args") + } + req.CNIConf = conf + req.deviceInfo = cr.DeviceInfo + + // STATUS requests do not carry pod-specific context. Return early after validating config. + if req.Command == CNIStatus { + // Match the Kubelet default CRI operation timeout of 2m. 
+ req.ctx, req.cancel = context.WithTimeout(context.Background(), kubeletDefaultCRIOperationTimeout) + return req, nil } req.SandboxID, ok = cr.Env["CNI_CONTAINERID"] @@ -182,11 +212,6 @@ func cniRequestToPodRequest(cr *Request) (*PodRequest, error) { // containerd 1.5: https://github.com/containerd/containerd/pull/5643 req.PodUID = cniArgs["K8S_POD_UID"] - conf, err := config.ReadCNIConfig(cr.Config) - if err != nil { - return nil, fmt.Errorf("broken stdin args") - } - // the first network to the Pod is always named as `default`, // capture the effective NAD Name here req.netName = conf.Name @@ -211,11 +236,8 @@ func cniRequestToPodRequest(cr *Request) (*PodRequest, error) { } } - req.CNIConf = conf - req.deviceInfo = cr.DeviceInfo - req.timestamp = time.Now() - // Match the Kubelet default CRI operation timeout of 2m - req.ctx, req.cancel = context.WithTimeout(context.Background(), 2*time.Minute) + // Match the Kubelet default CRI operation timeout of 2m. + req.ctx, req.cancel = context.WithTimeout(context.Background(), kubeletDefaultCRIOperationTimeout) return req, nil } @@ -233,10 +255,18 @@ func (s *Server) handleCNIRequest(r *http.Request) ([]byte, error) { } defer req.cancel() + if err := s.checkDPUHealth(req); err != nil { + return nil, err + } + result, err := s.handlePodRequestFunc(req, s.clientSet, s.kubeAuth, s.networkManager, s.ovsClient) if err != nil { // Prefix error with request information for easier debugging - return nil, fmt.Errorf("%s %v", req, err) + var cniErr *cnitypes.Error + if !errors.As(err, &cniErr) { + err = fmt.Errorf("%s %w", req, err) + } + return nil, err } return result, nil } @@ -258,3 +288,27 @@ func (s *Server) handleCNIMetrics(w http.ResponseWriter, r *http.Request) { klog.Warningf("Error writing %s HTTP response for metrics post", err) } } + +func (s *Server) checkDPUHealth(req *PodRequest) error { + if s.dpuHealth == nil || config.OvnKubeNode.Mode != types.NodeModeDPUHost { + return nil + } + + if req.Command != CNIAdd && 
req.Command != CNIStatus { + return nil + } + + ready, reason := s.dpuHealth.Ready() + if ready { + return nil + } + + msg := dpuNotReadyMsg + if reason != "" { + msg = fmt.Sprintf("%s: %s", msg, reason) + } + if req.Command == CNIStatus { + return &cnitypes.Error{Code: 50, Msg: msg} + } + return fmt.Errorf("%s", msg) +} diff --git a/go-controller/pkg/cni/cniserver_test.go b/go-controller/pkg/cni/cniserver_test.go index c070616dbc..cb0507a323 100644 --- a/go-controller/pkg/cni/cniserver_test.go +++ b/go-controller/pkg/cni/cniserver_test.go @@ -28,6 +28,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -57,7 +58,7 @@ var expectedResult cnitypes.Result func serverHandleCNI(request *PodRequest, _ *ClientSet, _ *KubeAPIAuth, _ networkmanager.Interface, _ client.Client) ([]byte, error) { if request.Command == CNIAdd { return json.Marshal(&expectedResult) - } else if request.Command == CNIDel || request.Command == CNIUpdate || request.Command == CNICheck { + } else if request.Command == CNIDel || request.Command == CNIUpdate || request.Command == CNICheck || request.Command == CNIStatus { return nil, nil } return nil, fmt.Errorf("unhandled CNI command %v", request.Command) @@ -103,7 +104,7 @@ func TestCNIServer(t *testing.T) { if err != nil { t.Fatalf("failed to call newOVSClientWithExternalIDs: %v", err) } - s, err := NewCNIServer(wf, fakeClient, networkmanager.Default().Interface(), ovsClient) + s, err := NewCNIServer(wf, fakeClient, networkmanager.Default().Interface(), ovsClient, nil) if err != nil { t.Fatalf("error creating CNI server: %v", err) } @@ -166,7 +167,7 @@ func TestCNIServer(t *testing.T) { Config: []byte(cniConfig), DeviceInfo: nadapi.DeviceInfo{ Type: "vdpa", - Version: 
"1.0.0", + Version: "1.1.0", Vdpa: &nadapi.VdpaDevice{ ParentDevice: "vdpa:0000:65:00.3", Driver: "vhost", @@ -218,6 +219,17 @@ func TestCNIServer(t *testing.T) { }, result: nil, }, + // STATUS request + { + name: "STATUS", + request: &Request{ + Env: map[string]string{ + "CNI_COMMAND": string(CNIStatus), + }, + Config: []byte(cniConfig), + }, + result: nil, + }, // Missing CNI_ARGS { name: "ARGS1", @@ -287,3 +299,108 @@ func TestCNIServer(t *testing.T) { } } } + +func TestCNIServerStatusNotReady(t *testing.T) { + tmpDir, err := utiltesting.MkTmpdir("cniserver-status") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + defer os.RemoveAll(tmpDir) + + socketPath := filepath.Join(tmpDir, serverSocketName) + fakeClient := fake.NewSimpleClientset() + err = config.PrepareTestConfig() + if err != nil { + t.Fatalf("failed to prepare test config: %v", err) + } + fakeClientset := &util.OVNNodeClientset{ + KubeClient: fakeClient, + } + wf, err := factory.NewNodeWatchFactory(fakeClientset, nodeName) + if err != nil { + t.Fatalf("failed to create watch factory: %v", err) + } + if err := wf.Start(); err != nil { + t.Fatalf("failed to start watch factory: %v", err) + } + + ovsClient, err := newOVSClientWithExternalIDs(map[string]string{}) + if err != nil { + t.Fatalf("failed to call newOVSClientWithExternalIDs: %v", err) + } + dpuHealth := &fakeDPUHealth{ready: false, reason: "lease expired"} + s, err := NewCNIServer(wf, fakeClient, networkmanager.Default().Interface(), ovsClient, dpuHealth) + if err != nil { + t.Fatalf("error creating CNI server: %v", err) + } + if err := s.Start(tmpDir); err != nil { + t.Fatalf("error starting CNI server: %v", err) + } + + client := &http.Client{ + Transport: &http.Transport{ + Dial: func(_, _ string) (net.Conn, error) { + return net.Dial("unix", socketPath) + }, + }, + } + + testcases := []struct { + name string + mode string + expectCode int + expectErr bool + }{ + { + name: "DPUHostNotReady", + mode: 
types.NodeModeDPUHost, + expectCode: http.StatusBadRequest, + expectErr: true, + }, + { + name: "FullModeIgnoresHealth", + mode: types.NodeModeFull, + expectCode: http.StatusOK, + expectErr: false, + }, + } + + for _, tc := range testcases { + config.OvnKubeNode.Mode = tc.mode + body, code := clientDoCNI(t, client, &Request{ + Env: map[string]string{ + "CNI_COMMAND": string(CNIStatus), + }, + Config: []byte(cniConfig), + }) + if code != tc.expectCode { + t.Fatalf("[%s] expected status %v but got %v", tc.name, tc.expectCode, code) + } + if tc.expectErr { + var cniErr cnitypes.Error + if err := json.Unmarshal(body, &cniErr); err != nil { + t.Fatalf("[%s] failed to unmarshal error response: %v", tc.name, err) + } + if cniErr.Code != 50 { + t.Fatalf("[%s] expected CNI error code 50 but got %d", tc.name, cniErr.Code) + } + if !strings.Contains(cniErr.Msg, dpuNotReadyMsg) { + t.Fatalf("[%s] expected error to mention DPU not ready, got %q", tc.name, cniErr.Msg) + } + if !strings.Contains(cniErr.Msg, "lease expired") { + t.Fatalf("[%s] expected error to include lease reason, got %q", tc.name, cniErr.Msg) + } + } else if len(body) != 0 { + t.Fatalf("[%s] expected empty body for success, got %q", tc.name, string(body)) + } + } +} + +type fakeDPUHealth struct { + ready bool + reason string +} + +func (f *fakeDPUHealth) Ready() (bool, string) { + return f.ready, f.reason +} diff --git a/go-controller/pkg/cni/cnishim.go b/go-controller/pkg/cni/cnishim.go index df6edaad2a..4828eb2ee0 100644 --- a/go-controller/pkg/cni/cnishim.go +++ b/go-controller/pkg/cni/cnishim.go @@ -98,6 +98,10 @@ func (p *Plugin) doCNI(url string, req interface{}) ([]byte, error) { } if resp.StatusCode != 200 { + var cniErr types.Error + if err := json.Unmarshal(body, &cniErr); err == nil && cniErr.Code != 0 { + return nil, &cniErr + } return nil, fmt.Errorf("CNI request failed with status %v: '%s'", resp.StatusCode, string(body)) } @@ -339,6 +343,51 @@ func (p *Plugin) CmdDel(args *skel.CmdArgs) error { 
return err } +// CmdStatus is the callback for plugin readiness checks +func (p *Plugin) CmdStatus(args *skel.CmdArgs) error { + var err error + + startTime := time.Now() + defer func() { + p.postMetrics(startTime, CNIStatus, err) + if err != nil { + klog.Errorf("Error on CmdStatus: %v", err) + } + }() + + conf, err := config.ReadCNIConfig(args.StdinData) + if err != nil { + return err + } + setupLogging(conf) + + req := newCNIRequest(args, nadapi.DeviceInfo{}) + _, err = p.doCNIFunc("http://dummy/", req) + return err +} + +// CmdGC is the callback for runtime garbage collection. +func (p *Plugin) CmdGC(args *skel.CmdArgs) error { + var err error + + startTime := time.Now() + defer func() { + p.postMetrics(startTime, CNIGC, err) + if err != nil { + klog.Errorf("Error on CmdGC: %v", err) + } + }() + + conf, err := config.ReadCNIConfig(args.StdinData) + if err != nil { + return err + } + setupLogging(conf) + + // OVN-Kubernetes does not maintain independent local plugin state that needs GC. + return nil +} + // CmdCheck is the callback for 'checking' container's networking is as expected. 
func (p *Plugin) CmdCheck(_ *skel.CmdArgs) error { // noop...CMD check is not considered useful, and has a considerable performance impact diff --git a/go-controller/pkg/cni/cnishim_test.go b/go-controller/pkg/cni/cnishim_test.go index 4793feeec5..633b7093fa 100644 --- a/go-controller/pkg/cni/cnishim_test.go +++ b/go-controller/pkg/cni/cnishim_test.go @@ -49,7 +49,7 @@ func TestCmdAdd_PrivilegedMode(t *testing.T) { }() args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -66,7 +66,7 @@ func TestCmdAdd_PrivilegedMode(t *testing.T) { } expected := `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "interfaces": [ { "name": "serverWired" @@ -156,7 +156,7 @@ func TestCmdAdd_UnprivilegedMode(t *testing.T) { }() args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -178,7 +178,7 @@ func TestCmdAdd_UnprivilegedMode(t *testing.T) { // Expected output includes both interfaces wired by CNIShim expected := `{ - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "interfaces": [ { "name": "eth0", @@ -232,7 +232,7 @@ func TestCmdDel_PrivilegedMode(t *testing.T) { } args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -277,7 +277,7 @@ func TestCmdDel_UnprivilegedMode(t *testing.T) { } args := &skel.CmdArgs{ - StdinData: []byte(`{"cniVersion":"1.0.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + StdinData: 
[]byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), ContainerID: "cid", Netns: "/var/run/netns/test", IfName: "eth0", @@ -291,6 +291,16 @@ func TestCmdDel_UnprivilegedMode(t *testing.T) { }) } +func TestCmdGC(t *testing.T) { + p := &Plugin{} + args := &skel.CmdArgs{ + StdinData: []byte(`{"cniVersion":"1.1.0","name":"mynet","type":"ovn-k8s-cni-overlay"}`), + } + + err := p.CmdGC(args) + require.NoError(t, err) +} + func withCNIEnv(t *testing.T, fn func()) { t.Helper() diff --git a/go-controller/pkg/cni/helper_linux_test.go b/go-controller/pkg/cni/helper_linux_test.go index 0b99996770..8e1221ed93 100644 --- a/go-controller/pkg/cni/helper_linux_test.go +++ b/go-controller/pkg/cni/helper_linux_test.go @@ -1240,7 +1240,7 @@ func TestPodRequest_deletePodConntrack(t *testing.T) { }, }, inpPrevResult: &current.Result{ - CNIVersion: "1.0.0", + CNIVersion: "1.1.0", Interfaces: []*current.Interface{{Name: "eth0"}}, IPs: []*current.IPConfig{{Interface: &[]int{0}[0], Address: *ovntest.MustParseIPNet("192.168.1.15/24"), Gateway: ovntest.MustParseIP("192.168.1.1")}}, }, @@ -1255,7 +1255,7 @@ func TestPodRequest_deletePodConntrack(t *testing.T) { }, }, inpPrevResult: &current.Result{ - CNIVersion: "1.0.0", + CNIVersion: "1.1.0", Interfaces: []*current.Interface{{Name: "eth0", Sandbox: "blah"}}, IPs: []*current.IPConfig{{Interface: &[]int{0}[0], Address: *ovntest.MustParseIPNet("192.168.1.15/24"), Gateway: ovntest.MustParseIP("192.168.1.1")}}, }, diff --git a/go-controller/pkg/cni/types.go b/go-controller/pkg/cni/types.go index 866d5e2749..22866aa193 100644 --- a/go-controller/pkg/cni/types.go +++ b/go-controller/pkg/cni/types.go @@ -81,6 +81,12 @@ const CNIDel command = "DEL" // CNICheck is the command representing check operation on a pod const CNICheck command = "CHECK" +// CNIStatus is the command representing a plugin readiness check +const CNIStatus command = "STATUS" + +// CNIGC is the command representing CNI runtime garbage collection +const CNIGC command = 
"GC" + // Request sent to the Server by the OVN CNI plugin type Request struct { // CNI environment variables, like CNI_COMMAND and CNI_NETNS @@ -201,6 +207,11 @@ func NewClientSet(kclient kubernetes.Interface, podLister corev1listers.PodListe } } +// DPUStatusProvider reports whether the DPU is ready to service CNI requests. +type DPUStatusProvider interface { + Ready() (bool, string) +} + // Server object that listens for JSON-marshaled Request objects // on a private root-only Unix domain socket. type Server struct { @@ -210,4 +221,5 @@ type Server struct { kubeAuth *KubeAPIAuth networkManager networkmanager.Interface ovsClient client.Client + dpuHealth DPUStatusProvider } diff --git a/go-controller/pkg/cni/udn/primary_network.go b/go-controller/pkg/cni/udn/primary_network.go index c751a6cbda..8ac6fecfc6 100644 --- a/go-controller/pkg/cni/udn/primary_network.go +++ b/go-controller/pkg/cni/udn/primary_network.go @@ -152,6 +152,10 @@ func (p *UserDefinedPrimaryNetwork) ensureActiveNetwork(namespace string) error if err != nil { return err } + // CNI should always have an active network for a pod on our node + if activeNetwork == nil { + return fmt.Errorf("no active network found for namespace %s", namespace) + } if activeNetwork.IsDefault() { return fmt.Errorf("missing primary user defined network NAD for namespace '%s'", namespace) } diff --git a/go-controller/pkg/config/cni.go b/go-controller/pkg/config/cni.go index 3bec2d286f..1038b4ac8d 100644 --- a/go-controller/pkg/config/cni.go +++ b/go-controller/pkg/config/cni.go @@ -19,13 +19,16 @@ import ( var ErrorAttachDefNotOvnManaged = errors.New("net-attach-def not managed by OVN") var ErrorChainingNotSupported = errors.New("CNI plugin chaining is not supported") +// CNISpecVersion is the CNI spec version used when OVN-Kubernetes renders CNI config. 
+const CNISpecVersion = "1.1.0" + // WriteCNIConfig writes a CNI JSON config file to directory given by global config // if the file doesn't already exist, or is different than the content that would // be written. func WriteCNIConfig() error { netConf := &ovncnitypes.NetConf{ NetConf: types.NetConf{ - CNIVersion: "0.4.0", + CNIVersion: CNISpecVersion, Name: "ovn-kubernetes", Type: CNI.Plugin, }, diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 55c985a187..c3f8270858 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -236,7 +236,9 @@ var ( // OvnKubeNode holds ovnkube-node parsed config file parameters and command-line overrides OvnKubeNode = OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: 10, + DPUNodeLeaseDuration: 40, } ClusterManager = ClusterManagerConfig{ @@ -507,6 +509,7 @@ type OVNKubernetesFeatureConfig struct { EnableServiceTemplateSupport bool `gcfg:"enable-svc-template-support"` EnableObservability bool `gcfg:"enable-observability"` EnableNetworkQoS bool `gcfg:"enable-network-qos"` + AllowICMPNetworkPolicy bool `gcfg:"allow-icmp-network-policy"` // This feature requires a kernel fix https://github.com/torvalds/linux/commit/7f3287db654395f9c5ddd246325ff7889f550286 // to work on a kind cluster. Flag allows to disable it for current CI, will be turned on when github runners have this fix. 
AdvertisedUDNIsolationMode string `gcfg:"advertised-udn-isolation-mode"` @@ -639,9 +642,11 @@ type HybridOverlayConfig struct { // OvnKubeNodeConfig holds ovnkube-node configurations type OvnKubeNodeConfig struct { - Mode string `gcfg:"mode"` - MgmtPortNetdev string `gcfg:"mgmt-port-netdev"` - MgmtPortDPResourceName string `gcfg:"mgmt-port-dp-resource-name"` + Mode string `gcfg:"mode"` + MgmtPortNetdev string `gcfg:"mgmt-port-netdev"` + MgmtPortDPResourceName string `gcfg:"mgmt-port-dp-resource-name"` + DPUNodeLeaseRenewInterval int `gcfg:"dpu-node-lease-renew-interval"` + DPUNodeLeaseDuration int `gcfg:"dpu-node-lease-duration"` } // ClusterManagerConfig holds configuration for ovnkube-cluster-manager @@ -1267,6 +1272,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableStatelessNetPol, Value: OVNKubernetesFeature.EnableStatelessNetPol, }, + &cli.BoolFlag{ + Name: "allow-icmp-network-policy", + Usage: "Allow ICMP/ICMPv6 traffic to bypass NetworkPolicy default-deny rules.", + Destination: &cliConfig.OVNKubernetesFeature.AllowICMPNetworkPolicy, + Value: OVNKubernetesFeature.AllowICMPNetworkPolicy, + }, &cli.BoolFlag{ Name: "enable-interconnect", Usage: "Enable interconnecting multiple zones.", @@ -1832,6 +1843,18 @@ var OvnKubeNodeFlags = []cli.Flag{ Value: OvnKubeNode.MgmtPortDPResourceName, Destination: &cliConfig.OvnKubeNode.MgmtPortDPResourceName, }, + &cli.IntFlag{ + Name: "dpu-node-lease-renew-interval", + Usage: "Interval in seconds at which the DPU updates its custom node lease. 
Set to 0 to disable DPU health checking", + Value: OvnKubeNode.DPUNodeLeaseRenewInterval, + Destination: &cliConfig.OvnKubeNode.DPUNodeLeaseRenewInterval, + }, + &cli.IntFlag{ + Name: "dpu-node-lease-duration", + Usage: "Lease duration in seconds before the DPU is considered unhealthy", + Value: OvnKubeNode.DPUNodeLeaseDuration, + Destination: &cliConfig.OvnKubeNode.DPUNodeLeaseDuration, + }, } // ClusterManagerFlags captures ovnkube-cluster-manager specific configurations @@ -3182,6 +3205,17 @@ func buildOvnKubeNodeConfig(cli, file *config) error { return fmt.Errorf("hybrid overlay is not supported with ovnkube-node mode %s", OvnKubeNode.Mode) } + if OvnKubeNode.DPUNodeLeaseRenewInterval < 0 { + return fmt.Errorf("invalid dpu-node-lease-renew-interval '%d'. must be >= 0", OvnKubeNode.DPUNodeLeaseRenewInterval) + } + if OvnKubeNode.DPUNodeLeaseDuration <= 0 { + return fmt.Errorf("invalid dpu-node-lease-duration '%d'. must be > 0", OvnKubeNode.DPUNodeLeaseDuration) + } + if OvnKubeNode.DPUNodeLeaseDuration <= OvnKubeNode.DPUNodeLeaseRenewInterval { + return fmt.Errorf("invalid dpu-node-lease-duration '%d'. must be > dpu-node-lease-renew-interval '%d'", + OvnKubeNode.DPUNodeLeaseDuration, OvnKubeNode.DPUNodeLeaseRenewInterval) + } + // Warn the user if both MgmtPortNetdev and MgmtPortDPResourceName are specified since they // configure the management port. 
if OvnKubeNode.MgmtPortNetdev != "" && OvnKubeNode.MgmtPortDPResourceName != "" { diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 2a108e39d6..ae8258581c 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -1974,12 +1974,16 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Overrides value from Config file", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } file := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, + Mode: types.NodeModeDPU, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } err := buildOvnKubeNodeConfig(&cliConfig, &file) @@ -1990,9 +1994,11 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Overrides value from CLI", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPUHost, - MgmtPortNetdev: "enp1s0f0v0", - MgmtPortDPResourceName: "openshift.io/mgmtvf", + Mode: types.NodeModeDPUHost, + MgmtPortNetdev: "enp1s0f0v0", + MgmtPortDPResourceName: "openshift.io/mgmtvf", + DPUNodeLeaseRenewInterval: 5, + DPUNodeLeaseDuration: 20, }, } err := buildOvnKubeNodeConfig(&cliConfig, &config{}) @@ -2000,6 +2006,8 @@ udn-allowed-default-services= ns/svc, ns1/svc1 gomega.Expect(OvnKubeNode.Mode).To(gomega.Equal(types.NodeModeDPUHost)) gomega.Expect(OvnKubeNode.MgmtPortNetdev).To(gomega.Equal("enp1s0f0v0")) gomega.Expect(OvnKubeNode.MgmtPortDPResourceName).To(gomega.Equal("openshift.io/mgmtvf")) + gomega.Expect(OvnKubeNode.DPUNodeLeaseRenewInterval).To(gomega.Equal(5)) + gomega.Expect(OvnKubeNode.DPUNodeLeaseDuration).To(gomega.Equal(20)) }) It("Fails with unsupported mode", func() { @@ -2026,14 +2034,71 @@ 
udn-allowed-default-services= ns/svc, ns1/svc1 "hybrid overlay is not supported with ovnkube-node mode")) }) + It("Fails if DPU node lease renew interval is negative", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: -1, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("dpu-node-lease-renew-interval")) + }) + + It("Succeeds if DPU node lease renew interval is zero", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: 0, + DPUNodeLeaseDuration: 10, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(OvnKubeNode.DPUNodeLeaseRenewInterval).To(gomega.Equal(0)) + gomega.Expect(OvnKubeNode.DPUNodeLeaseDuration).To(gomega.Equal(10)) + }) + + It("Fails if DPU node lease duration is non-positive", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: 0, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.ContainSubstring("dpu-node-lease-duration")) + }) + + It("Fails if DPU node lease duration is less than or equal to renew interval", func() { + cliConfig := config{ + OvnKubeNode: OvnKubeNodeConfig{ + Mode: types.NodeModeFull, + DPUNodeLeaseRenewInterval: 10, + DPUNodeLeaseDuration: 10, + }, + } + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.Or( + gomega.ContainSubstring("dpu-node-lease-duration"), + 
gomega.ContainSubstring("dpu-node-lease-renew-interval"), + )) + }) + It("Fails if management port is provided and ovnkube node mode is dpu", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPU, - MgmtPortNetdev: "enp1s0f0v0", + Mode: types.NodeModeDPU, + MgmtPortNetdev: "enp1s0f0v0", + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } - err := buildOvnKubeNodeConfig(&cliConfig, &config{}) + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) gomega.Expect(err).To(gomega.HaveOccurred()) gomega.Expect(err.Error()).To(gomega.ContainSubstring("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must not be provided")) }) @@ -2041,10 +2106,12 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Fails if management port is not provided and ovnkube node mode is dpu-host", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeDPUHost, + Mode: types.NodeModeDPUHost, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } - err := buildOvnKubeNodeConfig(&cliConfig, &config{}) + err := buildOvnKubeNodeConfig(&cliConfig, &config{OvnKubeNode: OvnKubeNode}) gomega.Expect(err).To(gomega.HaveOccurred()) gomega.Expect(err.Error()).To(gomega.ContainSubstring("ovnkube-node-mgmt-port-netdev or ovnkube-node-mgmt-port-dp-resource-name must be provided")) }) @@ -2052,13 +2119,17 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Succeeds if management netdev provided in the full mode", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, - MgmtPortNetdev: "ens1f0v0", + Mode: types.NodeModeFull, + MgmtPortNetdev: "ens1f0v0", + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } file := config{ OvnKubeNode: 
OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } err := buildOvnKubeNodeConfig(&cliConfig, &file) @@ -2068,13 +2139,17 @@ udn-allowed-default-services= ns/svc, ns1/svc1 It("Succeeds if management port device plugin resource name provided in the full mode", func() { cliConfig := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, - MgmtPortDPResourceName: "openshift.io/mgmtvf", + Mode: types.NodeModeFull, + MgmtPortDPResourceName: "openshift.io/mgmtvf", + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } file := config{ OvnKubeNode: OvnKubeNodeConfig{ - Mode: types.NodeModeFull, + Mode: types.NodeModeFull, + DPUNodeLeaseDuration: OvnKubeNode.DPUNodeLeaseDuration, + DPUNodeLeaseRenewInterval: OvnKubeNode.DPUNodeLeaseRenewInterval, }, } err := buildOvnKubeNodeConfig(&cliConfig, &file) diff --git a/go-controller/pkg/controllermanager/controller_manager.go b/go-controller/pkg/controllermanager/controller_manager.go index 61a342f77a..eac0c84651 100644 --- a/go-controller/pkg/controllermanager/controller_manager.go +++ b/go-controller/pkg/controllermanager/controller_manager.go @@ -92,15 +92,17 @@ func (cm *ControllerManager) NewNetworkController(nInfo util.NetInfo) (networkma return nil, fmt.Errorf("topology type %s not supported", topoType) } -// newDummyNetworkController creates a dummy network controller used to clean up specific network -func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) (networkmanager.NetworkController, error) { +// newDummyNetworkController creates a dummy network controller used to clean up specific network. +// role is the NetworkRoleExternalID from stale OVN entities (e.g. 
"primary" or "secondary") so that +// the dummy's netInfo.IsPrimaryNetwork() is correct for Layer2 gateway cleanup. +func (cm *ControllerManager) newDummyNetworkController(topoType, netName, role string) (networkmanager.NetworkController, error) { // Pass a shallow clone of the watch factory, this allows multiplexing // informers for user-defined Networks. cnci, err := cm.newCommonNetworkControllerInfo(cm.watchFactory.ShallowClone()) if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } - netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType}) + netInfo, _ := util.NewNetInfo(&ovncnitypes.NetConf{NetConf: types.NetConf{Name: netName}, Topology: topoType, Role: role}) switch topoType { case ovntypes.Layer3Topology: return ovn.NewLayer3UserDefinedNetworkController(cnci, netInfo, cm.networkManager.Interface(), cm.routeImportManager, cm.eIPController, cm.portCache) @@ -112,33 +114,38 @@ func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) return nil, fmt.Errorf("topology type %s not supported", topoType) } -// Find all the OVN logical switches/routers for the secondary networks -func findAllSecondaryNetworkLogicalEntities(nbClient libovsdbclient.Client) ([]*nbdb.LogicalSwitch, +// findAllUserDefinedNetworkLogicalEntities returns all OVN logical switches and +// routers that belong to user-defined networks (primary or secondary). Same +// predicate as original: entities have NetworkExternalID and NetworkRoleExternalID +// (TopologyExternalID always co-exists with NetworkExternalID per CleanupStaleNetworks). +// Caller reads role and topoType from entity ExternalIDs for dummy controller creation. +// Used on controller restart to remove stale entities for deleted UDNs. 
+func findAllUserDefinedNetworkLogicalEntities(nbClient libovsdbclient.Client) ([]*nbdb.LogicalSwitch, []*nbdb.LogicalRouter, error) { - belongsToSecondaryNetwork := func(externalIDs map[string]string) bool { + belongsToUserDefinedNetwork := func(externalIDs map[string]string) bool { _, hasNetworkExternalID := externalIDs[ovntypes.NetworkExternalID] - networkRole, hasNetworkRoleExternalID := externalIDs[ovntypes.NetworkRoleExternalID] - return hasNetworkExternalID && hasNetworkRoleExternalID && networkRole == ovntypes.NetworkRoleSecondary + _, hasNetworkRoleExternalID := externalIDs[ovntypes.NetworkRoleExternalID] + return hasNetworkExternalID && hasNetworkRoleExternalID } p1 := func(item *nbdb.LogicalSwitch) bool { - return belongsToSecondaryNetwork(item.ExternalIDs) + return belongsToUserDefinedNetwork(item.ExternalIDs) } - nodeSwitches, err := libovsdbops.FindLogicalSwitchesWithPredicate(nbClient, p1) + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(nbClient, p1) if err != nil { - klog.Errorf("Failed to get all logical switches of secondary network error: %v", err) + klog.Errorf("Failed to get all logical switches of user-defined networks: %v", err) return nil, nil, err } p2 := func(item *nbdb.LogicalRouter) bool { - return belongsToSecondaryNetwork(item.ExternalIDs) + return belongsToUserDefinedNetwork(item.ExternalIDs) } - clusterRouters, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, p2) + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, p2) if err != nil { - klog.Errorf("Failed to get all distributed logical routers: %v", err) + klog.Errorf("Failed to get all logical routers of user-defined networks: %v", err) return nil, nil, err } - return nodeSwitches, clusterRouters, nil + return switches, routers, nil } func (cm *ControllerManager) GetDefaultNetworkController() networkmanager.ReconcilableNetworkController { @@ -155,8 +162,9 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks 
...util.NetInfo) } } - // Get all the existing secondary networks and its logical entities - switches, routers, err := findAllSecondaryNetworkLogicalEntities(cm.nbClient) + // Get all the existing user-defined network logical entities (primary and secondary). + // For a given network, all switches/routers have the same role external ID (primary or secondary). + switches, routers, err := findAllUserDefinedNetworkLogicalEntities(cm.nbClient) if err != nil { return err } @@ -170,11 +178,15 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) // network still exists, no cleanup to do continue } + role := ls.ExternalIDs[ovntypes.NetworkRoleExternalID] + if _, ok := staleNetworkControllers[netName]; ok { + // already have a dummy controller for this network (from an earlier entity) + continue + } // Create dummy network controllers to clean up logical entities klog.V(5).Infof("Found stale %s network %s", topoType, netName) - if oc, err := cm.newDummyNetworkController(topoType, netName); err == nil { + if oc, err := cm.newDummyNetworkController(topoType, netName, role); err == nil { staleNetworkControllers[netName] = oc - continue } } for _, lr := range routers { @@ -185,11 +197,15 @@ func (cm *ControllerManager) CleanupStaleNetworks(validNetworks ...util.NetInfo) // network still exists, no cleanup to do continue } + role := lr.ExternalIDs[ovntypes.NetworkRoleExternalID] + if _, ok := staleNetworkControllers[netName]; ok { + // already have a dummy controller for this network (from an earlier entity) + continue + } // Create dummy network controllers to clean up logical entities klog.V(5).Infof("Found stale %s network %s", topoType, netName) - if oc, err := cm.newDummyNetworkController(topoType, netName); err == nil { + if oc, err := cm.newDummyNetworkController(topoType, netName, role); err == nil { staleNetworkControllers[netName] = oc - continue } } diff --git a/go-controller/pkg/metrics/metrics.go b/go-controller/pkg/metrics/metrics.go 
index 89b0fa896f..e574a9c468 100644 --- a/go-controller/pkg/metrics/metrics.go +++ b/go-controller/pkg/metrics/metrics.go @@ -2,11 +2,9 @@ package metrics import ( "context" - "crypto/tls" "fmt" "io" "net/http" - "net/http/pprof" "os" "path" "regexp" @@ -16,11 +14,8 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - utilwait "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -417,26 +412,6 @@ func CheckPodRunsOnGivenNode(clientset kubernetes.Interface, labels []string, k8 strings.Join(labels, ","), k8sNodeName) } -// using the cyrpto/tls module's GetCertificate() callback function helps in picking up -// the latest certificate (due to cert rotation on cert expiry) -func getTLSServer(addr, certFile, privKeyFile string, handler http.Handler) *http.Server { - tlsConfig := &tls.Config{ - GetCertificate: func(_ *tls.ClientHelloInfo) (*tls.Certificate, error) { - cert, err := tls.LoadX509KeyPair(certFile, privKeyFile) - if err != nil { - return nil, fmt.Errorf("error generating x509 certs for metrics TLS endpoint: %v", err) - } - return &cert, nil - }, - } - server := &http.Server{ - Addr: addr, - Handler: handler, - TLSConfig: tlsConfig, - } - return server -} - // stringFlagSetterFunc is a func used for setting string type flag. type stringFlagSetterFunc func(string) (string, error) @@ -482,25 +457,27 @@ func writePlainText(statusCode int, text string, w http.ResponseWriter) { fmt.Fprintln(w, text) } -// StartMetricsServer runs the prometheus listener so that OVN K8s metrics can be collected -// It puts the endpoint behind TLS if certFile and keyFile are defined. +// StartMetricsServer runs the prometheus listener so that OVN K8s metrics can be collected. 
+// It now reuses the unified MetricServer implementation so it can share plumbing with the +// OVN/OVS metrics server. TLS and pprof behaviour remain unchanged. func StartMetricsServer(bindAddress string, enablePprof bool, certFile string, keyFile string, stopChan <-chan struct{}, wg *sync.WaitGroup) { - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - - if enablePprof { - mux.HandleFunc("/debug/pprof/", pprof.Index) - mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - mux.HandleFunc("/debug/pprof/profile", pprof.Profile) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - mux.HandleFunc("/debug/pprof/trace", pprof.Trace) - - // Allow changes to log level at runtime - mux.HandleFunc("/debug/flags/v", stringFlagPutHandler(klogSetter)) + opts := MetricServerOptions{ + BindAddress: bindAddress, + CertFile: certFile, + KeyFile: keyFile, + EnablePprof: enablePprof, + // Use default registry so existing metric registrations keep working. + Registerer: prometheus.DefaultRegisterer, } - startMetricsServer(bindAddress, certFile, keyFile, mux, stopChan, wg) + server := NewMetricServer(opts, nil, nil) + + wg.Add(1) + go func() { + defer wg.Done() + server.Run(stopChan) + }() } // StartOVNMetricsServer runs the prometheus listener so that OVN metrics can be collected @@ -522,40 +499,3 @@ func StartOVNMetricsServer(opts MetricServerOptions, return metricsServer } - -func startMetricsServer(bindAddress, certFile, keyFile string, handler http.Handler, stopChan <-chan struct{}, wg *sync.WaitGroup) { - var server *http.Server - wg.Add(1) - go func() { - defer wg.Done() - utilwait.Until(func() { - klog.Infof("Starting metrics server at address %q", bindAddress) - var listenAndServe func() error - if certFile != "" && keyFile != "" { - server = getTLSServer(bindAddress, certFile, keyFile, handler) - listenAndServe = func() error { return server.ListenAndServeTLS("", "") } - } else { - server = &http.Server{Addr: bindAddress, Handler: handler} - 
listenAndServe = func() error { return server.ListenAndServe() } - } - - errCh := make(chan error) - go func() { - errCh <- listenAndServe() - }() - var err error - select { - case err = <-errCh: - err = fmt.Errorf("failed while running metrics server at address %q: %w", bindAddress, err) - utilruntime.HandleError(err) - case <-stopChan: - klog.Infof("Stopping metrics server at address %q", bindAddress) - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if err := server.Shutdown(shutdownCtx); err != nil { - klog.Errorf("Error stopping metrics server at address %q: %v", bindAddress, err) - } - } - }, 5*time.Second, stopChan) - }() -} diff --git a/go-controller/pkg/metrics/ovn.go b/go-controller/pkg/metrics/ovn.go index a243aec0ee..cb266f2026 100644 --- a/go-controller/pkg/metrics/ovn.go +++ b/go-controller/pkg/metrics/ovn.go @@ -364,7 +364,7 @@ func updateSBDBConnectionMetric(ovsAppctl ovsClient) { } // RegisterOvnControllerMetrics registers the ovn-controller metrics -func RegisterOvnControllerMetrics(ovsDBClient libovsdbclient.Client, ovnRegistry *prometheus.Registry) { +func RegisterOvnControllerMetrics(ovsDBClient libovsdbclient.Client, ovnRegistry prometheus.Registerer) { getOvnControllerVersionInfo() ovnRegistry.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ diff --git a/go-controller/pkg/metrics/ovn_db.go b/go-controller/pkg/metrics/ovn_db.go index e42fa1be3f..7a9cfdd0f7 100644 --- a/go-controller/pkg/metrics/ovn_db.go +++ b/go-controller/pkg/metrics/ovn_db.go @@ -359,7 +359,7 @@ func getOvnDbVersionInfo() { } } -func RegisterOvnDBMetrics(ovnRegistry *prometheus.Registry) ([]*util.OvsDbProperties, bool, bool) { +func RegisterOvnDBMetrics(ovnRegistry prometheus.Registerer) ([]*util.OvsDbProperties, bool, bool) { // get the ovsdb server version info getOvnDbVersionInfo() // register metrics that will be served off of /metrics path diff --git a/go-controller/pkg/metrics/server.go 
b/go-controller/pkg/metrics/server.go index 88d04ce2a4..ca5a23680a 100644 --- a/go-controller/pkg/metrics/server.go +++ b/go-controller/pkg/metrics/server.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net/http" + "net/http/pprof" "time" "github.com/prometheus/client_golang/prometheus" @@ -35,11 +36,15 @@ type MetricServerOptions struct { EnableOVNDBMetrics bool EnableOVNControllerMetrics bool EnableOVNNorthdMetrics bool + EnablePprof bool // OnFatalError is called when an unrecoverable error occurs (e.g., failed to bind to address). // If set, it allows the caller to trigger a graceful shutdown. OnFatalError func() + // Prometheus plumbing + Registerer prometheus.Registerer + // Kubernetes integration K8sClient kubernetes.Interface K8sNodeName string @@ -63,35 +68,48 @@ type MetricServer struct { server *http.Server mux *http.ServeMux - // Prometheus registries - ovnRegistry *prometheus.Registry + // Prometheus registry + registerer prometheus.Registerer } // NewMetricServer creates a new MetricServer instance func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client, kubeClient kubernetes.Interface) *MetricServer { - // Create server instance + registerer := opts.Registerer + if registerer == nil { + registerer = prometheus.NewRegistry() + } + server := &MetricServer{ opts: opts, ovsDBClient: ovsDBClient, - ovnRegistry: prometheus.NewRegistry(), + registerer: registerer, kubeClient: kubeClient, } server.mux = http.NewServeMux() - metricsHandler := promhttp.HandlerForTransactional( - prometheus.ToTransactionalGatherer(server.ovnRegistry), - promhttp.HandlerOpts{}, - ) + tg := prometheus.ToTransactionalGatherer(server.registerer.(prometheus.Gatherer)) + metricsHandler := promhttp.HandlerForTransactional(tg, promhttp.HandlerOpts{}) + server.mux.Handle("/metrics", promhttp.InstrumentMetricHandler( - server.ovnRegistry, + server.registerer, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { // Update metrics in the registry before 
emitting them. server.handleMetrics(r) - // Emit the updated metrics using the transactional handler. metricsHandler.ServeHTTP(w, r) }), )) + if opts.EnablePprof { + server.mux.HandleFunc("/debug/pprof/", pprof.Index) + server.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + server.mux.HandleFunc("/debug/pprof/profile", pprof.Profile) + server.mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + server.mux.HandleFunc("/debug/pprof/trace", pprof.Trace) + + // Allow changes to log level at runtime + server.mux.HandleFunc("/debug/flags/v", stringFlagPutHandler(klogSetter)) + } + return server } @@ -99,32 +117,32 @@ func NewMetricServer(opts MetricServerOptions, ovsDBClient libovsdbclient.Client func (s *MetricServer) registerMetrics() { if s.opts.EnableOVSMetrics { klog.Infof("MetricServer registers OVS metrics") - registerOvsMetrics(s.ovsDBClient, s.ovnRegistry) + registerOvsMetrics(s.ovsDBClient, s.registerer) } if s.opts.EnableOVNDBMetrics { klog.Infof("MetricServer registers OVN DB metrics") - s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.ovnRegistry) + s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.registerer) } if s.opts.EnableOVNControllerMetrics { klog.Infof("MetricServer registers OVN Controller metrics") - RegisterOvnControllerMetrics(s.ovsDBClient, s.ovnRegistry) + RegisterOvnControllerMetrics(s.ovsDBClient, s.registerer) } if s.opts.EnableOVNNorthdMetrics { klog.Infof("MetricServer registers OVN Northd metrics") - RegisterOvnNorthdMetrics(s.ovnRegistry) + RegisterOvnNorthdMetrics(s.registerer) } } func (s *MetricServer) EnableOVNNorthdMetrics() { s.opts.EnableOVNNorthdMetrics = true klog.Infof("MetricServer registers OVN Northd metrics") - RegisterOvnNorthdMetrics(s.ovnRegistry) + RegisterOvnNorthdMetrics(s.registerer) } func (s *MetricServer) EnableOVNDBMetrics() { s.opts.EnableOVNDBMetrics = true klog.Infof("MetricServer registers OVN DB metrics") - 
s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.ovnRegistry) + s.ovsDbProperties, s.opts.dbIsClustered, s.opts.dbFoundViaPath = RegisterOvnDBMetrics(s.registerer) } // updateOvsMetrics updates the OVS metrics @@ -226,6 +244,7 @@ func (s *MetricServer) Run(stopChan <-chan struct{}) { errCh := make(chan error) go func() { + klog.Infof("Metric Server starts to listen on %s", s.opts.BindAddress) errCh <- listenAndServe() }() diff --git a/go-controller/pkg/metrics/server_test.go b/go-controller/pkg/metrics/server_test.go index 32d4144dd8..c02bfd1188 100644 --- a/go-controller/pkg/metrics/server_test.go +++ b/go-controller/pkg/metrics/server_test.go @@ -13,6 +13,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/prometheus/client_golang/prometheus" "github.com/spf13/afero" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -48,7 +49,7 @@ func TestNewMetricServerRunAndShutdown(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) require.NotNil(t, server, "Server should not be nil") require.NotNil(t, server.mux, "Server mux should not be nil") - require.NotNil(t, server.ovnRegistry, "Server OVN registry should not be nil") + require.NotNil(t, server.registerer, "Server registerer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -109,7 +110,7 @@ func TestNewMetricServerRunAndFailOnFatalError(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) require.NotNil(t, server, "Server should not be nil") require.NotNil(t, server.mux, "Server mux should not be nil") - require.NotNil(t, server.ovnRegistry, "Server OVN registry should not be nil") + require.NotNil(t, server.registerer, "Server registerer should not be nil") // Start server in background serverDone := make(chan struct{}) @@ -316,6 +317,7 @@ type metricsTestCase struct { enableOVNDB bool enableOVNController bool enableOVNNorthd bool 
+ registerer prometheus.Registerer mockRunCommands []ovntest.TestifyMockHelper expectedMetrics []string } @@ -379,6 +381,11 @@ func TestHandleMetrics(t *testing.T) { } defer libovsdbCleanup.Cleanup() + // Register OVN-Kube controller base metrics into the default registry, so the + // metrics in default registry can be tested. + RegisterOVNKubeControllerBase() + MetricOVNKubeControllerSyncDuration.WithLabelValues("pods").Set(0) + testCases := []metricsTestCase{ { name: "OVS metrics", @@ -778,6 +785,56 @@ func TestHandleMetrics(t *testing.T) { "promhttp_metric_handler_requests_total", }, }, + { + name: "default registry metrics", + registerer: prometheus.DefaultRegisterer, + expectedMetrics: []string{ + "ovnkube_controller_leader", + "ovnkube_controller_ready_duration_seconds", + "ovnkube_controller_sync_duration_seconds", + "ovnkube_controller_build_info", + "go_gc_duration_seconds", + "go_gc_gogc_percent", + "go_gc_gomemlimit_bytes", + "go_goroutines", + "go_info", + "go_memstats_alloc_bytes", + "go_memstats_alloc_bytes_total", + "go_memstats_buck_hash_sys_bytes", + "go_memstats_frees_total", + "go_memstats_gc_sys_bytes", + "go_memstats_heap_alloc_bytes", + "go_memstats_heap_idle_bytes", + "go_memstats_heap_inuse_bytes", + "go_memstats_heap_objects", + "go_memstats_heap_released_bytes", + "go_memstats_heap_sys_bytes", + "go_memstats_last_gc_time_seconds", + "go_memstats_mallocs_total", + "go_memstats_mcache_inuse_bytes", + "go_memstats_mcache_sys_bytes", + "go_memstats_mspan_inuse_bytes", + "go_memstats_mspan_sys_bytes", + "go_memstats_next_gc_bytes", + "go_memstats_other_sys_bytes", + "go_memstats_stack_inuse_bytes", + "go_memstats_stack_sys_bytes", + "go_memstats_sys_bytes", + "go_sched_gomaxprocs_threads", + "go_threads", + "process_cpu_seconds_total", + "process_max_fds", + "process_network_receive_bytes_total", + "process_network_transmit_bytes_total", + "process_open_fds", + "process_resident_memory_bytes", + "process_start_time_seconds", + 
"process_virtual_memory_bytes", + "process_virtual_memory_max_bytes", + "promhttp_metric_handler_requests_in_flight", + "promhttp_metric_handler_requests_total", + }, + }, } for _, tc := range testCases { @@ -789,6 +846,7 @@ func TestHandleMetrics(t *testing.T) { EnableOVNDBMetrics: tc.enableOVNDB, EnableOVNControllerMetrics: tc.enableOVNController, EnableOVNNorthdMetrics: tc.enableOVNNorthd, + Registerer: tc.registerer, } // Mock the exec runner for RunOvsVswitchdAppCtl calls mockCmd := new(mock_k8s_io_utils_exec.Cmd) @@ -813,8 +871,8 @@ func TestHandleMetrics(t *testing.T) { server := NewMetricServer(opts, ovsDBClient, kubeClient) server.registerMetrics() - // iterate s.ovnRegistry to list all registered metrics' names - regMetrics, err := server.ovnRegistry.Gather() + // Iterate server registry to list all registered metric names. + regMetrics, err := server.registerer.(prometheus.Gatherer).Gather() if err != nil { t.Fatalf("Failed to gather metrics: %v", err) } diff --git a/go-controller/pkg/networkmanager/api.go b/go-controller/pkg/networkmanager/api.go index 7c47997276..1581afcfa2 100644 --- a/go-controller/pkg/networkmanager/api.go +++ b/go-controller/pkg/networkmanager/api.go @@ -43,11 +43,13 @@ type watchFactory interface { // information to the rest of the project. type Interface interface { // GetActiveNetworkForNamespace returns a copy of the primary network for - // the namespace if any or the default network otherwise. If there is a - // primary UDN defined but the NAD has not been processed yet, returns - // ErrNetworkControllerTopologyNotManaged. Used for controllers that are not - // capable of reconciling primary network changes. If unsure, use this one - // and not GetActiveNetworkForNamespaceFast. + // the namespace if any or the default network otherwise. + // If the network is non-existent for a legitimate reason (namespace gone or + // filtered by Dynamic UDN) it returns nil NetInfo and no error. 
+ // If the network is non-existent, but should exist, return InvalidPrimaryNetworkError. + // If unsure, use this one and not GetActiveNetworkForNamespaceFast. + // Note this function is filtered by Dynamic UDN, so if your caller wants NAD/Network + // information without D-UDN filtering, use GetPrimaryNADForNamespace. GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) // GetActiveNetworkForNamespaceFast returns the primary network for the @@ -61,6 +63,7 @@ type Interface interface { // GetPrimaryNADForNamespace returns the full namespaced key of the // primary NAD for the given namespace, if one exists. // Returns default network if namespace has no primary UDN. + // This function is not filtered based on Dynamic UDN. GetPrimaryNADForNamespace(namespace string) (string, error) // GetNetwork returns the network of the given name or nil if unknown diff --git a/go-controller/pkg/networkmanager/egressip_tracker.go b/go-controller/pkg/networkmanager/egressip_tracker.go index be941b9bcd..9b86bb2cce 100644 --- a/go-controller/pkg/networkmanager/egressip_tracker.go +++ b/go-controller/pkg/networkmanager/egressip_tracker.go @@ -250,7 +250,7 @@ func (t *EgressIPTrackerController) reconcileNamespace(key string) error { primaryNAD, err := t.primaryNADForNamespace(ns.Name) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { // Namespace requires a primary network but none exists yet; NAD controller will requeue. return nil } @@ -380,5 +380,5 @@ func (t *EgressIPTrackerController) getPrimaryNADForNamespaceFromLister(namespac } // The namespace declared it needs a primary UDN but none exists yet. 
- return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } diff --git a/go-controller/pkg/networkmanager/egressip_tracker_test.go b/go-controller/pkg/networkmanager/egressip_tracker_test.go index e82d2b9813..68069b5e6a 100644 --- a/go-controller/pkg/networkmanager/egressip_tracker_test.go +++ b/go-controller/pkg/networkmanager/egressip_tracker_test.go @@ -278,6 +278,15 @@ func TestEgressIPTrackerControllerWithInformer(t *testing.T) { }, metav1.CreateOptions{}) g.Expect(err).NotTo(gomega.HaveOccurred()) + // Mirror production ordering: NAD controller notifies registered reconcilers + // after the primary NAD is observed, so namespace reconcile isn't dropped due + // to a transient "primary not found" window in informer caches. + primaryNADKey := util.GetNADName(tt.namespace, "primary") + g.Eventually(func() (string, error) { + return tracker.primaryNADForNamespace(tt.namespace) + }, 2*time.Second, 100*time.Millisecond).Should(gomega.Equal(primaryNADKey)) + tracker.NADReconciler().Reconcile(primaryNADKey) + // Expect add events g.Eventually(func() []callbackEvent { gotMu.Lock() diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index b282535f93..29a2099c5c 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -31,7 +31,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" - utiludn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/udn" ) // nadController handles namespaced scoped NAD events and @@ -839,6 +838,10 @@ func (c *nadController) nadNeedsUpdate(oldNAD, newNAD *nettypes.NetworkAttachmen oldNAD.Annotations[types.OvnNetworkNameAnnotation] != newNAD.Annotations[types.OvnNetworkNameAnnotation] } +// 
GetActiveNetworkForNamespace attempts to get the netInfo of a primary active network where this OVNK instance is running. +// Returns DefaultNetwork if Network Segmentation disabled or namespace does not require primary UDN. +// Returns nil if there is no active network. +// Returns InvalidPrimaryNetworkError if a network should be present but is not. func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { if !util.IsNetworkSegmentationSupportEnabled() { return &util.DefaultNetInfo{}, nil @@ -847,6 +850,10 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net // check if required UDN label is on namespace ns, err := c.namespaceLister.Get(namespace) if err != nil { + if apierrors.IsNotFound(err) { + // namespace is gone, no active network for it + return nil, nil + } return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, err) } if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; !exists { @@ -854,49 +861,26 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net return &util.DefaultNetInfo{}, nil } - network, nad := c.getActiveNetworkForNamespace(namespace) + // primary UDN territory, check if our NAD controller to see if it has processed the network and if the + // network manager has rendered the network + network, primaryNAD := c.getActiveNetworkForNamespace(namespace) if network != nil && network.IsPrimaryNetwork() { - // primary UDN found + // primary UDN network found in network controller copy := util.NewMutableNetInfo(network) - copy.SetNADs(nad) + copy.SetNADs(primaryNAD) return copy, nil } - // no primary UDN found, make sure we just haven't processed it yet and no UDN / CUDN exists - udns, err := c.udnLister.UserDefinedNetworks(namespace).List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("error getting user defined networks: %w", err) - } - for _, udn := range udns { - if utiludn.IsPrimaryNetwork(&udn.Spec) { - return 
nil, util.NewUnprocessedActiveNetworkError(namespace, udn.Name) - } - } - cudns, err := c.cudnLister.List(labels.Everything()) - if err != nil { - return nil, fmt.Errorf("failed to list CUDNs: %w", err) - } - for _, cudn := range cudns { - if !utiludn.IsPrimaryNetwork(&cudn.Spec.Network) { - continue - } - // check the subject namespace referred by the specified namespace-selector - cudnNamespaceSelector, err := metav1.LabelSelectorAsSelector(&cudn.Spec.NamespaceSelector) - if err != nil { - return nil, fmt.Errorf("failed to convert CUDN %q namespaceSelector: %w", cudn.Name, err) - } - selectedNamespaces, err := c.namespaceLister.List(cudnNamespaceSelector) - if err != nil { - return nil, fmt.Errorf("failed to list namespaces using selector %q: %w", cudnNamespaceSelector, err) - } - for _, ns := range selectedNamespaces { - if ns.Name == namespace { - return nil, util.NewUnprocessedActiveNetworkError(namespace, cudn.Name) - } + // no network exists in the network manager + if primaryNAD != "" { + if config.OVNKubernetesFeature.EnableDynamicUDNAllocation { + // primary NAD exists, no network, and DUDN is enabled, treat this like the network doesn't exist + return nil, nil } + // primary NAD exists, but missing in network manager. This should never happen. + panic(fmt.Sprintf("NAD Controller broken consistency with Network Manager for primary NAD: %s", primaryNAD)) } - // namespace has required UDN label, but no UDN was found return nil, util.NewInvalidPrimaryNetworkError(namespace) } @@ -907,8 +891,11 @@ func (c *nadController) GetActiveNetworkForNamespaceFast(namespace string) util. // GetPrimaryNADForNamespace returns the full namespaced key of the // primary NAD for the given namespace, if one exists. 
-// Returns default network if namespace has no primary UDN +// Returns default network if namespace has no primary UDN or Network Segmentation is disabled func (c *nadController) GetPrimaryNADForNamespace(namespace string) (string, error) { + if !util.IsNetworkSegmentationSupportEnabled() { + return types.DefaultNetworkName, nil + } c.RLock() primary := c.primaryNADs[namespace] c.RUnlock() @@ -927,7 +914,7 @@ func (c *nadController) GetPrimaryNADForNamespace(namespace string) (string, err } if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; exists { // Namespace promises a primary UDN, but we haven't cached one yet. - return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } // No required label: means default network only. diff --git a/go-controller/pkg/networkmanager/pod_tracker.go b/go-controller/pkg/networkmanager/pod_tracker.go index 4a300dd099..3a682d41ed 100644 --- a/go-controller/pkg/networkmanager/pod_tracker.go +++ b/go-controller/pkg/networkmanager/pod_tracker.go @@ -191,7 +191,7 @@ func (c *PodTrackerController) getPrimaryNADForNamespaceFromLister(namespace str return util.GetNADName(nad.Namespace, nad.Name), nil } } - return "", util.NewUnprocessedActiveNetworkError(namespace, "") + return "", util.NewInvalidPrimaryNetworkError(namespace) } // syncAll builds the cache on initial controller start diff --git a/go-controller/pkg/node/base_node_network_controller_dpu.go b/go-controller/pkg/node/base_node_network_controller_dpu.go index db79e35c39..cdcbae6c2f 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu.go @@ -104,6 +104,7 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error) return bnnc.watchFactory.AddPodHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { var activeNetwork util.NetInfo + var err error pod := obj.(*corev1.Pod) 
klog.V(5).Infof("Add for Pod: %s/%s for network %s", pod.Namespace, pod.Name, netName) @@ -116,22 +117,16 @@ func (bnnc *BaseNodeNetworkController) watchPodsDPU() (*factory.Handler, error) nadToDPUCDMap := map[string]*util.DPUConnectionDetails{} if bnnc.IsUserDefinedNetwork() { if bnnc.IsPrimaryNetwork() { - // check to see if the primary NAD is even applicable to our controller - foundNamespaceNAD, err := bnnc.networkManager.GetPrimaryNADForNamespace(pod.Namespace) + activeNetwork, err = bnnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - klog.Errorf("Failed to get primary network NAD for namespace %s: %v", pod.Namespace, err) - return - } - if foundNamespaceNAD == types.DefaultNetworkName { + klog.Errorf("Failed looking for the active network for namespace %s: %v", pod.Namespace, err) return } - networkName := bnnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) - if networkName != "" && networkName != netName { + if activeNetwork == nil { + klog.Errorf("Unable to find an active network for namespace %s", pod.Namespace) return } - activeNetwork, err = bnnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) - if err != nil { - klog.Errorf("Failed looking for the active network for namespace %s: %v", pod.Namespace, err) + if activeNetwork.GetNetworkName() != netName { return } } diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go index 08726875a3..bcbe568ddb 100644 --- a/go-controller/pkg/node/controllers/egressip/egressip.go +++ b/go-controller/pkg/node/controllers/egressip/egressip.go @@ -567,6 +567,10 @@ func (c *Controller) processEIP(eip *eipv1.EgressIP) (*eIPConfig, sets.Set[strin if err != nil { return nil, selectedNamespaces, selectedPods, selectedNamespacesPodIPs, fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if netInfo == nil { + // no active network + continue + } if 
netInfo.IsUserDefinedNetwork() { // EIP for secondary host interfaces is not supported for secondary networks continue @@ -1036,6 +1040,10 @@ func (c *Controller) repairNode() error { if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if netInfo == nil { + // no active network + continue + } if netInfo.IsUserDefinedNetwork() { // EIP for secondary host interfaces is not supported for secondary networks continue @@ -1142,8 +1150,12 @@ func (c *Controller) migrateFromAddrLabelToAnnotation() error { if err != nil { return err } - node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) - return c.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) + return c.kube.UpdateNodeStatus(nodeToUpdate) }) } @@ -1174,8 +1186,12 @@ func (c *Controller) addIPToAnnotation(ip string) error { if err != nil { return err } - node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) - return c.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) + return c.kube.UpdateNodeStatus(nodeToUpdate) }) } @@ -1206,8 +1222,12 @@ func (c *Controller) deleteIPFromAnnotation(ip string) error { if err != nil { return err } - node.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) - return c.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = string(patch) + return c.kube.UpdateNodeStatus(nodeToUpdate) }) } diff --git a/go-controller/pkg/node/default_node_network_controller.go 
b/go-controller/pkg/node/default_node_network_controller.go index 512aa6fb53..55ed5a3f78 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -38,6 +38,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressip" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressservice" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/dpulease" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/managementport" nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" @@ -126,6 +127,8 @@ type DefaultNodeNetworkController struct { // retry framework for nodes, used for updating routes/nftables rules for node PMTUD guarding retryNodes *retry.RetryFramework + dpuNodeLeaseManager *dpulease.Manager + apbExternalRouteNodeController *apbroute.ExternalGatewayNodeController cniServer *cni.Server @@ -740,6 +743,22 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { return fmt.Errorf("failed to parse kubernetes node IP address. %v", nodeAddrStr) } + if (config.OvnKubeNode.Mode == types.NodeModeDPUHost || config.OvnKubeNode.Mode == types.NodeModeDPU) && + config.OvnKubeNode.DPUNodeLeaseRenewInterval > 0 { + nc.dpuNodeLeaseManager = dpulease.NewManager( + nc.client, + config.Kubernetes.OVNConfigNamespace, + node, + time.Duration(config.OvnKubeNode.DPUNodeLeaseRenewInterval)*time.Second, + time.Duration(config.OvnKubeNode.DPUNodeLeaseDuration)*time.Second, + ) + if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + if _, err := nc.dpuNodeLeaseManager.EnsureLease(ctx); err != nil { + return err + } + } + } + // Make sure that the node zone matches with the Southbound db zone. 
// Wait for 300s before giving up var sbZone string @@ -814,7 +833,7 @@ func (nc *DefaultNodeNetworkController) Init(ctx context.Context) error { if !ok { return fmt.Errorf("cannot get kubeclient for starting CNI server") } - cniServer, err = cni.NewCNIServer(nc.watchFactory, kclient.KClient, nc.networkManager, nc.ovsClient) + cniServer, err = cni.NewCNIServer(nc.watchFactory, kclient.KClient, nc.networkManager, nc.ovsClient, nc.dpuNodeLeaseManager) if err != nil { return err } @@ -969,10 +988,10 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { return err } nc.wg.Add(1) - go func() { + go func(stopCh <-chan struct{}) { defer nc.wg.Done() - nodeController.Run(nc.stopChan) - }() + nodeController.Run(stopCh) + }(nc.stopChan) } else if config.OvnKubeNode.Mode != types.NodeModeDPUHost { // attempt to cleanup the possibly stale bridge _, stderr, err := util.RunOVSVsctl("--if-exists", "del-br", "br-ext") @@ -1027,6 +1046,25 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { nc.healthzServer.Start(nc.stopChan, nc.wg) } + if nc.dpuNodeLeaseManager != nil { + if config.OvnKubeNode.Mode == types.NodeModeDPU { + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.dpuNodeLeaseManager.RunUpdater(ctx) + }() + } else if config.OvnKubeNode.Mode == types.NodeModeDPUHost { + if err := nc.dpuNodeLeaseManager.CheckStatus(ctx); err != nil { + klog.Warningf("Initial DPU node lease check failed: %v", err) + } + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.dpuNodeLeaseManager.RunMonitor(ctx) + }() + } + } + if config.OvnKubeNode.Mode == types.NodeModeDPU { if _, err := nc.watchPodsDPU(); err != nil { return err @@ -1080,7 +1118,7 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { nc.linkManager.Run(nc.stopChan, nc.wg) nc.wg.Add(1) - go func() { + go func(stopCh <-chan struct{}) { defer nc.wg.Done() podResClient, err := podresourcesapi.New() if err != nil { @@ -1092,8 +1130,8 @@ func (nc 
*DefaultNodeNetworkController) Start(ctx context.Context) error { klog.V(4).Infof("Error closing PodResourcesAPI client: %v", err) } }() - ovspinning.Run(ctx, nc.stopChan, podResClient) - }() + ovspinning.Run(ctx, stopCh, podResClient) + }(nc.stopChan) klog.Infof("Default node network controller initialized and ready.") return nil @@ -1135,10 +1173,10 @@ func (nc *DefaultNodeNetworkController) startEgressIPHealthCheckingServer(mgmtPo } nc.wg.Add(1) - go func() { + go func(stopCh <-chan struct{}) { defer nc.wg.Done() - healthServer.Run(nc.stopChan) - }() + healthServer.Run(stopCh) + }(nc.stopChan) return nil } diff --git a/go-controller/pkg/node/dpulease/manager.go b/go-controller/pkg/node/dpulease/manager.go new file mode 100644 index 0000000000..9ae1758088 --- /dev/null +++ b/go-controller/pkg/node/dpulease/manager.go @@ -0,0 +1,308 @@ +package dpulease + +import ( + "context" + "fmt" + "sync" + "time" + + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" +) + +const ( + // HolderIdentity is used on the DPU node lease object + HolderIdentity = "ovnkube-dpu-node" + leaseNamePrefix = "ovn-dpu-" +) + +// Manager handles lifecycle and readiness tracking for the DPU node lease. +type Manager struct { + client kubernetes.Interface + namespace string + nodeName string + nodeUID types.UID + renewInterval time.Duration + leaseDuration time.Duration + + statusMu sync.RWMutex + ready bool + reason string +} + +// NewManager builds a new Manager. 
+func NewManager(client kubernetes.Interface, namespace string, node *corev1.Node, renewInterval, leaseDuration time.Duration) *Manager { + m := &Manager{ + client: client, + namespace: namespace, + nodeName: node.Name, + nodeUID: node.UID, + renewInterval: renewInterval, + leaseDuration: leaseDuration, + } + + m.setStatus("", true) + + return m +} + +// Ready reports the current readiness and message for consumers such as the CNI server. +func (m *Manager) Ready() (bool, string) { + m.statusMu.RLock() + defer m.statusMu.RUnlock() + return m.ready, m.reason +} + +// EnsureLease creates or updates the DPU lease. +func (m *Manager) EnsureLease(ctx context.Context) (*coordinationv1.Lease, error) { + if m.renewInterval == 0 { + return nil, nil + } + + var lease *coordinationv1.Lease + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + existing, err := m.client.CoordinationV1().Leases(m.namespace).Get(ctx, m.leaseName(), metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + now := metav1.NowMicro() + lease = m.newLease(now) + created, createErr := m.client.CoordinationV1().Leases(m.namespace).Create(ctx, lease, metav1.CreateOptions{}) + if createErr != nil { + if apierrors.IsAlreadyExists(createErr) { + // Treat concurrent lease creation as a retriable conflict so we retry into the Get/Update path. 
+ return apierrors.NewConflict(schema.GroupResource{Group: coordinationv1.GroupName, Resource: "leases"}, m.leaseName(), createErr) + } + return createErr + } + lease = created + return nil + } + if err != nil { + return err + } + lease = existing.DeepCopy() + if !m.updateLeaseSpec(lease, metav1.NowMicro(), true) { + return nil + } + updated, updateErr := m.client.CoordinationV1().Leases(m.namespace).Update(ctx, lease, metav1.UpdateOptions{}) + if updateErr != nil { + return updateErr + } + lease = updated + return nil + }) + if err != nil { + m.setStatus(fmt.Sprintf("failed ensuring DPU lease: %v", err), false) + return nil, err + } + + m.setStatus("", true) + return lease, nil +} + +// RunUpdater periodically renews the lease heartbeat. Intended for DPU nodes. +func (m *Manager) RunUpdater(ctx context.Context) { + if m.renewInterval == 0 { + return + } + + wait.UntilWithContext(ctx, func(ctx context.Context) { + if err := m.Renew(ctx); err != nil { + klog.Warningf("Failed to renew DPU lease %s: %v", m.leaseName(), err) + } + }, m.renewInterval) +} + +// RunMonitor periodically checks the lease for expiry. Intended for DPU host nodes. +func (m *Manager) RunMonitor(ctx context.Context) { + if m.renewInterval == 0 { + return + } + + period := m.monitorPeriod() + wait.UntilWithContext(ctx, func(ctx context.Context) { + if err := m.CheckStatus(ctx); err != nil { + klog.Warningf("DPU lease %s marked unhealthy: %v", m.leaseName(), err) + } + }, period) +} + +// CheckStatus validates the lease and updates readiness. 
+func (m *Manager) CheckStatus(ctx context.Context) error { + if m.renewInterval == 0 { + m.setStatus("", true) + return nil + } + + lease, err := m.client.CoordinationV1().Leases(m.namespace).Get(ctx, m.leaseName(), metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + m.setStatus("DPU node lease not found", false) + } else { + m.setStatus(fmt.Sprintf("failed to read DPU node lease: %v", err), false) + } + return err + } + + expired, msg := m.isExpired(lease) + if expired { + m.setStatus(msg, false) + return fmt.Errorf("%s", msg) + } + + m.setStatus("", true) + return nil +} + +// Renew bumps the lease renew time, creating the lease if needed. +func (m *Manager) Renew(ctx context.Context) error { + if m.renewInterval == 0 { + return nil + } + + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + lease, err := m.client.CoordinationV1().Leases(m.namespace).Get(ctx, m.leaseName(), metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = m.EnsureLease(ctx) + return err + } + if err != nil { + return err + } + if !m.updateLeaseSpec(lease, metav1.NowMicro(), true) { + return nil + } + _, err = m.client.CoordinationV1().Leases(m.namespace).Update(ctx, lease, metav1.UpdateOptions{}) + return err + }) +} + +func (m *Manager) monitorPeriod() time.Duration { + period := m.renewInterval + durationFraction := m.leaseDuration / 4 + if durationFraction > 0 && durationFraction < period { + period = durationFraction + } + if period <= 0 { + return time.Second + } + return period +} + +func (m *Manager) setStatus(reason string, ready bool) { + m.statusMu.Lock() + defer m.statusMu.Unlock() + + if m.ready != ready || m.reason != reason { + m.ready = ready + m.reason = reason + } +} + +func (m *Manager) leaseName() string { + return leaseNamePrefix + m.nodeName +} + +func (m *Manager) newLease(now metav1.MicroTime) *coordinationv1.Lease { + return &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: m.leaseName(), + Namespace: 
m.namespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "Node", + Name: m.nodeName, + UID: m.nodeUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }, + }, + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: stringPtr(HolderIdentity), + LeaseDurationSeconds: int32Ptr(int32(m.leaseDuration.Seconds())), + AcquireTime: &now, + RenewTime: &now, + }, + } +} + +func (m *Manager) updateLeaseSpec(lease *coordinationv1.Lease, now metav1.MicroTime, bumpRenew bool) bool { + changed := false + + if lease.Spec.HolderIdentity == nil || *lease.Spec.HolderIdentity != HolderIdentity { + lease.Spec.HolderIdentity = stringPtr(HolderIdentity) + changed = true + } + + if lease.Spec.LeaseDurationSeconds == nil || int32(m.leaseDuration.Seconds()) != *lease.Spec.LeaseDurationSeconds { + lease.Spec.LeaseDurationSeconds = int32Ptr(int32(m.leaseDuration.Seconds())) + changed = true + } + + if bumpRenew { + if lease.Spec.RenewTime == nil || !lease.Spec.RenewTime.Equal(&now) { + lease.Spec.RenewTime = &now + changed = true + } + if lease.Spec.AcquireTime == nil { + lease.Spec.AcquireTime = &now + changed = true + } + } + + if !m.hasOwnerRef(lease.OwnerReferences) { + lease.OwnerReferences = append(lease.OwnerReferences, metav1.OwnerReference{ + APIVersion: "v1", + Kind: "Node", + Name: m.nodeName, + UID: m.nodeUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }) + changed = true + } + + return changed +} + +func (m *Manager) hasOwnerRef(refs []metav1.OwnerReference) bool { + for _, ref := range refs { + if ref.Kind == "Node" && ref.Name == m.nodeName && ref.UID == m.nodeUID { + return true + } + } + return false +} + +func (m *Manager) isExpired(lease *coordinationv1.Lease) (bool, string) { + if lease.Spec.LeaseDurationSeconds == nil || lease.Spec.RenewTime == nil { + return true, "DPU node lease missing renew time or duration" + } + + expire := 
lease.Spec.RenewTime.Time.Add(time.Duration(*lease.Spec.LeaseDurationSeconds) * time.Second) + if time.Now().After(expire) { + return true, fmt.Sprintf("DPU node lease expired at %s", expire.UTC().Format(time.RFC3339)) + } + return false, "" +} + +func stringPtr(val string) *string { + return &val +} + +func int32Ptr(val int32) *int32 { + return &val +} + +func boolPtr(val bool) *bool { + return &val +} diff --git a/go-controller/pkg/node/dpulease/manager_test.go b/go-controller/pkg/node/dpulease/manager_test.go new file mode 100644 index 0000000000..c669c3067c --- /dev/null +++ b/go-controller/pkg/node/dpulease/manager_test.go @@ -0,0 +1,171 @@ +package dpulease + +import ( + "context" + "testing" + "time" + + "github.com/onsi/gomega" + + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" +) + +func TestEnsureLeaseCreatesObject(t *testing.T) { + g := gomega.NewWithT(t) + client := fake.NewSimpleClientset() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, 10*time.Second, 40*time.Second) + + lease, err := mgr.EnsureLease(context.Background()) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(lease).NotTo(gomega.BeNil()) + + fetched, err := client.CoordinationV1().Leases("ovn-kubernetes").Get(context.Background(), lease.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(fetched.Spec.HolderIdentity).NotTo(gomega.BeNil()) + g.Expect(*fetched.Spec.HolderIdentity).To(gomega.Equal(HolderIdentity)) + g.Expect(fetched.Spec.LeaseDurationSeconds).NotTo(gomega.BeNil()) + 
g.Expect(*fetched.Spec.LeaseDurationSeconds).To(gomega.Equal(int32(40))) + g.Expect(fetched.Spec.RenewTime).NotTo(gomega.BeNil()) + g.Expect(fetched.OwnerReferences).NotTo(gomega.BeEmpty()) + g.Expect(fetched.OwnerReferences[0].UID).To(gomega.Equal(node.UID)) + + ready, reason := mgr.Ready() + g.Expect(ready).To(gomega.BeTrue()) + g.Expect(reason).To(gomega.BeEmpty()) +} + +func TestRenewUpdatesTimestamp(t *testing.T) { + g := gomega.NewWithT(t) + client := fake.NewSimpleClientset() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 20*time.Second) + + lease, err := mgr.EnsureLease(context.Background()) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(lease.Spec.RenewTime).NotTo(gomega.BeNil()) + originalRenew := lease.Spec.RenewTime.DeepCopy() + + time.Sleep(10 * time.Millisecond) + g.Expect(mgr.Renew(context.Background())).To(gomega.Succeed()) + + updated, err := client.CoordinationV1().Leases("ovn-kubernetes").Get(context.Background(), lease.Name, metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(updated.Spec.RenewTime.Time.After(originalRenew.Time)).To(gomega.BeTrue()) +} + +func TestCheckStatusDetectsExpiry(t *testing.T) { + g := gomega.NewWithT(t) + oldTime := metav1.NewMicroTime(time.Now().Add(-2 * time.Minute)) + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovn-dpu-worker", + Namespace: "ovn-kubernetes", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptrToString(HolderIdentity), + LeaseDurationSeconds: ptrToInt32(10), + RenewTime: &oldTime, + }, + } + client := fake.NewSimpleClientset(lease) + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 10*time.Second) + + err := mgr.CheckStatus(context.Background()) + g.Expect(err).To(gomega.HaveOccurred()) + ready, reason := 
mgr.Ready() + g.Expect(ready).To(gomega.BeFalse()) + g.Expect(reason).To(gomega.ContainSubstring("expired")) +} + +func TestCheckStatusHealthy(t *testing.T) { + g := gomega.NewWithT(t) + now := metav1.NowMicro() + lease := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovn-dpu-worker", + Namespace: "ovn-kubernetes", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptrToString(HolderIdentity), + LeaseDurationSeconds: ptrToInt32(30), + RenewTime: &now, + }, + } + client := fake.NewSimpleClientset(lease) + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 30*time.Second) + + g.Expect(mgr.CheckStatus(context.Background())).To(gomega.Succeed()) + ready, reason := mgr.Ready() + g.Expect(ready).To(gomega.BeTrue()) + g.Expect(reason).To(gomega.BeEmpty()) +} + +func TestEnsureLeaseRetriesOnAlreadyExists(t *testing.T) { + g := gomega.NewWithT(t) + client := fake.NewSimpleClientset() + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker", UID: types.UID("nodeuid")}} + mgr := NewManager(client, "ovn-kubernetes", node, time.Second, 20*time.Second) + + getCalls := 0 + client.Fake.PrependReactor("get", "leases", func(_ k8stesting.Action) (bool, runtime.Object, error) { + getCalls++ + if getCalls == 1 { + return true, nil, apierrors.NewNotFound(schema.GroupResource{Group: coordinationv1.GroupName, Resource: "leases"}, "ovn-dpu-worker") + } + return false, nil, nil + }) + + createCalls := 0 + client.Fake.PrependReactor("create", "leases", func(_ k8stesting.Action) (bool, runtime.Object, error) { + createCalls++ + if createCalls == 1 { + now := metav1.NowMicro() + existing := &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ovn-dpu-worker", + Namespace: "ovn-kubernetes", + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: ptrToString("someone-else"), + LeaseDurationSeconds: ptrToInt32(1), + RenewTime: &now, + }, + } 
+ g.Expect(client.Tracker().Add(existing)).To(gomega.Succeed()) + return true, nil, apierrors.NewAlreadyExists(schema.GroupResource{Group: coordinationv1.GroupName, Resource: "leases"}, "ovn-dpu-worker") + } + return false, nil, nil + }) + + lease, err := mgr.EnsureLease(context.Background()) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(lease).NotTo(gomega.BeNil()) + g.Expect(createCalls).To(gomega.Equal(1)) + g.Expect(getCalls).To(gomega.BeNumerically(">=", 2)) + + fetched, err := client.CoordinationV1().Leases("ovn-kubernetes").Get(context.Background(), "ovn-dpu-worker", metav1.GetOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + g.Expect(fetched.Spec.HolderIdentity).NotTo(gomega.BeNil()) + g.Expect(*fetched.Spec.HolderIdentity).To(gomega.Equal(HolderIdentity)) + g.Expect(fetched.Spec.LeaseDurationSeconds).NotTo(gomega.BeNil()) + g.Expect(*fetched.Spec.LeaseDurationSeconds).To(gomega.Equal(int32(20))) +} + +func ptrToString(val string) *string { + return &val +} + +func ptrToInt32(val int32) *int32 { + return &val +} diff --git a/go-controller/pkg/node/egressip/gateway_egressip.go b/go-controller/pkg/node/egressip/gateway_egressip.go index 83657404b8..cccc79642b 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip.go +++ b/go-controller/pkg/node/egressip/gateway_egressip.go @@ -175,6 +175,7 @@ type BridgeEIPAddrManager struct { nodeName string bridgeName string nodeAnnotationMu sync.Mutex + annotationIPs sets.Set[string] eIPLister egressiplisters.EgressIPLister eIPInformer cache.SharedIndexInformer nodeLister corev1listers.NodeLister @@ -195,6 +196,7 @@ func NewBridgeEIPAddrManager(nodeName, bridgeName string, linkManager *linkmanag nodeName: nodeName, // k8 node name bridgeName: bridgeName, // bridge name for which EIP IPs are managed nodeAnnotationMu: sync.Mutex{}, // mu for updating Node annotation + annotationIPs: sets.New[string](), eIPLister: eIPInformer.Lister(), eIPInformer: eIPInformer.Informer(), nodeLister: 
nodeInformer.Lister(), @@ -305,6 +307,9 @@ func (g *BridgeEIPAddrManager) SyncEgressIP(objs []interface{}) error { if err != nil { return fmt.Errorf("failed to sync EgressIP gateway config because unable to get Node annotation: %v", err) } + g.nodeAnnotationMu.Lock() + g.annotationIPs = sets.New[string](getIPsStr(annotIPs...)...) + g.nodeAnnotationMu.Unlock() configs := markIPs{v4: map[int]string{}, v6: map[int]string{}} for _, obj := range objs { eip, ok := obj.(*egressipv1.EgressIP) @@ -349,72 +354,60 @@ func (g *BridgeEIPAddrManager) SyncEgressIP(objs []interface{}) error { return nil } -// addIPToAnnotation adds an address to the collection of existing addresses stored in the nodes annotation. Caller -// may repeat addition of addresses without care for duplicate addresses being added. -func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error { - g.nodeAnnotationMu.Lock() - defer g.nodeAnnotationMu.Unlock() +// updateAnnotationLocked updates the node's egress IPs +// Must be called with nodeAnnotationMu locked +func (g *BridgeEIPAddrManager) updateAnnotationLocked(updatedIPs sets.Set[string]) error { return retry.RetryOnConflict(retry.DefaultRetry, func() error { node, err := g.nodeLister.Get(g.nodeName) if err != nil { return err } - existingIPsStr, err := util.ParseNodeBridgeEgressIPsAnnotation(node) - if err != nil { - if util.IsAnnotationNotSetError(err) { - existingIPsStr = make([]string, 0) - } else { - return fmt.Errorf("failed to parse annotation key %q from node object: %v", util.OVNNodeBridgeEgressIPs, err) - } - } - existingIPsSet := sets.New[string](existingIPsStr...) 
- candidateIPStr := candidateIP.String() - if existingIPsSet.Has(candidateIPStr) { - return nil - } - patch, err := json.Marshal(existingIPsSet.Insert(candidateIPStr).UnsortedList()) + patch, err := json.Marshal(updatedIPs.UnsortedList()) if err != nil { return err } - node.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) - return g.kube.UpdateNodeStatus(node) + nodeToUpdate := node.DeepCopy() + if nodeToUpdate.Annotations == nil { + nodeToUpdate.Annotations = map[string]string{} + } + nodeToUpdate.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) + return g.kube.UpdateNodeStatus(nodeToUpdate) }) } +// addIPToAnnotation adds an address to the collection of existing addresses stored in the nodes annotation. Caller +// may repeat addition of addresses without care for duplicate addresses being added. +func (g *BridgeEIPAddrManager) addIPToAnnotation(candidateIP net.IP) error { + g.nodeAnnotationMu.Lock() + defer g.nodeAnnotationMu.Unlock() + updatedIPs := sets.New[string](g.annotationIPs.UnsortedList()...) + updatedIPs.Insert(candidateIP.String()) + if updatedIPs.Equal(g.annotationIPs) { + return nil + } + if err := g.updateAnnotationLocked(updatedIPs); err != nil { + return err + } + g.annotationIPs = updatedIPs + return nil +} + // deleteIPsFromAnnotation deletes address from annotation. If multiple users, callers must synchronise. // deletion of address that doesn't exist will not cause an error. 
func (g *BridgeEIPAddrManager) deleteIPsFromAnnotation(candidateIPs ...net.IP) error { g.nodeAnnotationMu.Lock() defer g.nodeAnnotationMu.Unlock() - return retry.RetryOnConflict(retry.DefaultRetry, func() error { - node, err := g.nodeLister.Get(g.nodeName) - if err != nil { - return err - } - existingIPsStr, err := util.ParseNodeBridgeEgressIPsAnnotation(node) - if err != nil { - if util.IsAnnotationNotSetError(err) { - existingIPsStr = make([]string, 0) - } else { - return fmt.Errorf("failed to parse annotation key %q from node object: %v", util.OVNNodeBridgeEgressIPs, err) - } - } - if len(existingIPsStr) == 0 { - return nil - } - existingIPsSet := sets.New[string](existingIPsStr...) - candidateIPsStr := getIPsStr(candidateIPs...) - if !existingIPsSet.HasAny(candidateIPsStr...) { - return nil - } - existingIPsSet.Delete(candidateIPsStr...) - patch, err := json.Marshal(existingIPsSet.UnsortedList()) - if err != nil { - return err - } - node.Annotations[util.OVNNodeBridgeEgressIPs] = string(patch) - return g.kube.UpdateNodeStatus(node) - }) + candidateIPsStr := getIPsStr(candidateIPs...) + updatedIPs := sets.New[string](g.annotationIPs.UnsortedList()...) + updatedIPs.Delete(candidateIPsStr...) 
+ if updatedIPs.Equal(g.annotationIPs) { + return nil + } + if err := g.updateAnnotationLocked(updatedIPs); err != nil { + return err + } + g.annotationIPs = updatedIPs + return nil } func (g *BridgeEIPAddrManager) addIPBridge(ip net.IP) error { diff --git a/go-controller/pkg/node/egressip/gateway_egressip_test.go b/go-controller/pkg/node/egressip/gateway_egressip_test.go index 6493cb968a..816219df0f 100644 --- a/go-controller/pkg/node/egressip/gateway_egressip_test.go +++ b/go-controller/pkg/node/egressip/gateway_egressip_test.go @@ -12,6 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -72,9 +73,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.AddEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -122,9 +125,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.AddEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - 
gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -164,9 +169,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err := addrMgr.UpdateEgressIP(unassignedEIP, assignedEIP) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) }) @@ -189,9 +196,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err = addrMgr.UpdateEgressIP(assignedEIP, unassignedEIP) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - 
gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, @@ -250,9 +259,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { isUpdated, err = addrMgr.DeleteEgressIP(eip) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process a valid EgressIP") gomega.Expect(isUpdated).Should(gomega.BeTrue()) - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).ShouldNot(gomega.ConsistOf(ipV4Addr)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).ShouldNot(gomega.ConsistOf(ipV4Addr)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, @@ -290,9 +301,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { eipUnassigned3 := getEIPNotAssignedToNode(mark3, ipV4Addr3) err := addrMgr.SyncEgressIP([]interface{}{eipAssigned1, eipAssigned2, eipUnassigned3}) gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "should process valid EgressIPs") - node, err := addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be 
present within kapi") - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "node should be present within kapi") + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr, ipV4Addr2)) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(ipV4Addr), bridgeLinkIndex))).Should(gomega.BeTrue()) gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, @@ -374,9 +387,11 @@ var _ = ginkgo.Describe("Gateway EgressIP", func() { // Verify cleanup: secondary IP removed from cache, annotation, and bridge gomega.Expect(addrMgr.cache.IsIPPresent(net.ParseIP(secondaryIP))).Should(gomega.BeFalse(), "secondary IP should be removed from cache") - node, err = addrMgr.nodeLister.Get(nodeName) - gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) - gomega.Expect(parseEIPsFromAnnotation(node)).Should(gomega.ConsistOf(ipV4Addr), "only valid OVN IP should be in annotation") + gomega.Eventually(func() []string { + node, err := addrMgr.nodeLister.Get(nodeName) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + return parseEIPsFromAnnotation(node) + }).Should(gomega.ConsistOf(ipV4Addr), "only valid OVN IP should be in annotation") gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrDel", nlLinkMock, egressip.GetNetlinkAddress(net.ParseIP(secondaryIP), bridgeLinkIndex))).Should(gomega.BeTrue(), "should delete secondary IP from bridge") gomega.Expect(nlMock.AssertCalled(ginkgo.GinkgoT(), "AddrAdd", nlLinkMock, @@ -411,8 +426,17 @@ func initBridgeEIPAddrManagerWithHostCIDRs(nodeName, bridgeName string, bridgeEI gomega.Expect(watchFactory.Start()).Should(gomega.Succeed(), "watch factory should start") gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "watch factory creation must succeed") linkManager := 
linkmanager.NewController(nodeName, true, true, nil) - return NewBridgeEIPAddrManager(nodeName, bridgeName, linkManager, &kube.Kube{KClient: client}, watchFactory.EgressIPInformer(), watchFactory.NodeCoreInformer()), - watchFactory.Shutdown + addrMgr := NewBridgeEIPAddrManager(nodeName, bridgeName, linkManager, &kube.Kube{KClient: client}, watchFactory.EgressIPInformer(), watchFactory.NodeCoreInformer()) + initialAnnotIPs, err := util.ParseNodeBridgeEgressIPsAnnotation(node) + if err != nil { + if util.IsAnnotationNotSetError(err) { + initialAnnotIPs = make([]string, 0) + } else { + gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "bridge EgressIP annotation should be parseable") + } + } + addrMgr.annotationIPs = sets.New[string](initialAnnotIPs...) + return addrMgr, watchFactory.Shutdown } func getEIPAssignedToNode(nodeName, mark, assignedIP string) *egressipv1.EgressIP { diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index de5d1ee235..29d6103c0a 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -827,6 +827,9 @@ func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoi } nftElems := getGatewayNFTRules(service, localEndpoints, true) nftElems = append(nftElems, getGatewayNFTRules(service, localEndpoints, false)...) + if util.IsNetworkSegmentationSupportEnabled() { + nftElems = append(nftElems, getUDNNFTRules(service, nil)...) + } if len(nftElems) > 0 { if err := nodenft.DeleteNFTElements(nftElems); err != nil { err = fmt.Errorf("failed to delete nftables rules for service %s/%s: %v", @@ -834,33 +837,6 @@ func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoi errors = append(errors, err) } } - - if util.IsNetworkSegmentationSupportEnabled() { - // NOTE: The code below is not using nodenft.DeleteNFTElements because it first adds elements - // before removing them, which fails for UDN NFT rules. 
These rules only have map keys, - // not key-value pairs, making it impossible to add. - // Attempt to delete the elements directly and handle the IsNotFound error. - // - // TODO: Switch to `nft destroy` when supported. - nftElems = getUDNNFTRules(service, nil) - if len(nftElems) > 0 { - nft, err := nodenft.GetNFTablesHelper() - if err != nil { - return utilerrors.Join(append(errors, err)...) - } - - tx := nft.NewTransaction() - for _, elem := range nftElems { - tx.Delete(elem) - } - - if err := nft.Run(context.TODO(), tx); err != nil && !knftables.IsNotFound(err) { - err = fmt.Errorf("failed to delete nftables rules for UDN service %s/%s: %v", - service.Namespace, service.Name, err) - errors = append(errors, err) - } - } - } } return utilerrors.Join(errors...) @@ -889,15 +865,16 @@ func (npw *nodePortWatcher) AddService(service *corev1.Service) error { } klog.V(5).Infof("Adding service %s in namespace %s", service.Name, service.Namespace) - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return nil - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } + name := ktypes.NamespacedName{Namespace: service.Namespace, Name: service.Name} epSlices, err := npw.watchFactory.GetServiceEndpointSlices(service.Namespace, service.Name, netInfo.GetNetworkName()) if err != nil { @@ -977,11 +954,12 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(new.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return utilerrors.Join(errors...) 
- } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } + if netInfo == nil { + // network not active on our node + return utilerrors.Join(errors...) + } if err = addServiceRules(new, netInfo, svcConfig.localEndpoints, svcConfig.hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) @@ -1219,14 +1197,20 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { } netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(service.Namespace) - // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted. - if util.IsInvalidPrimaryNetworkError(err) { - continue - } if err != nil { + // During startup sync, avoid failing the entire processExisting loop for namespaces that + // require a UDN but have no primary NAD yet (or it has been deleted). Those services will + // be reconciled later via regular add/update events once the NAD exists. + if util.IsInvalidPrimaryNetworkError(err) { + continue + } errors = append(errors, err) continue } + if netInfo == nil { + // network not active on our node + continue + } epSlices, err := npw.watchFactory.GetServiceEndpointSlices(service.Namespace, service.Name, netInfo.GetNetworkName()) if err != nil { @@ -1307,6 +1291,10 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", epSlice.Name, epSlice.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(epSlice, netInfo) { return nil @@ -1425,21 +1413,19 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice // and allows graceful handling of deletion race conditions. 
netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(namespacedName.Namespace) if err != nil { - // If the namespace was deleted, skip adding new service rules - if apierrors.IsNotFound(err) { - klog.V(5).Infof("Namespace not found for service %s/%s during endpoint slice delete, skipping adding service rules", - namespacedName.Namespace, namespacedName.Name) - return utilerrors.Join(errors...) - } - // If the UDN was deleted, skip adding new service rules + // If the UDN was deleted or not processed yet, skip adding new service rules if util.IsInvalidPrimaryNetworkError(err) { - klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network invalid: %v", + klog.V(5).Infof("Skipping addServiceRules for %s/%s during endpoint slice delete: primary network unavailable: %v", namespacedName.Namespace, namespacedName.Name, err) return utilerrors.Join(errors...) } errors = append(errors, fmt.Errorf("error getting active network for service %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err)) return utilerrors.Join(errors...) } + if netInfo == nil { + // network not active on our node + return utilerrors.Join(errors...) 
+ } if err = addServiceRules(svcConfig.service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) @@ -1480,6 +1466,10 @@ func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discover if err != nil { return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", newEpSlice.Name, newEpSlice.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(newEpSlice, netInfo) { return nil @@ -1566,11 +1556,12 @@ func (npwipt *nodePortWatcherIptables) AddService(service *corev1.Service) error netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return nil - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", service.Name, service.Namespace, err) } + if netInfo == nil { + // network not active on our node + return nil + } if err := addServiceRules(service, netInfo, nil, false, nil); err != nil { return fmt.Errorf("AddService failed for nodePortWatcherIptables: %v", err) @@ -1597,11 +1588,12 @@ func (npwipt *nodePortWatcherIptables) UpdateService(old, new *corev1.Service) e if util.ServiceTypeHasClusterIP(new) && util.IsClusterIPSet(new) { netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(new.Namespace) if err != nil { - if util.IsInvalidPrimaryNetworkError(err) { - return utilerrors.Join(errors...) - } return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } + if netInfo == nil { + // network not active on our node + return utilerrors.Join(errors...) 
+ } if err = addServiceRules(new, netInfo, nil, false, nil); err != nil { errors = append(errors, err) @@ -1642,6 +1634,21 @@ func (npwipt *nodePortWatcherIptables) SyncServices(services []interface{}) erro if !util.ServiceTypeHasClusterIP(service) || !util.IsClusterIPSet(service) { continue } + netInfo, err := npwipt.networkManager.GetActiveNetworkForNamespace(service.GetNamespace()) + if err != nil { + // During startup sync, avoid failing the entire processExisting loop for namespaces that + // require a UDN but have no primary NAD yet (or it has been deleted). Those services will + // be reconciled later via regular add/update events once the NAD exists. + if util.IsInvalidPrimaryNetworkError(err) { + continue + } + errors = append(errors, err) + continue + } + if netInfo == nil { + // network not on our node + continue + } // Add correct iptables rules. // TODO: ETP and ITP is not implemented for smart NIC mode. keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, nil, false)...) 
diff --git a/go-controller/pkg/node/gateway_shared_intf_test.go b/go-controller/pkg/node/gateway_shared_intf_test.go index 065b7c52ad..43078f027a 100644 --- a/go-controller/pkg/node/gateway_shared_intf_test.go +++ b/go-controller/pkg/node/gateway_shared_intf_test.go @@ -10,8 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/fake" @@ -21,6 +20,8 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" + nodenft "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/nftables" + ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -38,9 +39,14 @@ type mockNetworkManagerWithNamespaceNotFoundError struct { networkmanager.Interface } -func (m *mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { - notFoundErr := apierrors.NewNotFound(schema.GroupResource{Resource: "namespaces"}, namespace) - return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, notFoundErr) +func (m *mockNetworkManagerWithNamespaceNotFoundError) GetPrimaryNADForNamespace(_ string) (string, error) { + // Simulate namespace deletion: no primary NAD by definition. + return "", nil +} + +func (m *mockNetworkManagerWithNamespaceNotFoundError) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + // Namespace is gone; new GetActiveNetworkForNamespace semantics return nil, nil. 
+ return nil, nil } // mockNetworkManagerWithInvalidPrimaryNetworkError simulates UDN deletion scenario @@ -48,6 +54,11 @@ type mockNetworkManagerWithInvalidPrimaryNetworkError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithInvalidPrimaryNetworkError) GetPrimaryNADForNamespace(_ string) (string, error) { + // just a trigger to ensure GetActiveNetworkForNamespace gets called + return types.DefaultNetworkName, nil +} + func (m *mockNetworkManagerWithInvalidPrimaryNetworkError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { return nil, util.NewInvalidPrimaryNetworkError(namespace) } @@ -57,10 +68,74 @@ type mockNetworkManagerWithError struct { networkmanager.Interface } +func (m *mockNetworkManagerWithError) GetPrimaryNADForNamespace(_ string) (string, error) { + // just a trigger to ensure GetActiveNetworkForNamespace gets called + return types.DefaultNetworkName, nil +} + func (m *mockNetworkManagerWithError) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { return nil, fmt.Errorf("network lookup failed for namespace %q", namespace) } +// mockNetworkManagerWithInvalidPrimaryNetworkSkip simulates a namespace that +// requires a primary UDN but is currently in invalid primary network state. +type mockNetworkManagerWithInvalidPrimaryNetworkSkip struct { + networkmanager.Interface +} + +func (m *mockNetworkManagerWithInvalidPrimaryNetworkSkip) GetPrimaryNADForNamespace(namespace string) (string, error) { + return "", util.NewInvalidPrimaryNetworkError(namespace) +} + +func (m *mockNetworkManagerWithInvalidPrimaryNetworkSkip) GetActiveNetworkForNamespace(namespace string) (util.NetInfo, error) { + return nil, util.NewInvalidPrimaryNetworkError(namespace) +} + +// mockNetworkManagerWithInactiveNode simulates a UDN where the node is inactive for the network. 
+type mockNetworkManagerWithInactiveNode struct { + networkmanager.Interface +} + +func (m *mockNetworkManagerWithInactiveNode) GetPrimaryNADForNamespace(_ string) (string, error) { + return "test-namespace/test-nad", nil +} + +func (m *mockNetworkManagerWithInactiveNode) GetNetworkNameForNADKey(_ string) string { + return "test-udn" +} + +func (m *mockNetworkManagerWithInactiveNode) NodeHasNetwork(_, _ string) bool { + return false +} + +func (m *mockNetworkManagerWithInactiveNode) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + // New code paths resolve activity directly via GetActiveNetworkForNamespace. + // Returning nil netInfo means "network not active on this node". + return nil, nil +} + +// mockNetworkManagerWithActiveUDN simulates a UDN active on this node. +type mockNetworkManagerWithActiveUDN struct { + networkmanager.Interface + netInfo util.NetInfo +} + +func (m *mockNetworkManagerWithActiveUDN) GetPrimaryNADForNamespace(_ string) (string, error) { + return "test-namespace/test-nad", nil +} + +func (m *mockNetworkManagerWithActiveUDN) GetNetworkNameForNADKey(_ string) string { + return m.netInfo.GetNetworkName() +} + +func (m *mockNetworkManagerWithActiveUDN) NodeHasNetwork(_, _ string) bool { + return true +} + +func (m *mockNetworkManagerWithActiveUDN) GetActiveNetworkForNamespace(_ string) (util.NetInfo, error) { + return m.netInfo, nil +} + // verifyIPTablesRule checks if an iptables rule exists and asserts the expected state func verifyIPTablesRule(ipt util.IPTablesHelper, serviceIP string, servicePort, nodePort int32, shouldExist bool, message string) { exists, err := ipt.Exists("nat", "OVN-KUBE-NODEPORT", @@ -256,3 +331,153 @@ var _ = Describe("DeleteEndpointSlice", func() { }) }) }) + +var _ = Describe("SyncServices", func() { + var ( + fakeClient *util.OVNNodeClientset + watcher *factory.WatchFactory + npw *nodePortWatcher + iptV4 util.IPTablesHelper + iptV6 util.IPTablesHelper + ) + + const ( + nodeName = "test-node" + 
testNamespace = "test-namespace" + testService = "test-service" + ) + + BeforeEach(func() { + var err error + Expect(config.PrepareTestConfig()).To(Succeed()) + config.Gateway.Mode = config.GatewayModeLocal + config.IPv4Mode = true + config.IPv6Mode = false + _ = nodenft.SetFakeNFTablesHelper() + + fakeClient = &util.OVNNodeClientset{ + KubeClient: fake.NewSimpleClientset(), + } + fakeClient.AdminPolicyRouteClient = adminpolicybasedrouteclient.NewSimpleClientset() + fakeClient.NetworkAttchDefClient = nadfake.NewSimpleClientset() + fakeClient.UserDefinedNetworkClient = udnfakeclient.NewSimpleClientset() + + watcher, err = factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).NotTo(HaveOccurred()) + err = watcher.Start() + Expect(err).NotTo(HaveOccurred()) + + iptV4, iptV6 = util.SetFakeIPTablesHelpers() + npw = initFakeNodePortWatcher(iptV4, iptV6) + npw.watchFactory = watcher + npw.networkManager = networkmanager.Default().Interface() + + k := &kube.Kube{KClient: fakeClient.KubeClient} + npw.nodeIPManager = newAddressManagerInternal(nodeName, k, nil, watcher, nil, false) + }) + + AfterEach(func() { + watcher.Shutdown() + }) + + Context("when namespace has invalid primary network", func() { + It("should skip service sync without failing startup", func() { + service := newService(testService, testNamespace, "10.96.0.20", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30091, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + npw.networkManager = &mockNetworkManagerWithInvalidPrimaryNetworkSkip{} + + err := npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.20", 80, 30091, false, + "iptables rule should not be created when primary network is invalid") + }) + }) + + Context("when UDN is inactive on this node", func() { + It("should skip service sync without installing rules", 
func() { + service := newService(testService, testNamespace, "10.96.0.30", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30092, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + npw.networkManager = &mockNetworkManagerWithInactiveNode{} + + err := npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.30", 80, 30092, false, + "iptables rule should not be created when UDN is inactive on this node") + }) + }) + + Context("when UDN is active on this node", func() { + It("should install nodeport rules", func() { + // Avoid openflow dependency in this test. + config.Gateway.AllowNoUplink = true + npw.ofportPhys = "" + + service := newService(testService, testNamespace, "10.96.0.40", + []corev1.ServicePort{{ + Name: "http", + Protocol: corev1.ProtocolTCP, + Port: 80, + TargetPort: intstr.FromInt(8080), + NodePort: 30093, + }}, + corev1.ServiceTypeNodePort, nil, corev1.ServiceStatus{}, false, false) + + nad := ovntest.GenerateNAD("test-udn", "test-nad", testNamespace, types.Layer3Topology, "10.1.0.0/16", types.NetworkRolePrimary) + netInfo, err := util.ParseNADInfo(nad) + Expect(err).NotTo(HaveOccurred()) + npw.networkManager = &mockNetworkManagerWithActiveUDN{netInfo: netInfo} + + nodeName := npw.nodeIPManager.nodeName + epPortName := "http" + epPortValue := int32(8080) + epPortProtocol := corev1.ProtocolTCP + epSlice := &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: testService + "ab23", + Namespace: testNamespace, + Labels: map[string]string{ + types.LabelUserDefinedServiceName: testService, + }, + Annotations: map[string]string{ + types.UserDefinedNetworkEndpointSliceAnnotation: netInfo.GetNetworkName(), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{{ + Addresses: []string{"10.244.0.9"}, + NodeName: &nodeName, + }}, + Ports: 
[]discovery.EndpointPort{{ + Name: &epPortName, + Protocol: &epPortProtocol, + Port: &epPortValue, + }}, + } + Expect(watcher.EndpointSliceInformer().GetStore().Add(epSlice)).To(Succeed()) + + err = npw.SyncServices([]interface{}{service}) + Expect(err).NotTo(HaveOccurred()) + + verifyIPTablesRule(iptV4, "10.96.0.40", 80, 30093, true, + "iptables rule should be created when UDN is active on this node") + }) + }) +}) diff --git a/go-controller/pkg/node/healthcheck_service.go b/go-controller/pkg/node/healthcheck_service.go index dc906f4f1d..30ce4e793b 100644 --- a/go-controller/pkg/node/healthcheck_service.go +++ b/go-controller/pkg/node/healthcheck_service.go @@ -47,6 +47,8 @@ func (l *loadBalancerHealthChecker) AddService(svc *corev1.Service) error { if err := l.server.SyncServices(l.services); err != nil { return fmt.Errorf("unable to sync service %v; err: %v", name, err) } + // we can use CDN here and do not care about UDN because we are just looking for a count + // which will be the same between CDN and UDN epSlices, err := l.watchFactory.GetServiceEndpointSlices(svc.Namespace, svc.Name, types.DefaultNetworkName) if err != nil { return fmt.Errorf("could not fetch endpointslices "+ diff --git a/go-controller/pkg/node/managementport/portDeviceManager.go b/go-controller/pkg/node/managementport/portDeviceManager.go index 5447836298..b4e7d2f593 100644 --- a/go-controller/pkg/node/managementport/portDeviceManager.go +++ b/go-controller/pkg/node/managementport/portDeviceManager.go @@ -2,6 +2,7 @@ package managementport import ( "fmt" + "slices" "sync" "k8s.io/klog/v2" @@ -60,10 +61,22 @@ func (mpdm *MgmtPortDeviceManager) Init() error { // validate the existing management port reservations: for network, annotatedMgmtPortDetails := range annotatedMgmtPortDetailsMap { deviceId := annotatedMgmtPortDetails.DeviceId + allDeviceIDs := mpdm.deviceAllocator.DeviceIDs() + if deviceId != "" && !slices.Contains(allDeviceIDs, deviceId) { + // The device ID from the annotation is 
no longer available in the + // resource pool. This can happen if the management port is + // re-enumerated at a different PCI address after a host reboot (for + // example, due to changes in DPU firmware settings). + klog.V(5).Infof("Manage port device %s of resource %s for network %s is no longer available, "+ + "ignore DeviceID value from the annotation", deviceId, mpdm.deviceAllocator.ResourceName(), network) + deviceId = "" + } if deviceId == "" { - // this must be legacyManagementPortDetails annotation for default network, try to find its deviceId. - // luckily this is one time thing - allDeviceIDs := mpdm.deviceAllocator.DeviceIDs() + // The device ID may be missing from the annotation (legacy default + // network annotation) or stale (not present in the resource pool). + // In either case, look up the device by PfId and FuncId, assuming + // the device plugin still exposes the same port and that we should + // consume the same VF index. for _, d := range allDeviceIDs { mgmtDetails, err := util.GetNetworkDeviceDetails(d) if err == nil && mgmtDetails.PfId == annotatedMgmtPortDetails.PfId && mgmtDetails.FuncId == annotatedMgmtPortDetails.FuncId { diff --git a/go-controller/pkg/node/managementport/portDeviceManager_test.go b/go-controller/pkg/node/managementport/portDeviceManager_test.go new file mode 100644 index 0000000000..79428672da --- /dev/null +++ b/go-controller/pkg/node/managementport/portDeviceManager_test.go @@ -0,0 +1,201 @@ +package managementport + +import ( + "encoding/json" + "os" + "strings" + + "github.com/stretchr/testify/mock" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/deviceresource" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + kubeMocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" + 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + utilMocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/mocks" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +const testNodeName = "test-node" + +// envVarForResource returns the device-plugin environment variable +// name that corresponds to the given resource name. +func envVarForResource(resourceName string) string { + s := strings.ReplaceAll(resourceName, ".", "_") + s = strings.ReplaceAll(s, "/", "_") + return "PCIDEVICE_" + strings.ToUpper(s) +} + +// setupInitTestEnv creates a DeviceResourceAllocator backed by the +// given PCI IDs list and a NodeWatchFactory whose fake client +// contains a single node with the provided management-port annotation. +// If annotation is nil the node is created without the annotation. +func setupInitTestEnv( + resourceName string, availableDevices []string, + annotation util.NetworkDeviceDetailsMap, +) (*deviceresource.DeviceResourceAllocator, factory.NodeWatchFactory) { + envVarName := envVarForResource(resourceName) + os.Setenv(envVarName, strings.Join(availableDevices, ",")) + DeferCleanup(os.Unsetenv, envVarName) + + allocator, err := deviceresource.DeviceResourceManager().GetDeviceResourceAllocator(resourceName) + Expect(err).NotTo(HaveOccurred()) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeName, + Annotations: map[string]string{}, + }, + } + if annotation != nil { + annotationBytes, err := json.Marshal(annotation) + Expect(err).NotTo(HaveOccurred()) + node.Annotations[util.OvnNodeManagementPort] = string(annotationBytes) + } + + fakeClient := fake.NewSimpleClientset( + &corev1.NodeList{Items: []corev1.Node{*node}}) + fakeNodeClient := &util.OVNNodeClientset{KubeClient: fakeClient} + wf, err := factory.NewNodeWatchFactory(fakeNodeClient, testNodeName) + Expect(err).NotTo(HaveOccurred()) + Expect(wf.Start()).To(Succeed()) + + return allocator, wf +} + +// mockDeviceDetails sets up SriovnetOps mock 
expectations so that +// util.GetNetworkDeviceDetails(deviceId) returns the given pfId and +// funcId. +func mockDeviceDetails( + sriovMock *utilMocks.SriovnetOps, + deviceId string, pfId, funcId int, +) { + sriovMock.On("GetVfIndexByPciAddress", deviceId).Return(funcId, nil) + sriovMock.On("GetPfIndexByVfPciAddress", deviceId).Return(pfId, nil) +} + +var _ = Describe("MgmtPortDeviceManager tests", func() { + var ( + sriovnetOpsMock *utilMocks.SriovnetOps + origSriovnetOps util.SriovnetOps + kubeMock *kubeMocks.Interface + ) + BeforeEach(func() { + origSriovnetOps = util.GetSriovnetOps() + Expect(config.PrepareTestConfig()).To(Succeed()) + sriovnetOpsMock = &utilMocks.SriovnetOps{} + util.SetSriovnetOpsInst(sriovnetOpsMock) + kubeMock = &kubeMocks.Interface{} + }) + + AfterEach(func() { + util.SetSriovnetOpsInst(origSriovnetOps) + sriovnetOpsMock.AssertExpectations(GinkgoT()) + kubeMock.AssertExpectations(GinkgoT()) + }) + + Context("Init", func() { + It("Succeeds with no management port annotation", func() { + allocator, wf := setupInitTestEnv( + "example.com/pool_no_annotation", + []string{"0000:05:00.0"}, + nil, + ) + DeferCleanup(wf.Shutdown) + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails).To(BeEmpty()) + }) + It("Restores valid DeviceId with matching PfId/FuncId", func() { + const ( + device0 = "0000:03:00.0" + device1 = "0000:03:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_valid_restore", + []string{device0, device1}, + util.NetworkDeviceDetailsMap{ + "default": {DeviceId: device0, PfId: 0, FuncId: 4}, + }, + ) + DeferCleanup(wf.Shutdown) + mockDeviceDetails(sriovnetOpsMock, device0, 0, 4) + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails["default"].DeviceId).To(Equal(device0)) + }) + It("Restores legacy annotation without DeviceId by 
PfId/FuncId match", func() { + const ( + device0 = "0000:04:00.0" + device1 = "0000:04:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_legacy_restore", + []string{device0, device1}, + util.NetworkDeviceDetailsMap{"default": {PfId: 0, FuncId: 4}}, + ) + DeferCleanup(wf.Shutdown) + mockDeviceDetails(sriovnetOpsMock, device0, 0, 4) + + kubeMock.On("SetAnnotationsOnNode", testNodeName, + mock.Anything).Return(nil).Once() + + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails["default"].DeviceId). + To(Equal(device0)) + }) + It("Recovers by PfId/FuncId when annotated DeviceId is stale", func() { + const ( + staleDevice = "0000:01:01.0" + matchDevice = "0000:01:00.0" + otherDevice = "0000:01:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_stale_recovery", + []string{matchDevice, otherDevice}, + util.NetworkDeviceDetailsMap{ + "default": {DeviceId: staleDevice, PfId: 3, FuncId: 5}, + }, + ) + DeferCleanup(wf.Shutdown) + mockDeviceDetails(sriovnetOpsMock, matchDevice, 3, 5) + kubeMock.On("SetAnnotationsOnNode", testNodeName, + mock.Anything).Return(nil).Once() + + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + Expect(mpdm.Init()).NotTo(HaveOccurred()) + Expect(mpdm.mgmtPortDetails["default"].DeviceId). 
+ To(Equal(matchDevice)) + }) + It("Fails when no PfId/FuncId match after ignoring stale DeviceId", func() { + const ( + staleDevice = "0000:02:01.0" + device1 = "0000:02:00.0" + device2 = "0000:02:00.1" + ) + allocator, wf := setupInitTestEnv( + "example.com/pool_stale_no_match", + []string{device1, device2}, + util.NetworkDeviceDetailsMap{ + "default": {DeviceId: staleDevice, PfId: 3, FuncId: 5}, + }, + ) + DeferCleanup(wf.Shutdown) + // Neither device matches PfId=3, FuncId=5 + mockDeviceDetails(sriovnetOpsMock, device1, 1, 1) + mockDeviceDetails(sriovnetOpsMock, device2, 2, 2) + + mpdm := NewMgmtPortDeviceManager(kubeMock, wf, testNodeName, allocator) + err := mpdm.Init() + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to find match manage port device")) + }) + }) +}) diff --git a/go-controller/pkg/node/nftables/helpers.go b/go-controller/pkg/node/nftables/helpers.go index 3e8ed11ff4..07873378d6 100644 --- a/go-controller/pkg/node/nftables/helpers.go +++ b/go-controller/pkg/node/nftables/helpers.go @@ -28,7 +28,7 @@ func SetFakeNFTablesHelper() *knftables.Fake { // called, it will create a "real" knftables.Interface func GetNFTablesHelper() (knftables.Interface, error) { if nftHelper == nil { - nft, err := knftables.New(knftables.InetFamily, OVNKubernetesNFTablesName) + nft, err := knftables.New(knftables.InetFamily, OVNKubernetesNFTablesName, knftables.RequireDestroy) if err != nil { return nil, err } diff --git a/go-controller/pkg/node/nftables/testing.go b/go-controller/pkg/node/nftables/testing.go index ad377caeca..ba42069e25 100644 --- a/go-controller/pkg/node/nftables/testing.go +++ b/go-controller/pkg/node/nftables/testing.go @@ -5,34 +5,68 @@ package nftables import ( "fmt" + "slices" + "sort" "strings" "k8s.io/apimachinery/pkg/util/sets" ) // MatchNFTRules checks that the expected nftables rules match the actual ones, ignoring -// order. +// order and extra whitespace. 
func MatchNFTRules(expected, actual string) error { - expectedSet := sets.New(strings.Split(expected, "\n")...) - actualSet := sets.New(strings.Split(actual, "\n")...) - - // ignore blank lines - expectedSet.Delete("") - actualSet.Delete("") - - missing := expectedSet.Difference(actualSet) - extra := actualSet.Difference(expectedSet) - + missing, extra := diffNFTRules(expected, actual) if len(missing) == 0 && len(extra) == 0 { return nil } msg := "nftables rule mismatch:" if len(missing) > 0 { - msg += fmt.Sprintf("\nMissing rules: %v\n", missing.UnsortedList()) + msg += fmt.Sprintf("\nRules missing from `nft dump ruleset`:\n%s\n", strings.Join(missing, "\n")) } if len(extra) > 0 { - msg += fmt.Sprintf("\nExtra rules: %v\n", extra.UnsortedList()) + msg += fmt.Sprintf("\nUnexpected extra rules in `nft dump ruleset`:\n%s\n", strings.Join(extra, "\n")) } return fmt.Errorf("%s", msg) } + +// helper function, for ease of unit testing +func diffNFTRules(expected, actual string) (missing, extra []string) { + expectedLines := strings.Split(expected, "\n") + expectedSet := sets.New[string]() + for _, line := range expectedLines { + line = strings.TrimSpace(line) + if line != "" { + expectedSet.Insert(line) + } + } + + actualLines := strings.Split(actual, "\n") + actualSet := sets.New[string]() + for _, line := range actualLines { + line = strings.TrimSpace(line) + if line != "" { + actualSet.Insert(line) + } + } + + missingSet := expectedSet.Difference(actualSet) + extraSet := actualSet.Difference(expectedSet) + + // While we ignore order for purposes of the comparison, it's confusing to output + // the missing/extra rules in essentially random order (and makes it harder to see + // what the problem is in cases like "the rules are basically correct, except that + // they have the wrong IP"). So we sort the `missing` rules back into the same + // order as they appeared in `expected`, and the `extra` rules into the same order + // as they appeared in `actual`. 
+ missingSorted := missingSet.UnsortedList() + sort.Slice(missingSorted, func(i, j int) bool { + return slices.Index(expectedLines, missingSorted[i]) < slices.Index(expectedLines, missingSorted[j]) + }) + extraSorted := extraSet.UnsortedList() + sort.Slice(extraSorted, func(i, j int) bool { + return slices.Index(actualLines, extraSorted[i]) < slices.Index(actualLines, extraSorted[j]) + }) + + return missingSorted, extraSorted +} diff --git a/go-controller/pkg/node/nftables/testing_test.go b/go-controller/pkg/node/nftables/testing_test.go new file mode 100644 index 0000000000..d0ce907d23 --- /dev/null +++ b/go-controller/pkg/node/nftables/testing_test.go @@ -0,0 +1,86 @@ +//go:build linux +// +build linux + +package nftables + +import ( + "reflect" + "testing" +) + +func Test_diffNFTRules(t *testing.T) { + for _, tc := range []struct { + name string + expected string + actual string + missing []string + extra []string + }{ + { + name: "empty match", + expected: "", + actual: "", + missing: []string{}, + extra: []string{}, + }, + { + name: "non-empty match", + expected: "line one\nline two\nline three\n", + actual: "line three\nline one\nline two\n", + missing: []string{}, + extra: []string{}, + }, + { + name: "match with extra whitespace", + expected: " line one\n line two\n line three\n", + actual: "\nline three\nline one\nline two\n\n", + missing: []string{}, + extra: []string{}, + }, + { + name: "missing lines", + expected: "line one\nline two\nline three\nline four\n", + actual: "line two\nline four\n", + missing: []string{"line one", "line three"}, + extra: []string{}, + }, + { + name: "missing lines, alternate order", + expected: "line one\nline two\nline three\nline four\n", + actual: "line four\nline two\n", + missing: []string{"line one", "line three"}, + extra: []string{}, + }, + { + name: "extra lines", + expected: "line two\nline four\n", + actual: "line one\nline two\nline three\nline four\n", + missing: []string{}, + extra: []string{"line one", "line 
three"}, + }, + { + name: "extra lines, alternate order", + expected: "line four\nline two\n", + actual: "line one\nline two\nline three\nline four\n", + missing: []string{}, + extra: []string{"line one", "line three"}, + }, + { + name: "missing and extra lines, inconsistent whitespace", + expected: " line one\n line two\n line three\n", + actual: " line two\n line two-and-a-half\nline three", + missing: []string{"line one"}, + extra: []string{"line two-and-a-half"}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + missing, extra := diffNFTRules(tc.expected, tc.actual) + if !reflect.DeepEqual(tc.missing, missing) { + t.Errorf("expected missing=%#v, got %#v", tc.missing, missing) + } + if !reflect.DeepEqual(tc.extra, extra) { + t.Errorf("expected extra=%#v, got %#v", tc.extra, extra) + } + }) + } +} diff --git a/go-controller/pkg/node/nftables/util.go b/go-controller/pkg/node/nftables/util.go index 1a4a3bdd21..ce14186e9f 100644 --- a/go-controller/pkg/node/nftables/util.go +++ b/go-controller/pkg/node/nftables/util.go @@ -34,10 +34,7 @@ func DeleteNFTElements(elements []*knftables.Element) error { tx := nft.NewTransaction() for _, elem := range elements { - // We add+delete the elements, rather than just deleting them, so that if - // they weren't already in the set/map, we won't get an error on delete. 
- tx.Add(elem) - tx.Delete(elem) + tx.Destroy(elem) } return nft.Run(context.TODO(), tx) } diff --git a/go-controller/pkg/node/node_ip_handler_linux.go b/go-controller/pkg/node/node_ip_handler_linux.go index dda4e69da0..d46c758780 100644 --- a/go-controller/pkg/node/node_ip_handler_linux.go +++ b/go-controller/pkg/node/node_ip_handler_linux.go @@ -12,6 +12,7 @@ import ( "github.com/vishvananda/netlink" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -96,10 +97,10 @@ func (c *addressManager) addAddr(ipnet net.IPNet, linkIndex int) bool { // removes IP from address manager // returns true if there was an update -func (c *addressManager) delAddr(ipnet net.IPNet, linkIndex int) bool { +func (c *addressManager) delAddr(ipnet net.IPNet) bool { c.Lock() defer c.Unlock() - if c.cidrs.Has(ipnet.String()) && c.isValidNodeIP(ipnet.IP, linkIndex) { + if c.cidrs.Has(ipnet.String()) { klog.Infof("Removing IP: %s, from node IP manager", ipnet) c.cidrs.Delete(ipnet.String()) return true @@ -134,7 +135,7 @@ func (c *addressManager) Run(stopChan <-chan struct{}, doneWg *sync.WaitGroup) { return } - c.addHandlerForPrimaryAddrChange() + c.addHandlerForAddrChange() doneWg.Add(1) go func() { c.runInternal(stopChan, c.getNetlinkAddrSubFunc(stopChan)) @@ -172,7 +173,7 @@ func (c *addressManager) runInternal(stopChan <-chan struct{}, subscribe subscri if a.NewAddr { addrChanged = c.addAddr(a.LinkAddress, a.LinkIndex) } else { - addrChanged = c.delAddr(a.LinkAddress, a.LinkIndex) + addrChanged = c.delAddr(a.LinkAddress) } c.handleNodePrimaryAddrChange() @@ -218,14 +219,24 @@ func (c *addressManager) getNetlinkAddrSubFunc(stopChan <-chan struct{}) func() } } -// addHandlerForPrimaryAddrChange handles reconfiguration of a node primary IP address change -func (c *addressManager) addHandlerForPrimaryAddrChange() { +// addHandlerForAddrChange handles reconfiguration of a node primary IP address change or egress IP annotation 
changes +func (c *addressManager) addHandlerForAddrChange() { // Add an event handler to the node informer. This is needed for cases where users first update the node's IP // address but only later update kubelet configuration and restart kubelet (which in turn will update the reported // IP address inside the node's status field). + // It is also needed to cover gaps when the egress IPs are updated in annotations, in order to + // maintain a consistent host-cidrs set, without stale Egress IPs. nodeInformer := c.watchFactory.NodeInformer() _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - UpdateFunc: func(_, _ interface{}) { + UpdateFunc: func(oldObj, newObj interface{}) { + oldNode, oldOK := oldObj.(*corev1.Node) + newNode, newOK := newObj.(*corev1.Node) + if oldOK && newOK && newNode.Name == c.nodeName && nodeEgressIPAnnotationsChanged(oldNode, newNode) { + klog.V(5).Infof("Node %s egress IP annotations changed, syncing node IP manager", c.nodeName) + c.sync() + // c.sync() already calls c.handleNodePrimaryAddrChange, so safe to return + return + } c.handleNodePrimaryAddrChange() }, }) @@ -234,6 +245,20 @@ func (c *addressManager) addHandlerForPrimaryAddrChange() { } } +func nodeEgressIPAnnotationsChanged(oldNode, newNode *corev1.Node) bool { + if oldNode == nil || newNode == nil { + return false + } + for _, key := range []string{util.OVNNodeSecondaryHostEgressIPs, util.OVNNodeBridgeEgressIPs} { + oldVal, oldSet := oldNode.Annotations[key] + newVal, newSet := newNode.Annotations[key] + if oldSet != newSet || oldVal != newVal { + return true + } + } + return false +} + // updates OVN's EncapIP if the node IP changed func (c *addressManager) handleNodePrimaryAddrChange() { c.Lock() @@ -381,8 +406,11 @@ func (c *addressManager) nodePrimaryAddrChanged() (bool, error) { return true, nil } -// detects if the IP is valid for a node -// excludes things like local IPs, mgmt port ip, special masquerade IP and Egress IPs for non-ovs type interfaces 
+// isValidNodeIP detects if the IP is valid for a node. +// It excludes things like local IPs, mgmt port ip, special masquerade IP and Egress IPs +// for non-ovs type interfaces. +// Note, it possible that the node annotations may not be up to date when this check is executed. +// For this reason, sync is triggered on annotation change via addHandlerForAddrChange. func (c *addressManager) isValidNodeIP(addr net.IP, linkIndex int) bool { if addr == nil { return false diff --git a/go-controller/pkg/node/node_ip_handler_linux_test.go b/go-controller/pkg/node/node_ip_handler_linux_test.go index c78307cca1..fd549ed7df 100644 --- a/go-controller/pkg/node/node_ip_handler_linux_test.go +++ b/go-controller/pkg/node/node_ip_handler_linux_test.go @@ -170,6 +170,54 @@ var _ = Describe("Node IP Handler event tests", func() { }) }) +var _ = Describe("Node IP Handler helper tests", func() { + const nodeName = "node1" + + It("removes cached IPs even when they are no longer valid node IPs", func() { + Expect(config.PrepareTestConfig()).To(Succeed()) + tc := configureKubeOVNContext(nodeName, false) + defer tc.watchFactory.Shutdown() + + tc.ipManager.Lock() + tc.ipManager.cidrs.Insert(tc.mgmtPortIP4.String()) + tc.ipManager.Unlock() + + Expect(tc.ipManager.delAddr(*tc.mgmtPortIP4)).To(BeTrue()) + _, networks := tc.ipManager.ListAddresses() + Expect(networks).To(BeEmpty()) + }) + + It("syncs stale host-cidrs when egress IP annotations change", func() { + Expect(config.PrepareTestConfig()).To(Succeed()) + tc := configureKubeOVNContext(nodeName, false) + defer tc.watchFactory.Shutdown() + + tc.ipManager.addHandlerForAddrChange() + + staleEIP := "2001:db8:abcd:1234:c001::" + node, err := tc.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + + nodeToUpdate := node.DeepCopy() + nodeToUpdate.Annotations[util.OVNNodeHostCIDRs] = fmt.Sprintf("[\"%s\", \"%s\", \"%s/128\"]", "10.1.1.10/24", "2001:db8::10/64", staleEIP) + 
nodeToUpdate.Annotations[util.OVNNodeSecondaryHostEgressIPs] = fmt.Sprintf("[\"%s\"]", staleEIP) + _, err = tc.fakeClient.CoreV1().Nodes().Update(context.TODO(), nodeToUpdate, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() bool { + updatedNode, err := tc.fakeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + if err != nil { + return false + } + hostIPs, err := util.ParseNodeHostCIDRsDropNetMask(updatedNode) + if err != nil { + return false + } + return !hostIPs.Has(staleEIP) + }, 5).Should(BeTrue()) + }) +}) + var _ = Describe("Node IP Handler tests", func() { // To ensure that variables don't leak between parallel Ginkgo specs, // put all test context into a single struct and reference it via diff --git a/go-controller/pkg/node/openflow_manager.go b/go-controller/pkg/node/openflow_manager.go index b55fff21cd..d2dc2eb82f 100644 --- a/go-controller/pkg/node/openflow_manager.go +++ b/go-controller/pkg/node/openflow_manager.go @@ -119,34 +119,40 @@ func (c *openflowManager) requestFlowSync() { func (c *openflowManager) syncFlows() { c.flowMutex.Lock() - defer c.flowMutex.Unlock() - - flows := []string{} - for _, entry := range c.flowCache { - flows = append(flows, entry...) - } + flows := flattenFlowCacheEntries(c.flowCache) + c.flowMutex.Unlock() _, stderr, err := util.ReplaceOFFlows(c.defaultBridge.GetBridgeName(), flows) if err != nil { - klog.Errorf("Failed to add flows, error: %v, stderr, %s, flows: %s", err, stderr, c.flowCache) + klog.Errorf("Failed to add flows for bridge %s, error: %v, stderr, %s, flow count: %d", + c.defaultBridge.GetBridgeName(), err, stderr, len(flows)) } if c.externalGatewayBridge != nil { c.exGWFlowMutex.Lock() - defer c.exGWFlowMutex.Unlock() - - flows := []string{} - for _, entry := range c.exGWFlowCache { - flows = append(flows, entry...) 
- } + exGWFlows := flattenFlowCacheEntries(c.exGWFlowCache) + c.exGWFlowMutex.Unlock() - _, stderr, err := util.ReplaceOFFlows(c.externalGatewayBridge.GetBridgeName(), flows) + _, stderr, err := util.ReplaceOFFlows(c.externalGatewayBridge.GetBridgeName(), exGWFlows) if err != nil { - klog.Errorf("Failed to add flows, error: %v, stderr, %s, flows: %s", err, stderr, c.exGWFlowCache) + klog.Errorf("Failed to add flows for bridge %s, error: %v, stderr, %s, flow count: %d", + c.externalGatewayBridge.GetBridgeName(), err, stderr, len(exGWFlows)) } } } +func flattenFlowCacheEntries(flowCache map[string][]string) []string { + flowCount := 0 + for _, entry := range flowCache { + flowCount += len(entry) + } + flows := make([]string, 0, flowCount) + for _, entry := range flowCache { + flows = append(flows, entry...) + } + return flows +} + // since we share the host's k8s node IP, add OpenFlow flows // -- to steer the NodePort traffic arriving on the host to the OVN logical topology and // -- to also connection track the outbound north-south traffic through l3 gateway so that diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index f67f3b3972..4620b7bb69 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -13,7 +13,6 @@ import ( corev1 "k8s.io/api/core/v1" knet "k8s.io/api/networking/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" @@ -125,6 +124,9 @@ type BaseNetworkController struct { // A cache of all logical ports known to the controller logicalPortCache *PortCache + // optional callback for consumers that need to react when a pod's logical + // port info is inserted/refreshed in logicalPortCache. + onLogicalPortCacheAdd func(pod *corev1.Pod, nadKey string) // Info about known namespaces. 
You must use oc.getNamespaceLocked() or // oc.waitForNamespaceLocked() to read this map, and oc.createNamespaceLocked() @@ -338,12 +340,6 @@ func (oc *BaseUserDefinedNetworkController) shouldFilterNamespace(namespace stri nadKey, err := oc.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { - return false - } - if util.IsInvalidPrimaryNetworkError(err) { - return true - } return false } if nadKey == types.DefaultNetworkName { @@ -1047,7 +1043,7 @@ func (bnc *BaseNetworkController) GetNetworkRole(pod *corev1.Pod) (string, error pod, ) if err != nil { - if util.IsUnprocessedActiveNetworkError(err) { + if util.IsInvalidPrimaryNetworkError(err) { bnc.recordPodErrorEvent(pod, err) } return "", err @@ -1168,11 +1164,39 @@ func (bnc *BaseNetworkController) AddResourceCommon(objType reflect.Type, obj in if !ok { return fmt.Errorf("could not cast %T object to *knet.NetworkPolicy", obj) } - netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) + foundNamespaceNAD, err := bnc.networkManager.GetPrimaryNADForNamespace(np.Namespace) + if err != nil { + // If this is a UDN namespace that hasn't been processed yet, the default + // controller should skip it while UDN controllers should retry. + if bnc.GetNetworkName() == types.DefaultNetworkName && util.IsInvalidPrimaryNetworkError(err) { + return nil + } + // Retry until the NAD controller has processed the primary NAD for this namespace. + return fmt.Errorf("could not get primary network NAD for namespace %s: %v", np.Namespace, err) + } + if foundNamespaceNAD == types.DefaultNetworkName { + // Only the default network controller should handle policies in default namespaces. 
+ if bnc.GetNetworkName() != types.DefaultNetworkName { + return nil + } + } else { + networkName := bnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) + if networkName == "" { + return fmt.Errorf("no primary network found for namespace %s", np.Namespace) + } + if bnc.GetNetworkName() != networkName { + return nil + } + } + netInfo, err := bnc.networkManager.GetActiveNetworkForNamespace(np.Namespace) if err != nil { return fmt.Errorf("could not get active network for namespace %s: %v", np.Namespace, err) } - if bnc.GetNetworkName() != netinfo.GetNetworkName() { + if netInfo == nil { + // no active network, nothing to do + return nil + } + if bnc.GetNetworkName() != netInfo.GetNetworkName() { return nil } if err := bnc.addNetworkPolicy(np); err != nil { @@ -1193,15 +1217,6 @@ func (bnc *BaseNetworkController) DeleteResourceCommon(objType reflect.Type, obj if !ok { return fmt.Errorf("could not cast obj of type %T to *knet.NetworkPolicy", obj) } - netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(knp.Namespace) - // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted, - // while the NotFound error occurs when the namespace no longer exists. In both cases, proceed with deleting the NetworkPolicy. 
- if err != nil && !util.IsInvalidPrimaryNetworkError(err) && !apierrors.IsNotFound(err) { - return fmt.Errorf("could not get active network for namespace %s: %w", knp.Namespace, err) - } - if err == nil && bnc.GetNetworkName() != netinfo.GetNetworkName() { - return nil - } return bnc.deleteNetworkPolicy(knp) default: klog.Errorf("Can not process delete resource event, object type %s is not supported", objType) diff --git a/go-controller/pkg/ovn/base_network_controller_policy.go b/go-controller/pkg/ovn/base_network_controller_policy.go index 79a46449ae..5507c23bc0 100644 --- a/go-controller/pkg/ovn/base_network_controller_policy.go +++ b/go-controller/pkg/ovn/base_network_controller_policy.go @@ -35,7 +35,10 @@ const ( // netpolDefaultDenyACLType is used to distinguish default deny and arp allow acls create for the same port group defaultDenyACL netpolDefaultDenyACLType = "defaultDeny" arpAllowACL netpolDefaultDenyACLType = "arpAllow" + icmpAllowACL netpolDefaultDenyACLType = "icmpAllow" + // icmpAllowPolicyMatch is the match used when creating default allow ICMP and ICMPv6 ACLs for a namespace + icmpAllowPolicyMatch = "(icmp || icmp6)" // arpAllowPolicyMatch is the match used when creating default allow ARP ACLs for a namespace arpAllowPolicyMatch = "(arp || nd)" allowHairpinningACLID = "allow-hairpinning" @@ -383,16 +386,22 @@ func (bnc *BaseNetworkController) defaultDenyPortGroupName(namespace string, acl } func (bnc *BaseNetworkController) buildDenyACLs(namespace, pgName string, aclLogging *libovsdbutil.ACLLoggingLevels, - aclDir libovsdbutil.ACLDirection) (denyACL, allowACL *nbdb.ACL) { + aclDir libovsdbutil.ACLDirection) []*nbdb.ACL { denyMatch := libovsdbutil.GetACLMatch(pgName, "", aclDir) - allowMatch := libovsdbutil.GetACLMatch(pgName, arpAllowPolicyMatch, aclDir) + allowARPMatch := libovsdbutil.GetACLMatch(pgName, arpAllowPolicyMatch, aclDir) aclPipeline := libovsdbutil.ACLDirectionToACLPipeline(aclDir) - denyACL = 
libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, defaultDenyACL), - types.DefaultDenyPriority, denyMatch, nbdb.ACLActionDrop, aclLogging, aclPipeline) - allowACL = libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, arpAllowACL), - types.DefaultAllowPriority, allowMatch, nbdb.ACLActionAllow, nil, aclPipeline) - return + acls := make([]*nbdb.ACL, 0, 3) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, defaultDenyACL), + types.DefaultDenyPriority, denyMatch, nbdb.ACLActionDrop, aclLogging, aclPipeline)) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, arpAllowACL), + types.DefaultAllowPriority, allowARPMatch, nbdb.ACLActionAllow, nil, aclPipeline)) + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + allowICMPMatch := libovsdbutil.GetACLMatch(pgName, icmpAllowPolicyMatch, aclDir) + acls = append(acls, libovsdbutil.BuildACLWithDefaultTier(bnc.getDefaultDenyPolicyACLIDs(namespace, aclDir, icmpAllowACL), + types.DefaultAllowPriority, allowICMPMatch, nbdb.ACLActionAllow, nil, aclPipeline)) + } + return acls } func (bnc *BaseNetworkController) addPolicyToDefaultPortGroups(np *networkPolicy, aclLogging *libovsdbutil.ACLLoggingLevels) error { @@ -439,17 +448,18 @@ func (bnc *BaseNetworkController) delPolicyFromDefaultPortGroups(np *networkPoli func (bnc *BaseNetworkController) createDefaultDenyPGAndACLs(namespace, policy string, aclLogging *libovsdbutil.ACLLoggingLevels) error { ingressPGIDs := bnc.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress) ingressPGName := libovsdbutil.GetPortGroupName(ingressPGIDs) - ingressDenyACL, ingressAllowACL := bnc.buildDenyACLs(namespace, ingressPGName, aclLogging, libovsdbutil.ACLIngress) + ingressACLs := bnc.buildDenyACLs(namespace, ingressPGName, aclLogging, libovsdbutil.ACLIngress) egressPGIDs := 
bnc.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress) egressPGName := libovsdbutil.GetPortGroupName(egressPGIDs) - egressDenyACL, egressAllowACL := bnc.buildDenyACLs(namespace, egressPGName, aclLogging, libovsdbutil.ACLEgress) - ops, err := libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, nil, bnc.GetSamplingConfig(), ingressDenyACL, ingressAllowACL, egressDenyACL, egressAllowACL) + egressACLs := bnc.buildDenyACLs(namespace, egressPGName, aclLogging, libovsdbutil.ACLEgress) + allACLs := append(ingressACLs, egressACLs...) + ops, err := libovsdbops.CreateOrUpdateACLsOps(bnc.nbClient, nil, bnc.GetSamplingConfig(), allACLs...) if err != nil { return err } - ingressPG := libovsdbutil.BuildPortGroup(ingressPGIDs, nil, []*nbdb.ACL{ingressDenyACL, ingressAllowACL}) - egressPG := libovsdbutil.BuildPortGroup(egressPGIDs, nil, []*nbdb.ACL{egressDenyACL, egressAllowACL}) + ingressPG := libovsdbutil.BuildPortGroup(ingressPGIDs, nil, ingressACLs) + egressPG := libovsdbutil.BuildPortGroup(egressPGIDs, nil, egressACLs) ops, err = libovsdbops.CreateOrUpdatePortGroupsOps(bnc.nbClient, ops, ingressPG, egressPG) if err != nil { return err @@ -1307,7 +1317,6 @@ func (bnc *BaseNetworkController) deleteNetworkPolicy(policy *knet.NetworkPolicy err := bnc.networkPolicies.DoWithLock(npKey, func(npKey string) error { np, ok := bnc.networkPolicies.Load(npKey) if !ok { - klog.Infof("Deleting policy %s that is already deleted", npKey) return nil } if err := bnc.cleanupNetworkPolicy(np); err != nil { diff --git a/go-controller/pkg/ovn/base_network_controller_user_defined.go b/go-controller/pkg/ovn/base_network_controller_user_defined.go index 238daee738..c5481ef265 100644 --- a/go-controller/pkg/ovn/base_network_controller_user_defined.go +++ b/go-controller/pkg/ovn/base_network_controller_user_defined.go @@ -274,7 +274,11 @@ func (bsnc *BaseUserDefinedNetworkController) ensurePodForUserDefinedNetwork(pod } activeNetwork, err = 
bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - return fmt.Errorf("failed looking for the active network at namespace '%s': %w", pod.Namespace, err) + return fmt.Errorf("failed to find active network for pod %s/%s: %w", pod.Namespace, pod.Name, err) + } + if activeNetwork == nil { + // no active network, pod doesn't belong to our controller + return nil } } @@ -422,6 +426,9 @@ func (bsnc *BaseUserDefinedNetworkController) addLogicalPortToNetworkForNAD(pod if lsp != nil { _ = bsnc.logicalPortCache.add(pod, switchName, nadKey, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) + if bsnc.onLogicalPortCacheAdd != nil { + bsnc.onLogicalPortCacheAdd(pod, nadKey) + } if bsnc.requireDHCP(pod) { if err := bsnc.ensureDHCP(pod, podAnnotation, lsp); err != nil { return err @@ -624,29 +631,18 @@ func (bsnc *BaseUserDefinedNetworkController) syncPodsForUserDefinedNetwork(pods var activeNetwork util.NetInfo var err error if bsnc.IsPrimaryNetwork() { - // check to see if the primary NAD is even applicable to our controller - foundNamespaceNAD, err := bsnc.networkManager.GetPrimaryNADForNamespace(pod.Namespace) + activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) if err != nil { - return fmt.Errorf("failed to get primary network namespace NAD: %w", err) + return fmt.Errorf("failed to find the active network for pod %s/%s: %w", pod.Namespace, pod.Name, err) } - if foundNamespaceNAD == types.DefaultNetworkName { + if activeNetwork == nil || activeNetwork.IsDefault() { + // no active network for pod, or is a default network pod continue } - networkName := bsnc.networkManager.GetNetworkNameForNADKey(foundNamespaceNAD) - if networkName != "" && networkName != bsnc.GetNetworkName() { + if activeNetwork.GetNetworkName() != bsnc.GetNetworkName() { + // network name found but doesn't apply to our controller continue } - activeNetwork, err = bsnc.networkManager.GetActiveNetworkForNamespace(pod.Namespace) - if err != nil { - if 
apierrors.IsNotFound(err) { - // namespace is gone after we listed this pod, that means the pod no longer exists - // we don't need to preserve it's previously allocated IP address or logical switch port - klog.Infof("%s network controller pod sync: pod %s/%s namespace has been deleted, ignoring pod", - bsnc.GetNetworkName(), pod.Namespace, pod.Name) - continue - } - return fmt.Errorf("failed looking for the active network at namespace '%s': %w", pod.Namespace, err) - } } on, networkMap, err := util.GetPodNADToNetworkMappingWithActiveNetwork( @@ -823,6 +819,75 @@ func (bsnc *BaseUserDefinedNetworkController) WatchMultiNetworkPolicy() error { return nil } +// cleanupGatewayRoutersForNetworkFromDB discovers all gateway routers for the given network from +// the NB DB (by ExternalIDs and GWRouterPrefix) and cleans each one via a dummy GatewayManager. +// Used when gateway managers are empty (e.g. dummy controller or stale cleanup) so cleanup works +// even when nodes are gone. +func cleanupGatewayRoutersForNetworkFromDB( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + clusterRouterName, joinSwitchName string, +) error { + var errs []error + networkName := netInfo.GetNetworkName() + pred := func(lr *nbdb.LogicalRouter) bool { + return lr.ExternalIDs[types.NetworkExternalID] == networkName && + strings.HasPrefix(lr.Name, types.GWRouterPrefix) + } + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(nbClient, pred) + if err != nil { + return fmt.Errorf("failed to find gateway routers for network %s: %w", networkName, err) + } + layer2UseTransitRouter := netInfo.TopologyType() == types.Layer2Topology && config.Layer2UsesTransitRouter + for _, lr := range routers { + nodeName := netInfo.RemoveNetworkScopeFromName(util.GetWorkerFromGatewayRouter(lr.Name)) + gw := NewGatewayManagerForCleanup(nbClient, netInfo, clusterRouterName, joinSwitchName, lr.Name, nodeName, layer2UseTransitRouter) + if err := gw.Cleanup(); err != nil { + errs = append(errs, 
fmt.Errorf("failed to cleanup gateway router %s for network %q (node %s): %w", lr.Name, networkName, nodeName, err)) + } + } + return utilerrors.Join(errs...) +} + +// cleanupLoadBalancerGroups removes load balancer groups for a user-defined network controller. +// When LB group UUIDs are known (normal controller), they are deleted directly by UUID. +// Otherwise (dummy/stale cleanup controller), the groups are looked up by network-scoped name. +func cleanupLoadBalancerGroups( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + switchLBGroupUUID, clusterLBGroupUUID, routerLBGroupUUID string, +) { + networkName := netInfo.GetNetworkName() + if switchLBGroupUUID != "" || clusterLBGroupUUID != "" || routerLBGroupUUID != "" { + lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) + for _, lbGroupUUID := range []string{switchLBGroupUUID, clusterLBGroupUUID, routerLBGroupUUID} { + if lbGroupUUID != "" { + lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) + } + } + if err := libovsdbops.DeleteLoadBalancerGroups(nbClient, lbGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", networkName, err) + } + return + } + // Dummy controller (e.g. 
stale UDN cleanup): find LB groups by network-scoped name and delete them + names := map[string]bool{ + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterLBGroupName): true, + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterSwitchLBGroupName): true, + netInfo.GetNetworkScopedLoadBalancerGroupName(types.ClusterRouterLBGroupName): true, + } + staleLBGroups, err := libovsdbops.FindLoadBalancerGroupsWithPredicate(nbClient, func(g *nbdb.LoadBalancerGroup) bool { + return names[g.Name] + }) + if err != nil { + klog.Errorf("Failed to find load balancer groups for stale network %q: %v", networkName, err) + } else if len(staleLBGroups) > 0 { + if err := libovsdbops.DeleteLoadBalancerGroups(nbClient, staleLBGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on stale network: %q, error: %v", networkName, err) + } + } +} + // cleanupPolicyLogicalEntities cleans up all the port groups and address sets that belong to the given controller func cleanupPolicyLogicalEntities(nbClient libovsdbclient.Client, ops []ovsdb.Operation, controllerName string) ([]ovsdb.Operation, error) { var err error diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index e28c138247..5a5f7afc79 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -150,11 +150,11 @@ func (oc *BaseLayer2UserDefinedNetworkController) run() error { return fmt.Errorf("unable to create network qos controller, err: %w", err) } oc.wg.Add(1) - go func() { + go func(ch <-chan struct{}) { defer oc.wg.Done() // Until we have scale issues in future let's spawn only one thread - oc.nqosController.Run(1, oc.stopChan) - }() + oc.nqosController.Run(1, ch) + }(oc.stopChan) } // Add ourselves to the route import manager diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go 
b/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go index fbcacf04b6..94f87f0ff2 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egress_firewall_test.go @@ -484,7 +484,7 @@ var _ = ginkgo.Describe("OVN test basic functions", func() { subnets = append(subnets, config.CIDRNetworkEntry{CIDR: cidr}) } config.Default.ClusterSubnets = subnets - entry := &cacheEntry{} + entry := &cacheEntry{subnets: subnetsForNetInfo(&util.DefaultNetInfo{})} output, err := efController.newEgressFirewallRule("default", tc.egressFirewallRule, tc.id, entry) if tc.err == true { gomega.Expect(err).To(gomega.HaveOccurred()) @@ -716,8 +716,13 @@ func TestValidateAndGetEgressFirewallDestination(t *testing.T) { if len(tc.udnName) > 0 { network = tc.udnName } + entry := &cacheEntry{subnets: subnetsForNetInfo(&util.DefaultNetInfo{})} + if len(tc.udnName) > 0 { + entry.subnets = subnetsForNetInfo(netInfo) + } + cidrSelector, dnsName, clusterSubnetIntersection, nodeSelector, err := - efController.validateAndGetEgressFirewallDestination(network, tc.egressFirewallDestination) + efController.validateAndGetEgressFirewallDestination(network, tc.egressFirewallDestination, entry) if tc.expectedErr { require.Error(t, err) } else { diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go index d537804f56..8d9dc37403 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall.go @@ -110,7 +110,7 @@ type matchKind int type cacheEntry struct { pgName string hasNodeSelector bool - subnetsKey string + subnets []*net.IPNet efResourceVersion string logHash string } @@ -422,20 +422,15 @@ func (oc *EFController) sync(key string) (updateErr error) { }() activeNetwork, netErr := oc.networkManager.GetActiveNetworkForNamespace(namespace) 
- if netErr != nil { - if util.IsUnprocessedActiveNetworkError(netErr) { - klog.V(5).Infof("Skipping egress firewall %s/%s: primary network not ready: %v", namespace, efName, netErr) - skipStatusUpdate = true - return nil - } - if util.IsInvalidPrimaryNetworkError(netErr) { - // Namespace requires P-UDN, but it does not exist. Remove EF config and surface error in status. - updateErr = netErr - } else { - return fmt.Errorf("failed to get active network for egress firewall %s/%s namespace: %w", - namespace, efName, netErr) - } - } else { + switch { + case netErr != nil: + // Failed to resolve active network; surface this in EF status. + updateErr = netErr + case activeNetwork == nil: + // No active network for this namespace in this controller context (e.g. filtered by D-UDN): + // cleanup stale EF config but don't report an EF status error. + skipStatusUpdate = true + default: aclLoggingLevels, logErr := oc.getNamespaceACLLogging(namespace) if logErr != nil { return fmt.Errorf("failed to get acl logging levels for egress firewall %s/%s: %w", @@ -444,7 +439,7 @@ func (oc *EFController) sync(key string) (updateErr error) { ownerController := activeNetwork.GetNetworkName() + "-network-controller" newEntry = &cacheEntry{ pgName: libovsdbutil.GetPortGroupName(getNamespacePortGroupDbIDs(namespace, ownerController)), - subnetsKey: subnetsKeyForNetInfo(activeNetwork), + subnets: subnetsForNetInfo(activeNetwork), efResourceVersion: ef.ResourceVersion, logHash: aclLogHash(aclLoggingLevels), } @@ -540,20 +535,19 @@ func (oc *EFController) sync(key string) (updateErr error) { return } -func subnetsKeyForNetInfo(netInfo util.NetInfo) string { +func subnetsForNetInfo(netInfo util.NetInfo) []*net.IPNet { if netInfo == nil { - return "" + return nil } subnets := netInfo.Subnets() - if len(subnets) == 0 { - return "" - } - keys := make([]string, 0, len(subnets)) - for _, s := range subnets { - keys = append(keys, s.String()) + unsortedSubnets := make([]*net.IPNet, 0, len(subnets)) 
+ for _, subnet := range subnets { + if subnet.CIDR == nil { + continue + } + unsortedSubnets = append(unsortedSubnets, subnet.CIDR) } - slices.Sort(keys) - return strings.Join(keys, ",") + return util.CopyIPNets(unsortedSubnets) } func entriesEqual(a, b *cacheEntry) bool { @@ -564,7 +558,7 @@ func entriesEqual(a, b *cacheEntry) bool { return false default: return a.pgName == b.pgName && - a.subnetsKey == b.subnetsKey && + util.IsIPNetsEqual(a.subnets, b.subnets) && a.efResourceVersion == b.efResourceVersion && a.logHash == b.logHash } @@ -624,7 +618,7 @@ func (oc *EFController) addEgressFirewall(egressFirewall *egressfirewallapi.Egre // validateAndGetEgressFirewallDestination validates an egress firewall rule destination and returns // the parsed contents of the destination. -func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string, egressFirewallDestination egressfirewallapi.EgressFirewallDestination) ( +func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string, egressFirewallDestination egressfirewallapi.EgressFirewallDestination, entry *cacheEntry) ( cidrSelector string, dnsName string, clusterSubnetIntersection []*net.IPNet, @@ -644,15 +638,13 @@ func (oc *EFController) validateAndGetEgressFirewallDestination(namespace string return "", "", nil, nil, err } cidrSelector = egressFirewallDestination.CIDRSelector - netInfo, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) - if err != nil { - return "", "", nil, nil, - fmt.Errorf("failed to validate egress firewall destination: %w", err) + if entry == nil || entry.subnets == nil { + return "", "", nil, nil, fmt.Errorf("failed to "+ + "validate egress firewall destination: missing cached subnets for namespace %s", namespace) } - subnets := netInfo.Subnets() - for _, clusterSubnet := range subnets { - if clusterSubnet.CIDR.Contains(ipNet.IP) || ipNet.Contains(clusterSubnet.CIDR.IP) { - clusterSubnetIntersection = append(clusterSubnetIntersection, 
clusterSubnet.CIDR) + for _, clusterSubnet := range entry.subnets { + if clusterSubnet.Contains(ipNet.IP) || ipNet.Contains(clusterSubnet.IP) { + clusterSubnetIntersection = append(clusterSubnetIntersection, clusterSubnet) } } } else { @@ -680,7 +672,7 @@ func (oc *EFController) newEgressFirewallRule(namespace string, rawEgressFirewal // fields of efr. var err error efr.to.cidrSelector, efr.to.dnsName, efr.to.clusterSubnetIntersection, efr.to.nodeSelector, err = - oc.validateAndGetEgressFirewallDestination(namespace, rawEgressFirewallRule.To) + oc.validateAndGetEgressFirewallDestination(namespace, rawEgressFirewallRule.To, entry) if err != nil { return efr, err } @@ -948,8 +940,8 @@ func (oc *EFController) moveACLsToNamespacedPortGroups(existingEFNamespaces map[ if namespace != "" && existingEFNamespaces[namespace] { pgName, err := oc.getNamespacePortGroupName(namespace) if err != nil { - return fmt.Errorf("failed to get port group name for egress firewall ACL move with "+ - "namespace: %s, err: %w", namespace, err) + klog.Warningf("Skipping egress firewall ACL move for namespace %s: %v", namespace, err) + continue } // re-attach from ClusterPortGroupNameBase to namespaced port group. // port group should exist, because namespace handler will create it. 
@@ -1088,11 +1080,18 @@ func getNamespacePortGroupDbIDs(ns string, controller string) *libovsdbops.DbObj } func (oc *EFController) getNamespacePortGroupName(namespace string) (string, error) { - activeNetwork, err := oc.networkManager.GetActiveNetworkForNamespace(namespace) + nadKey, err := oc.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { - return "", fmt.Errorf("failed to get active network for namespace %s: %w", namespace, err) + return "", fmt.Errorf("failed to get primary NAD for namespace %s: %w", namespace, err) + } + networkName := types.DefaultNetworkName + if nadKey != types.DefaultNetworkName && nadKey != "" { + networkName = oc.networkManager.GetNetworkNameForNADKey(nadKey) + if networkName == "" { + return "", fmt.Errorf("failed to resolve network name for NAD %s in namespace %s", nadKey, namespace) + } } - ownerController := activeNetwork.GetNetworkName() + "-network-controller" + ownerController := networkName + "-network-controller" return libovsdbutil.GetPortGroupName(getNamespacePortGroupDbIDs(namespace, ownerController)), nil } diff --git a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go index eb280d5109..5c6ec709c6 100644 --- a/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go +++ b/go-controller/pkg/ovn/controller/egressfirewall/egressfirewall_sync_test.go @@ -187,5 +187,5 @@ func TestEFControllerSync_UpdatesOnSubnetChangeAndSkipsWhenUnchanged(t *testing. 
entry, ok := oc.cache.Load(namespace) require.True(t, ok) require.Equal(t, pgName, entry.pgName) - require.Equal(t, subnetsKeyForNetInfo(netInfo2), entry.subnetsKey) + require.True(t, util.IsIPNetsEqual(subnetsForNetInfo(netInfo2), entry.subnets)) } diff --git a/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go b/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go index 3a08c31901..070f4c19a3 100644 --- a/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go +++ b/go-controller/pkg/ovn/controller/networkconnect/controller_components_test.go @@ -519,7 +519,7 @@ func TestController_syncNAD(t *testing.T) { context.Background(), cnc, metav1.CreateOptions{}) g.Expect(err).ToNot(gomega.HaveOccurred()) - nadConfig := `{"cniVersion":"0.4.0","name":"net1","type":"ovn-k8s-cni-overlay","topology":"layer3","role":"primary","netAttachDefName":"ns1/nad1"}` + nadConfig := `{"cniVersion":"1.1.0","name":"net1","type":"ovn-k8s-cni-overlay","topology":"layer3","role":"primary","netAttachDefName":"ns1/nad1"}` nad := &nettypes.NetworkAttachmentDefinition{ ObjectMeta: metav1.ObjectMeta{ Namespace: "ns1", diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 83ccedca49..428d75324d 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -600,22 +600,36 @@ func (c *Controller) RequestFullSync(nodeInfos []nodeInfo) { // belong to the network that this service controller is responsible for. 
func (c *Controller) skipService(name, namespace string) bool { if util.IsNetworkSegmentationSupportEnabled() { - serviceNetwork, err := c.networkManager.GetActiveNetworkForNamespace(namespace) + serviceNAD, err := c.networkManager.GetPrimaryNADForNamespace(namespace) if err != nil { + // If the namespace requires a UDN that hasn't been processed yet, the default controller + // should skip this service; the UDN controller will handle it once ready. + if util.IsInvalidPrimaryNetworkError(err) { + return c.netInfo.IsDefault() + } utilruntime.HandleError(fmt.Errorf("failed to retrieve network for service %s/%s: %w", namespace, name, err)) return true } + serviceNetworkName := types.DefaultNetworkName + isDefaultNetwork := serviceNAD == types.DefaultNetworkName + if !isDefaultNetwork { + serviceNetworkName = c.networkManager.GetNetworkNameForNADKey(serviceNAD) + if serviceNetworkName == "" { + return true + } + } + // Do not skip default network services enabled for UDN - if serviceNetwork.IsDefault() && + if isDefaultNetwork && c.netInfo.IsPrimaryNetwork() && globalconfig.Gateway.Mode == globalconfig.GatewayModeShared && util.IsUDNEnabledService(ktypes.NamespacedName{Namespace: namespace, Name: name}.String()) { return false } - if serviceNetwork.GetNetworkName() != c.netInfo.GetNetworkName() { + if serviceNetworkName != c.netInfo.GetNetworkName() { return true } } diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 5e850fef14..61023878ad 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -248,6 +248,9 @@ func newDefaultNetworkControllerCommon( oc.ovnClusterLRPToJoinIfAddrs = gwLRPIfAddrs oc.initRetryFramework() + if oc.eIPC != nil { + oc.eIPC.retryEgressIPPods = oc.retryEgressIPPods + } return oc, nil } @@ -343,6 +346,9 @@ func (oc *DefaultNetworkController) Stop() { if oc.efController != nil { oc.efController.Stop() } + 
if oc.eIPC != nil { + oc.eIPC.StopNADReconciler() + } if oc.routeImportManager != nil { oc.routeImportManager.ForgetNetwork(oc.GetNetworkName()) } @@ -459,6 +465,9 @@ func (oc *DefaultNetworkController) run(_ context.Context) error { } if config.OVNKubernetesFeature.EnableEgressIP { + if err := oc.eIPC.StartNADReconciler(); err != nil { + return err + } // This is probably the best starting order for all egress IP handlers. // WatchEgressIPPods and WatchEgressIPNamespaces only use the informer // cache to retrieve the egress IPs when determining if namespace/pods diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 41f2e9a6af..4c6850e3b2 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -22,8 +22,10 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" + "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" @@ -32,6 +34,7 @@ import ( ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/controller" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/generator/udn" @@ -44,6 +47,7 @@ import ( addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egressservice" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/udnenabledsvc" + ovnretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -192,7 +196,12 @@ type EgressIPController struct { // value will be true if local to this zone and false otherwise nodeZoneState *syncmap.SyncMap[bool] // networkManager used for getting network information for UDNs - networkManager networkmanager.Interface + networkManager networkmanager.Interface + nadReconciler networkmanager.NADReconciler + nadReconcilerID uint64 + nadReconcilerRegistered bool + // retryEgressIPPods allows requeuing egressIP pod processing on NAD changes + retryEgressIPPods *ovnretry.RetryFramework // An address set factory that creates address sets addressSetFactory addressset.AddressSetFactory // Northbound database zone name to which this Controller is connected to - aka local zone @@ -233,6 +242,16 @@ func NewEIPController( v4: v4, v6: v6, } + nadReconcilerConfig := &controller.ReconcilerConfig{ + RateLimiter: workqueue.DefaultTypedControllerRateLimiter[string](), + Reconcile: e.syncNAD, + Threadiness: 1, + MaxAttempts: controller.InfiniteAttempts, + } + e.nadReconciler = controller.NewReconciler( + controllerName+"-egressip-nad", + nadReconcilerConfig, + ) return e } @@ -364,7 +383,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, 
oldEIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to delete namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) @@ -373,7 +400,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if newNamespaceSelector.Matches(namespaceLabels) && !oldNamespaceSelector.Matches(namespaceLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { errs = append(errs, fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err)) @@ -399,7 +434,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) @@ -408,7 +451,15 @@ func (e 
*EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) @@ -431,7 +482,15 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e // reason to look at the pod selector. ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + continue + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + if ni == nil { + // our node does not have this network + continue } if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { if err := e.deleteNamespaceEgressIPAssignment(ni, oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { @@ -538,8 +597,16 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa if namespaceSelector.Matches(oldLabels) && !namespaceSelector.Matches(newLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName) if err != nil { + if 
util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + return nil + } return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err) } + if ni == nil { + // our node does not have this network + return nil + } if err := e.deleteNamespaceEgressIPAssignment(ni, eIP.Name, eIP.Status.Items, oldNamespace, eIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to delete namespace %q for egress IP %q: %w", ni.GetNetworkName(), namespaceName, eIP.Name, err) @@ -549,7 +616,15 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa mark := getEgressIPPktMark(eIP.Name, eIP.Annotations) ni, err := e.networkManager.GetActiveNetworkForNamespace(namespaceName) if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %v", namespaceName, err) + if util.IsInvalidPrimaryNetworkError(err) { + // NAD reconciler will notify us later + return nil + } + return fmt.Errorf("failed to get active network for namespace %s: %w", namespaceName, err) + } + if ni == nil { + // our node does not have this network + return nil } if err := e.addNamespaceEgressIPAssignments(ni, eIP.Name, eIP.Status.Items, mark, newNamespace, eIP.Spec.PodSelector); err != nil { return fmt.Errorf("network %s: failed to add namespace %q for egress IP %q: %w", @@ -653,15 +728,13 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err if err != nil { return err } - ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) - if err != nil { - return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) - } + oldMatches, newMatches := false, false + deletePath := false if !podSelector.Empty() { // Use "new" and "old" instead of "newPod" and "oldPod" to determine whether // pods was created or is being deleted. 
- newMatches := new != nil && podSelector.Matches(newPodLabels) - oldMatches := old != nil && podSelector.Matches(oldPodLabels) + newMatches = new != nil && podSelector.Matches(newPodLabels) + oldMatches = old != nil && podSelector.Matches(oldPodLabels) // If the podSelector doesn't match the pod, then continue // because this EgressIP intends to match other pods in that // namespace and not this one. Other EgressIP objects might @@ -671,7 +744,32 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err } // Check if the pod stopped matching. If the pod was deleted, // "new" will be nil, so this must account for that case. - if !newMatches && oldMatches { + deletePath = !newMatches && oldMatches + } else { + // Empty pod selector means all pods in namespace are matched. + deletePath = new == nil + } + + ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) + if err != nil && !util.IsInvalidPrimaryNetworkError(err) { + return fmt.Errorf("failed to get active network for namespace %s: %w", namespace.Name, err) + } + haveNetwork := ni != nil + if !haveNetwork && deletePath && old != nil { + // During dynamic UDN churn, active network resolution can transiently return !ok on delete. + // Fall back to the pod-assignment cache network to avoid skipping stale egressIP cleanup. 
+ if cachedNetwork := e.getNetworkFromPodAssignment(getPodKey(oldPod)); cachedNetwork != nil { + ni = cachedNetwork + haveNetwork = true + klog.V(4).Infof("Using cached network %q for egressIP delete reconciliation of pod %s/%s", + ni.GetNetworkName(), oldPod.Namespace, oldPod.Name) + } + } + if !haveNetwork { + return nil + } + if !podSelector.Empty() { + if deletePath { if err := e.deletePodEgressIPAssignmentsWithCleanup(ni, eIP.Name, eIP.Status.Items, oldPod); err != nil { return fmt.Errorf("network %s: failed to delete pod %s/%s for egress IP %q: %w", ni.GetNetworkName(), oldPod.Namespace, oldPod.Name, eIP.Name, err) @@ -727,8 +825,14 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen for _, namespace := range namespaces { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { + if util.IsInvalidPrimaryNetworkError(err) { + continue + } return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } + if ni == nil { + continue + } if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil { errs = append(errs, err) } @@ -789,7 +893,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str if len(statusAssignments) == 0 { return nil } - var remainingAssignments, staleAssignments []egressipv1.EgressIPStatusItem + var remainingAssignments, staleAssignments, reprogramAssignments []egressipv1.EgressIPStatusItem nadKey, err := e.getPodNADKeyForNetwork(ni, pod) if err != nil { return err @@ -816,6 +920,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str network: ni, } } else if podState.egressIPName == name || podState.egressIPName == "" { + podIPsChanged := !podIPSliceEqual(podState.podIPs, podIPs) // We do the setup only if this egressIP object is the one serving this pod OR // podState.egressIPName can be empty if no re-routes were found in // 
syncPodAssignmentCache for the existing pod, we will treat this case as a new add @@ -824,6 +929,10 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str // (meaning it was populated during EIP sync and needs to be processed for the pod). if value, exists := podState.egressStatuses.statusMap[status]; !exists || value == egressStatusStatePending { remainingAssignments = append(remainingAssignments, status) + } else if podIPsChanged { + // A pod can be re-created with the same name but a different IP. + // Force a delete+add for existing statuses so LRP match/NAT gets updated. + reprogramAssignments = append(reprogramAssignments, status) } // Detect stale EIP status entries (same EgressIP reassigned to a different node) // and queue the outdated entry for cleanup. @@ -831,7 +940,6 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str staleAssignments = append(staleAssignments, *staleStatus) } } - podState.podIPs = podIPs podState.egressIPName = name podState.network = ni podState.standbyEgressIPNames.Delete(name) @@ -865,6 +973,18 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str } delete(podState.egressStatuses.statusMap, staleStatus) } + if len(reprogramAssignments) > 0 { + klog.V(2).Infof("Pod %s IPs changed, forcing egress IP status reprogram for statuses: %+v", podKey, reprogramAssignments) + if err := e.deletePodEgressIPAssignments(ni, name, reprogramAssignments, pod, false); err != nil { + return fmt.Errorf("failed to force reprogram of pod %s statuses %v for egress IP %s: %w", + podKey, reprogramAssignments, name, err) + } + for _, status := range reprogramAssignments { + delete(podState.egressStatuses.statusMap, status) + } + remainingAssignments = append(remainingAssignments, reprogramAssignments...) + } + podState.podIPs = podIPs // We store podState into podAssignment cache at this place for two reasons. // 1. When podAssignmentState is newly created. // 2. 
deletePodEgressIPAssignments might clean the podAssignment cache, make sure we add it back. @@ -1306,6 +1426,93 @@ func (e *EgressIPController) getALocalZoneNodeName() (string, error) { return "", fmt.Errorf("failed to find a local OVN zone Node") } +func (e *EgressIPController) StartNADReconciler() error { + if e.networkManager == nil || e.nadReconciler == nil { + return nil + } + if !e.nadReconcilerRegistered { + id, err := e.networkManager.RegisterNADReconciler(e.nadReconciler) + if err != nil { + return err + } + e.nadReconcilerID = id + e.nadReconcilerRegistered = true + } + return controller.Start(e.nadReconciler) +} + +func (e *EgressIPController) StopNADReconciler() { + if e.nadReconciler == nil { + return + } + if e.nadReconcilerRegistered { + if err := e.networkManager.DeRegisterNADReconciler(e.nadReconcilerID); err != nil { + klog.Warningf("Failed to deregister egress IP NAD reconciler: %v", err) + } + e.nadReconcilerRegistered = false + } + controller.Stop(e.nadReconciler) + e.nadReconcilerID = 0 + e.nadReconciler = nil +} + +func (e *EgressIPController) syncNAD(key string) error { + startTime := time.Now() + klog.V(5).Infof("Egress IP NAD reconcile %s", key) + defer func() { + klog.V(4).Infof("Finished syncing Egress IP for NAD %s, took %v", key, time.Since(startTime)) + }() + + namespace, _, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + klog.Errorf("Failed splitting NAD key %s: %v", key, err) + return nil + } + + ni := e.networkManager.GetNetInfoForNADKey(key) + if ni == nil { + return nil + } + // Only reconcile for primary network NADs. Secondary NADs are irrelevant for EgressIP. + if !ni.IsPrimaryNetwork() { + return nil + } + // Ensure egressIP pods for this namespace are retried after NAD processing so + // we don't miss the UDN IPs if pod updates raced the NAD event. 
+ e.addEgressIPPodRetriesForNamespace(namespace) + return nil +} + +func (e *EgressIPController) addEgressIPPodRetriesForNamespace(namespace string) { + if e.retryEgressIPPods == nil { + return + } + pods, err := e.watchFactory.GetPods(namespace) + if err != nil { + klog.Warningf("Failed to list pods for EgressIP NAD retry in namespace %s: %v", namespace, err) + return + } + for _, pod := range pods { + pod := *pod + if util.PodCompleted(&pod) { + continue + } + e.addEgressIPPodRetry(&pod, "NAD change") + } +} + +func (e *EgressIPController) addEgressIPPodRetry(pod *corev1.Pod, reason string) { + if e.retryEgressIPPods == nil || pod == nil || util.PodCompleted(pod) || !util.PodNeedsSNAT(pod) { + return + } + klog.V(5).Infof("Adding egress IP pod %s/%s for immediate retry due to %s", pod.Namespace, pod.Name, reason) + if err := e.retryEgressIPPods.AddRetryObjWithAddNoBackoff(pod); err != nil { + klog.Warningf("Failed to add pod %s/%s to egressIP retry queue: %v", pod.Namespace, pod.Name, err) + return + } + e.retryEgressIPPods.RequestRetryObjs() +} + func (e *EgressIPController) syncStaleAddressSetIPs(egressIPCache egressIPCache) error { for _, networkPodCache := range egressIPCache.egressIPNameToPods { for networkName, podCache := range networkPodCache { @@ -1907,6 +2114,10 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { klog.Errorf("Failed to get active network for namespace %s, stale objects may remain: %v", namespace.Name, err) continue } + if ni == nil { + klog.V(5).Infof("Skipping namespace %s while building egress IP cache: network not active on local zone", namespace.Name) + continue + } // skip if already processed if _, ok := redirectCache[ni.GetNetworkName()]; ok { continue @@ -2059,6 +2270,10 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { klog.Errorf("Failed to get active network for namespace %s, skipping sync: %v", namespace.Name, err) continue } + if ni == nil { + 
klog.V(5).Infof("Skipping namespace %s while building egress IP sync cache: network not active on local zone", namespace.Name) + continue + } _, ok := egressIPsCache[egressIP.Name][ni.GetNetworkName()] if ok { continue // aready populated @@ -2332,6 +2547,23 @@ func (e egressStatuses) delete(deleteStatus egressipv1.EgressIPStatusItem) { delete(e.statusMap, deleteStatus) } +func podIPSliceEqual(oldIPs, newIPs []net.IP) bool { + if len(oldIPs) != len(newIPs) { + return false + } + oldIPStrings := make([]string, 0, len(oldIPs)) + for _, podIP := range oldIPs { + oldIPStrings = append(oldIPStrings, podIP.String()) + } + newIPStrings := make([]string, 0, len(newIPs)) + for _, podIP := range newIPs { + newIPStrings = append(newIPStrings, podIP.String()) + } + sort.Strings(oldIPStrings) + sort.Strings(newIPStrings) + return slices.Equal(oldIPStrings, newIPStrings) +} + // podAssignmentState keeps track of which egressIP object is serving // the related pod. // NOTE: At a given time only one object will be configured. This is diff --git a/go-controller/pkg/ovn/gateway.go b/go-controller/pkg/ovn/gateway.go index ddce0de5c7..a961d301c5 100644 --- a/go-controller/pkg/ovn/gateway.go +++ b/go-controller/pkg/ovn/gateway.go @@ -1381,6 +1381,37 @@ func (gw *GatewayManager) Cleanup() error { return nil } +// NewGatewayManagerForCleanup returns a minimal GatewayManager used only for Cleanup(). Used when +// discovering gateway routers from the DB (e.g. stale cleanup when nodes are gone). layer2UseTransitRouter +// selects the peer port cleanup path (transit router LRP vs join switch LSP). +// +// NOTE: transitRouterInfo is set to an empty struct (not nil) when layer2UseTransitRouter is true. +// This is safe because Cleanup() only checks (transitRouterInfo != nil) to choose between +// deleteGWRouterPeerRouterPort and deleteGWRouterPeerSwitchPort — neither of which accesses +// transitRouterInfo fields. 
If Cleanup() is ever changed to dereference transitRouterInfo fields, +// this constructor must be updated accordingly. +func NewGatewayManagerForCleanup( + nbClient libovsdbclient.Client, + netInfo util.NetInfo, + clusterRouterName, joinSwitchName, gwRouterName, nodeName string, + layer2UseTransitRouter bool, +) *GatewayManager { + var tri *transitRouterInfo + if layer2UseTransitRouter { + tri = &transitRouterInfo{} + } + return &GatewayManager{ + nodeName: nodeName, + clusterRouterName: clusterRouterName, + gwRouterName: gwRouterName, + extSwitchName: netInfo.GetNetworkScopedExtSwitchName(nodeName), + joinSwitchName: joinSwitchName, + nbClient: nbClient, + netInfo: netInfo, + transitRouterInfo: tri, + } +} + func (gw *GatewayManager) delPbrAndNatRules(nodeName string) { // delete the dnat_and_snat entry that we added for the management port IP // Note: we don't need to delete any MAC bindings that are dynamically learned from OVN SB DB diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go index 63f4994cfa..3649153b41 100644 --- a/go-controller/pkg/ovn/layer2_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller.go @@ -398,6 +398,11 @@ func NewLayer2UserDefinedNetworkController( eIPController: eIPController, remoteNodesNoRouter: sync.Map{}, } + if oc.IsPrimaryNetwork() && oc.eIPController != nil { + oc.onLogicalPortCacheAdd = func(pod *corev1.Pod, _ string) { + oc.eIPController.addEgressIPPodRetry(pod, "logical port cache update") + } + } if config.OVNKubernetesFeature.EnableInterconnect { oc.zoneICHandler = zoneinterconnect.NewZoneInterconnectHandler(oc.GetNetInfo(), oc.nbClient, oc.sbClient, oc.watchFactory) @@ -490,6 +495,20 @@ func (oc *Layer2UserDefinedNetworkController) run() error { // could be called from a dummy Controller (only has CommonNetworkControllerInfo set) func (oc *Layer2UserDefinedNetworkController) Cleanup() 
error { networkName := oc.GetNetworkName() + + // For primary Layer2 UDN only: when this is a cleanup-only controller (dummy for stale UDN + // cleanup; GetNetworkID() is InvalidID because netInfo was never reconciled from a NAD), + // discover and cleanup all gateway routers from the NB DB. DB-driven cleanup works even + // when nodes are already gone. + if oc.IsPrimaryNetwork() && oc.GetNetworkID() == types.InvalidID { + if err := cleanupGatewayRoutersForNetworkFromDB(oc.nbClient, oc.GetNetInfo(), + oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkScopedJoinSwitchName()); err != nil { + return fmt.Errorf("failed to cleanup gateway routers for network %s: %w", networkName, err) + } + } + + // Switch that holds management ports is deleted below (BaseLayer2UserDefinedNetworkController.cleanup); + // LSPs are cascade-deleted with the logical switch. if err := oc.BaseLayer2UserDefinedNetworkController.cleanup(); err != nil { return fmt.Errorf("failed to cleanup network %q: %w", networkName, err) } @@ -526,13 +545,8 @@ func (oc *Layer2UserDefinedNetworkController) Cleanup() error { } // remove load balancer groups - lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) - for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { - lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) - } - if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { - klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) - } + cleanupLoadBalancerGroups(oc.nbClient, oc.GetNetInfo(), + oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID) return nil } diff --git a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go index 7461784139..32cfaca26b 100644 --- 
a/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go +++ b/go-controller/pkg/ovn/layer2_user_defined_network_controller_test.go @@ -483,6 +483,95 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { ), ) + It("primary layer 2 UDN: controller creates entities via init/watchers, then dummy Cleanup() removes them", func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + setupConfig(dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16"), testConfiguration{}, config.GatewayModeShared) + app.Action = func(ctx *cli.Context) error { + netInfo := dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16") + netConf := netInfo.netconf() + networkConfig, err := util.NewNetInfo(netConf) + Expect(err).NotTo(HaveOccurred()) + mutableNetInfo := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup.SetNetworkID(ovntypes.InvalidID) + + nad, err := newNetworkAttachmentDefinition(ns, nadName, *netConf) + Expect(err).NotTo(HaveOccurred()) + fakeNetworkManager := &testnm.FakeNetworkManager{ + PrimaryNetworks: map[string]util.NetInfo{}, + } + fakeNetworkManager.PrimaryNetworks[ns] = mutableNetInfo + + const nodeIPv4CIDR = "192.168.126.202/24" + testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR, netInfo) + Expect(err).NotTo(HaveOccurred()) + nbZone := &nbdb.NBGlobal{Name: config.Default.Zone, UUID: config.Default.Zone} + + // Minimal initialDB: no UDN entities. init() + watchers create them. 
+ initialDB.NBData = append(initialDB.NBData, nbZone) + Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + + fakeOvn.startWithDBSetup( + initialDB, + &corev1.NamespaceList{Items: []corev1.Namespace{*newUDNNamespace(ns)}}, + &corev1.NodeList{Items: []corev1.Node{*testNode}}, + &corev1.PodList{Items: []corev1.Pod{}}, + &nadapi.NetworkAttachmentDefinitionList{Items: []nadapi.NetworkAttachmentDefinition{*nad}}, + ) + + Expect(fakeOvn.networkManager.Start()).To(Succeed()) + defer fakeOvn.networkManager.Stop() + Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) + Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + + // Run init() to create cluster-level entities, then watchers so node sync creates per-node entities. + l2Controller, ok := fakeOvn.fullL2UDNControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + Expect(l2Controller.init()).To(Succeed()) + udnNetController, ok := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + udnNetController.bnc.ovnClusterLRPToJoinIfAddrs = dummyJoinIPs() + Expect(l2Controller.WatchNodes()).To(Succeed()) + Expect(l2Controller.WatchPods()).To(Succeed()) + Expect(l2Controller.WatchNetworkPolicy()).To(Succeed()) + + // Wait for the controller to create the Layer2 switch. + udnLSName := l2Controller.GetNetworkScopedSwitchName(ovntypes.OVNLayer2Switch) + Eventually(func(g Gomega) { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(fakeOvn.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.Name == udnLSName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(switches).NotTo(BeEmpty()) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Assert gateway router was created before cleanup. 
+ udnGWRouterName := l2Controller.GetNetworkScopedGWRouterName(nodeName) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.Name == udnGWRouterName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty()) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Dummy controller with InvalidID runs Cleanup() to remove all entities for this network. + dummyController, err := NewLayer2UserDefinedNetworkController( + &l2Controller.CommonNetworkControllerInfo, + mutableNetInfoCleanup, + fakeOvn.networkManager.Interface(), + nil, + NewPortCache(ctx.Done()), + nil, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(dummyController.Cleanup()).To(Succeed()) + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(generateUDNPostInitDB([]libovsdbtest.TestData{nbZone}))) + return nil + } + Expect(app.Run([]string{app.Name})).To(Succeed()) + }) + It("controller should cleanup stale nodes on startup", func() { app.Action = func(*cli.Context) error { netInfo := dummyLayer2PrimaryUserDefinedNetwork("192.168.0.0/16") diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go index 24ccf96a5f..a1654d3244 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller.go +++ b/go-controller/pkg/ovn/layer3_user_defined_network_controller.go @@ -385,6 +385,11 @@ func NewLayer3UserDefinedNetworkController( gatewayManagers: sync.Map{}, eIPController: eIPController, } + if oc.IsPrimaryNetwork() && oc.eIPController != nil { + oc.onLogicalPortCacheAdd = func(pod *corev1.Pod, _ string) { + oc.eIPController.addEgressIPPodRetry(pod, "logical port cache update") + } + } if config.OVNKubernetesFeature.EnableInterconnect { oc.zoneICHandler = zoneic.NewZoneInterconnectHandler(oc.GetNetInfo(), cnci.nbClient, cnci.sbClient, cnci.watchFactory) @@ -518,6 +523,19 @@ func (oc 
*Layer3UserDefinedNetworkController) Cleanup() error { // Note : Cluster manager removes the subnet annotation for the node. netName := oc.GetNetworkName() klog.Infof("Delete OVN logical entities for %s network controller of network %s", types.Layer3Topology, netName) + + // For primary L3 UDN only: when this is a cleanup-only controller (dummy for stale UDN + // cleanup; GetNetworkID() is InvalidID because netInfo was never reconciled from a NAD), + // discover and cleanup all gateway routers from the NB DB. DB-driven cleanup works even + // when nodes are already gone. + if oc.IsPrimaryNetwork() && oc.GetNetworkID() == types.InvalidID { + if err := cleanupGatewayRoutersForNetworkFromDB(oc.nbClient, oc.GetNetInfo(), + oc.GetNetworkScopedClusterRouterName(), oc.GetNetworkScopedJoinSwitchName()); err != nil { + return fmt.Errorf("failed to cleanup gateway routers for network %s: %w", netName, err) + } + } + + // Node switches (which hold management port LSPs) are deleted below; LSPs are cascade-deleted with the logical switch. // first delete node logical switches ops, err = libovsdbops.DeleteLogicalSwitchesWithPredicateOps(oc.nbClient, ops, func(item *nbdb.LogicalSwitch) bool { @@ -557,6 +575,16 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { return err } + // Delete QoS rows for this network (e.g. from NetworkQoS controller). Applies to primary and + // secondary Layer3 UDNs when EnableNetworkQoS is set. 
+ ops, err = libovsdbops.DeleteQoSesWithPredicateOps(oc.nbClient, ops, + func(item *nbdb.QoS) bool { + return item.ExternalIDs[types.NetworkExternalID] == netName + }) + if err != nil { + return fmt.Errorf("failed to get ops for deleting QoSes of network %s: %v", netName, err) + } + _, err = libovsdbops.TransactAndCheck(oc.nbClient, ops) if err != nil { return fmt.Errorf("failed to deleting routers/switches of network %s: %v", netName, err) @@ -569,13 +597,8 @@ func (oc *Layer3UserDefinedNetworkController) Cleanup() error { } // remove load balancer groups - lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) - for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { - lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) - } - if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { - klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) - } + cleanupLoadBalancerGroups(oc.nbClient, oc.GetNetInfo(), + oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID) return nil } @@ -639,11 +662,11 @@ func (oc *Layer3UserDefinedNetworkController) run() error { return fmt.Errorf("unable to create network qos controller, err: %w", err) } oc.wg.Add(1) - go func() { + go func(ch <-chan struct{}) { defer oc.wg.Done() // Until we have scale issues in future let's spawn only one thread - oc.nqosController.Run(1, oc.stopChan) - }() + oc.nqosController.Run(1, ch) + }(oc.stopChan) } klog.Infof("Completing all the Watchers for network %s took %v", oc.GetNetworkName(), time.Since(start)) diff --git a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go index ed70df467f..caea5164a9 100644 --- a/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go +++ 
b/go-controller/pkg/ovn/layer3_user_defined_network_controller_test.go @@ -459,6 +459,112 @@ var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { }), ), ) + + It("primary Layer 3 UDN: controller creates entities via init/watchers, then dummy Cleanup() removes them", func() { + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + netInfo := dummyPrimaryLayer3UserDefinedNetwork("192.168.0.0/16", "192.168.1.0/24") + app.Action = func(ctx *cli.Context) error { + netConf := netInfo.netconf() + networkConfig, err := util.NewNetInfo(netConf) + Expect(err).NotTo(HaveOccurred()) + // For cleanup we use a copy with InvalidID so the dummy controller treats the network as stale. + mutableNetInfoCleanup := util.NewMutableNetInfo(networkConfig) + mutableNetInfoCleanup.SetNetworkID(types.InvalidID) + + nad, err := newNetworkAttachmentDefinition(ns, nadName, *netConf) + Expect(err).NotTo(HaveOccurred()) + // Dummy controller only runs Cleanup(), which does not use the network manager; empty fake is enough. + fakeNetworkManager := &networkmanager.FakeNetworkManager{ + PrimaryNetworks: make(map[string]util.NetInfo), + } + + const nodeIPv4CIDR = "192.168.126.202/24" + testNode, err := newNodeWithUserDefinedNetworks(nodeName, nodeIPv4CIDR, netInfo) + Expect(err).NotTo(HaveOccurred()) + + // NB_Global with default zone so GetNBZone returns it; node without zone annotation is treated as local. + nbZone := &nbdb.NBGlobal{Name: types.OvnDefaultZone, UUID: types.OvnDefaultZone} + // Post-cleanup DB: default net node switch + NB_Global + global entities (Copp, meters) as in Layer2 test. + defaultNetExpectations := generateUDNPostInitDB(append(emptyDefaultClusterNetworkNodeSwitch(nodeName), nbZone)) + + // Minimal initialDB: default net node switch, no UDN entities. The UDN controller's Start() + // runs init() which creates cluster router and join switch; then node sync creates per-node entities. 
+ initialDB.NBData = append(initialDB.NBData, nbZone) + Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + + fakeOvn.startWithDBSetup( + initialDB, + &corev1.NamespaceList{Items: []corev1.Namespace{*newUDNNamespace(ns)}}, + &corev1.NodeList{Items: []corev1.Node{*testNode}}, + &corev1.PodList{Items: []corev1.Pod{}}, + &nadapi.NetworkAttachmentDefinitionList{Items: []nadapi.NetworkAttachmentDefinition{*nad}}, + ) + + // Mock ovn-nbctl list Load_Balancer_Group (used by UDN controller init; default controller init is not run in this test). + fexec := util.GetExec().(*testing.FakeExec) + fexec.AddFakeCmdsNoOutputNoError([]string{ + "ovn-nbctl --timeout=15 --columns=_uuid list Load_Balancer_Group", + }) + + // networkManager is already started by startWithDBSetup (via init()) and stopped by AfterEach (shutdown). + Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) + Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + + // Run init() to create cluster-level entities (cluster router, join switch, LB groups, etc.), + // then start watchers so node sync creates per-node entities (node LS, GW router, etc.). + l3Controller, ok := fakeOvn.fullL3UDNControllers[userDefinedNetworkName] + Expect(ok).To(BeTrue()) + Expect(l3Controller.init()).To(Succeed()) + Expect(l3Controller.WatchNodes()).To(Succeed()) + Expect(l3Controller.WatchPods()).To(Succeed()) + Expect(l3Controller.WatchNetworkPolicy()).To(Succeed()) + + // Wait for the controller to create UDN entities: assert any switches and routers exist with this network's external-ids, + // and that the gateway router for this node exists. 
+ networkName := networkConfig.GetNetworkName() + gwRouterName := networkConfig.GetNetworkScopedGWRouterName(nodeName) + Eventually(func(g Gomega) { + switches, err := libovsdbops.FindLogicalSwitchesWithPredicate(fakeOvn.nbClient, func(ls *nbdb.LogicalSwitch) bool { + return ls.ExternalIDs != nil && ls.ExternalIDs[types.NetworkExternalID] == networkName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(switches).NotTo(BeEmpty(), "at least one LogicalSwitch for network %q should exist", networkName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.ExternalIDs != nil && lr.ExternalIDs[types.NetworkExternalID] == networkName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty(), "at least one LogicalRouter for network %q should exist", networkName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + Eventually(func(g Gomega) { + routers, err := libovsdbops.FindLogicalRoutersWithPredicate(fakeOvn.nbClient, func(lr *nbdb.LogicalRouter) bool { + return lr.Name == gwRouterName + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(routers).NotTo(BeEmpty(), "gateway router %q should exist", gwRouterName) + }).WithTimeout(10 * time.Second).Should(Succeed()) + + // Do NOT delete the NAD. Simulate CleanupStaleNetworks(no valid networks): dummy controller + // with InvalidID runs Cleanup() so our network is treated as stale and all its entities are removed. 
+ dummyController, err := NewLayer3UserDefinedNetworkController( + &l3Controller.CommonNetworkControllerInfo, + mutableNetInfoCleanup, + fakeNetworkManager, + nil, + nil, + NewPortCache(ctx.Done()), + ) + Expect(err).NotTo(HaveOccurred()) + Expect(dummyController.Cleanup()).To(Succeed()) + + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(defaultNetExpectations)) + return nil + } + Expect(app.Run([]string{app.Name})).To(Succeed()) + }) + Describe("Dynamic UDN allocation with remote node", func() { It("activates a remote node when a NAD becomes active and cleans it up when inactive", func() { Expect(config.PrepareTestConfig()).To(Succeed()) diff --git a/go-controller/pkg/ovn/multipolicy_test.go b/go-controller/pkg/ovn/multipolicy_test.go index 0d6ea4b2d3..5dfc0f59dc 100644 --- a/go-controller/pkg/ovn/multipolicy_test.go +++ b/go-controller/pkg/ovn/multipolicy_test.go @@ -454,91 +454,96 @@ var _ = ginkgo.Describe("OVN MultiNetworkPolicy Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("correctly creates and deletes network policy and multi network policy with the same policy", func() { - app.Action = func(*cli.Context) error { - var err error - - topology := ovntypes.Layer2Topology - subnets := "10.1.0.0/24" - setUserDefinedNetworkTestData(topology, subnets) - - namespace1 := *newNamespace(namespaceName1) - nPodTest := getTestPod(namespace1.Name, nodeName) - nPodTest.addNetwork(userDefinedNetworkName, nadNamespacedName, "", "", "", "10.1.1.1", "0a:58:0a:01:01:01", "secondary", 1, nil) - networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) - - watchNodes := false - node := *newNode(nodeName, "192.168.126.202/24") + ginkgo.DescribeTable("correctly creates and deletes network policy and multi network policy with the same policy", + func(allowICMPNetworkPolicy bool) { + app.Action = func(*cli.Context) error { + var err error - startOvn(initialDB, watchNodes, []corev1.Node{node}, 
[]corev1.Namespace{namespace1}, nil, nil, - []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + topology := ovntypes.Layer2Topology + subnets := "10.1.0.0/24" + setUserDefinedNetworkTestData(topology, subnets) - ginkgo.By("Creating networkPolicy applied to the pod") - _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + namespace1 := *newNamespace(namespaceName1) + nPodTest := getTestPod(namespace1.Name, nodeName) + nPodTest.addNetwork(userDefinedNetworkName, nadNamespacedName, "", "", "", "10.1.1.1", "0a:58:0a:01:01:01", "secondary", 1, nil) + networkPolicy := getPortNetworkPolicy(netPolicyName1, namespace1.Name, labelName, labelVal, portNum) - _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - fakeOvn.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{nPodTest.podIP}) + watchNodes := false + node := *newNode(nodeName, "192.168.126.202/24") - dataParams := newNetpolDataParams(networkPolicy). - withLocalPortUUIDs(nPodTest.portUUID). - withTCPPeerPorts(portNum) - gressPolicyExpectedData1 := getPolicyData(dataParams) - defaultDenyExpectedData1 := getDefaultDenyData(dataParams) - initData := getUpdatedInitialDB([]testPod{nPodTest}) - expectedData1 := append(initData, gressPolicyExpectedData1...) - expectedData1 = append(expectedData1, defaultDenyExpectedData1...) 
- gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1...)) + startOvn(initialDB, watchNodes, []corev1.Node{node}, []corev1.Namespace{namespace1}, nil, nil, + []nettypes.NetworkAttachmentDefinition{*nad}, []testPod{nPodTest}, map[string]string{labelName: labelVal}) - ginkgo.By("Creating multi-networkPolicy applied to the pod") - mpolicy := convertNetPolicyToMultiNetPolicy(networkPolicy) - mpolicy.Annotations = map[string]string{PolicyForAnnotation: nadNamespacedName} + ginkgo.By("Creating networkPolicy applied to the pod") + _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Create(context.TODO(), networkPolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Create(context.TODO(), mpolicy, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{nPodTest.podIP}) + + dataParams := newNetpolDataParams(networkPolicy). + withLocalPortUUIDs(nPodTest.portUUID). + withTCPPeerPorts(portNum) + gressPolicyExpectedData1 := getPolicyData(dataParams) + defaultDenyExpectedData1 := getDefaultDenyData(dataParams) + initData := getUpdatedInitialDB([]testPod{nPodTest}) + expectedData1 := append(initData, gressPolicyExpectedData1...) + expectedData1 = append(expectedData1, defaultDenyExpectedData1...) 
+ gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1...)) + + ginkgo.By("Creating multi-networkPolicy applied to the pod") + mpolicy := convertNetPolicyToMultiNetPolicy(networkPolicy) + mpolicy.Annotations = map[string]string{PolicyForAnnotation: nadNamespacedName} + + _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Create(context.TODO(), mpolicy, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Get(context.TODO(), mpolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ocInfo := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] - portInfo := nPodTest.getNetworkPortInfo(userDefinedNetworkName, nadNamespacedName) - gomega.Expect(portInfo).NotTo(gomega.BeNil()) - ocInfo.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{portInfo.podIP}) + ocInfo := fakeOvn.userDefinedNetworkControllers[userDefinedNetworkName] + portInfo := nPodTest.getNetworkPortInfo(userDefinedNetworkName, nadNamespacedName) + gomega.Expect(portInfo).NotTo(gomega.BeNil()) + ocInfo.asf.ExpectAddressSetWithAddresses(namespaceName1, []string{portInfo.podIP}) + + dataParams2 := newNetpolDataParams(networkPolicy). + withLocalPortUUIDs(portInfo.portUUID). + withTCPPeerPorts(portNum). + withNetInfo(netInfo) + gressPolicyExpectedData2 := getPolicyData(dataParams2) + defaultDenyExpectedData2 := getDefaultDenyData(dataParams2) + expectedData2 := append(expectedData1, gressPolicyExpectedData2...) + expectedData2 = append(expectedData2, defaultDenyExpectedData2...) 
+ gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData2...)) + + // Delete the multi network policy + ginkgo.By("Deleting the multi network policy") + err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). + Delete(context.TODO(), mpolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1)) - dataParams2 := newNetpolDataParams(networkPolicy). - withLocalPortUUIDs(portInfo.portUUID). - withTCPPeerPorts(portNum). - withNetInfo(netInfo) - gressPolicyExpectedData2 := getPolicyData(dataParams2) - defaultDenyExpectedData2 := getDefaultDenyData(dataParams2) - expectedData2 := append(expectedData1, gressPolicyExpectedData2...) - expectedData2 = append(expectedData2, defaultDenyExpectedData2...) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData2...)) + ginkgo.By("Deleting the network policy") + err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Delete the multi network policy - ginkgo.By("Deleting the multi network policy") - err = fakeOvn.fakeClient.MultiNetworkPolicyClient.K8sCniCncfIoV1beta1().MultiNetworkPolicies(mpolicy.Namespace). - Delete(context.TODO(), mpolicy.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(expectedData1)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(initData)) + return nil + } - ginkgo.By("Deleting the network policy") - err = fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
- Delete(context.TODO(), networkPolicy.Name, metav1.DeleteOptions{}) + err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdb.HaveData(initData)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) ginkgo.DescribeTable("correctly adds and deletes pod IPs from secondary network namespace address set", func(topology string, remote bool) { diff --git a/go-controller/pkg/ovn/pods.go b/go-controller/pkg/ovn/pods.go index e877cb9af6..e43a2cb31f 100644 --- a/go-controller/pkg/ovn/pods.go +++ b/go-controller/pkg/ovn/pods.go @@ -379,6 +379,9 @@ func (oc *DefaultNetworkController) addLogicalPort(pod *corev1.Pod) (err error) // Add the pod's logical switch port to the port cache _ = oc.logicalPortCache.add(pod, switchName, types.DefaultNetworkName, lsp.UUID, podAnnotation.MAC, podAnnotation.IPs) + if oc.onLogicalPortCacheAdd != nil { + oc.onLogicalPortCacheAdd(pod, types.DefaultNetworkName) + } if kubevirt.IsPodLiveMigratable(pod) { if err := oc.ensureDHCP(pod, podAnnotation, lsp); err != nil { diff --git a/go-controller/pkg/ovn/policy_stale_test.go b/go-controller/pkg/ovn/policy_stale_test.go index c1bc791f14..5bb5ac44ca 100644 --- a/go-controller/pkg/ovn/policy_stale_test.go +++ b/go-controller/pkg/ovn/policy_stale_test.go @@ -3,6 +3,7 @@ package ovn import ( "context" "fmt" + "strings" "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" @@ -40,6 +41,9 @@ func getStaleDefaultDenyACL(netpolName, namespace, match string, deny, egress bo name := namespace + "_" + netpolName if !deny { aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, direction, arpAllowACL) + if strings.Contains(match, "icmp") { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, direction, icmpAllowACL) + } 
priority = types.DefaultAllowPriority action = nbdb.ACLActionAllow name = getStaleARPAllowACLName(namespace) @@ -67,40 +71,54 @@ func getStaleARPAllowACLName(ns string) string { // getStaleDefaultDenyData builds stale ACLs and port groups for given netpol func getStaleDefaultDenyData(networkPolicy *knet.NetworkPolicy) []libovsdbtest.TestData { + return getStaleDefaultDenyDataWithICMP(networkPolicy, config.OVNKubernetesFeature.AllowICMPNetworkPolicy) +} + +func getStaleDefaultDenyDataWithICMP(networkPolicy *knet.NetworkPolicy, includeICMP bool) []libovsdbtest.TestData { namespace := networkPolicy.Namespace netpolName := networkPolicy.Name fakeController := getFakeBaseController(&util.DefaultNetInfo{}) egressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLEgress) egressDenyACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName, true, true) - egressAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, false, true) + egressARPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+arpAllowPolicyMatch, false, true) + + testData := []libovsdbtest.TestData{egressDenyACL, egressARPAllowACL} + egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} + + if includeICMP { + egressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, false, true) + testData = append(testData, egressICMPAllowACL) + egressACLs = append(egressACLs, egressICMPAllowACL) + } ingressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLIngress) ingressDenyACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName, true, false) - ingressAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+arpAllowPolicyMatch, false, false) + ingressARPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == 
@"+ingressPGName+" && "+arpAllowPolicyMatch, false, false) + + ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} + testData = append(testData, ingressDenyACL, ingressARPAllowACL) + if includeICMP { + ingressICMPAllowACL := getStaleDefaultDenyACL(netpolName, namespace, "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, false, false) + testData = append(testData, ingressICMPAllowACL) + ingressACLs = append(ingressACLs, ingressICMPAllowACL) + } egressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress), nil, - []*nbdb.ACL{egressDenyACL, egressAllowACL}, + egressACLs, ) egressDenyPG.UUID = egressDenyPG.Name + "-UUID" ingressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress), nil, - []*nbdb.ACL{ingressDenyACL, ingressAllowACL}, + ingressACLs, ) ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - return []libovsdbtest.TestData{ - egressDenyACL, - egressAllowACL, - ingressDenyACL, - ingressAllowACL, - egressDenyPG, - ingressDenyPG, - } + return append(testData, egressDenyPG, ingressDenyPG) } // getStalePolicyACLs builds stale ACLs for given peers @@ -250,14 +268,47 @@ var _ = ginkgo.Describe("OVN Stale NetworkPolicy Operations", func() { ginkgo.Context("on startup", func() { - ginkgo.It("reconciles an existing networkPolicy updating stale ACLs", func() { + ginkgo.DescribeTable("reconciles an existing networkPolicy updating stale ACLs", + func(allowICMPNetworkPolicy bool) { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + // start with stale ACLs + gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) + defaultDenyInitialData := 
getStaleDefaultDenyData(networkPolicy) + initialData := initialDB.NBData + initialData = append(initialData, gressPolicyInitialData...) + initialData = append(initialData, defaultDenyInitialData...) + startOvn(libovsdbtest.TestSetup{NBData: initialData}, []corev1.Namespace{namespace1, namespace2}, + []knet.NetworkPolicy{*networkPolicy}) + + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName1) + fakeOvn.asf.ExpectEmptyAddressSet(namespaceName2) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). + Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // make sure stale ACLs were updated + expectedData := getNamespaceWithSinglePolicyExpectedData( + newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), + initialDB.NBData) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData...)) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) + + ginkgo.It("reconciles with allow ICMP network policy disabled and removes stale ICMP default deny ACLs", func() { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = false namespace1 := *newNamespace(namespaceName1) namespace2 := *newNamespace(namespaceName2) networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, namespace2.Name, "", true, true) - // start with stale ACLs + // start with stale ACLs containing ICMP allow ACLs from a previously enabled config gressPolicyInitialData := getStalePolicyData(networkPolicy, []string{namespace2.Name}) - defaultDenyInitialData := getStaleDefaultDenyData(networkPolicy) + defaultDenyInitialData := getStaleDefaultDenyDataWithICMP(networkPolicy, true) initialData := initialDB.NBData initialData = append(initialData, gressPolicyInitialData...) initialData = append(initialData, defaultDenyInitialData...) 
@@ -270,7 +321,7 @@ var _ = ginkgo.Describe("OVN Stale NetworkPolicy Operations", func() { _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // make sure stale ACLs were updated + // make sure stale ICMP ACLs were removed to match disabled allow-icmp config expectedData := getNamespaceWithSinglePolicyExpectedData( newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), initialDB.NBData) diff --git a/go-controller/pkg/ovn/policy_test.go b/go-controller/pkg/ovn/policy_test.go index af7923a5cf..1dfe375dc4 100644 --- a/go-controller/pkg/ovn/policy_test.go +++ b/go-controller/pkg/ovn/policy_test.go @@ -108,7 +108,7 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * egressDenyACL.UUID = aclIDs.String() + "-UUID" aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLEgress, arpAllowACL) - egressAllowACL := libovsdbops.BuildACL( + egressARPAllowACL := libovsdbops.BuildACL( libovsdbutil.GetACLName(aclIDs), nbdb.ACLDirectionFromLport, types.DefaultAllowPriority, @@ -123,7 +123,36 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * }, types.DefaultACLTier, ) - egressAllowACL.UUID = aclIDs.String() + "-UUID" + egressARPAllowACL.UUID = aclIDs.String() + "-UUID" + + testData := []libovsdbtest.TestData{ + egressDenyACL, + egressARPAllowACL, + } + egressACLs := []*nbdb.ACL{egressDenyACL, egressARPAllowACL} + + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLEgress, icmpAllowACL) + egressICMPAllowACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(aclIDs), + nbdb.ACLDirectionFromLport, + types.DefaultAllowPriority, + "inport == @"+egressPGName+" && "+icmpAllowPolicyMatch, + nbdb.ACLActionAllow, + types.OvnACLLoggingMeter, + "", + 
false, + aclIDs.GetExternalIDs(), + map[string]string{ + "apply-after-lb": "true", + }, + types.DefaultACLTier, + ) + egressICMPAllowACL.UUID = aclIDs.String() + "-UUID" + testData = append(testData, egressICMPAllowACL) + egressACLs = append(egressACLs, egressICMPAllowACL) + + } ingressPGName := fakeController.defaultDenyPortGroupName(namespace, libovsdbutil.ACLIngress) aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, defaultDenyACL) @@ -143,7 +172,7 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * ingressDenyACL.UUID = aclIDs.String() + "-UUID" aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, arpAllowACL) - ingressAllowACL := libovsdbops.BuildACL( + ingressARPAllowACL := libovsdbops.BuildACL( libovsdbutil.GetACLName(aclIDs), nbdb.ACLDirectionToLport, types.DefaultAllowPriority, @@ -156,7 +185,31 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * nil, types.DefaultACLTier, ) - ingressAllowACL.UUID = aclIDs.String() + "-UUID" + ingressARPAllowACL.UUID = aclIDs.String() + "-UUID" + + ingressACLs := []*nbdb.ACL{ingressDenyACL, ingressARPAllowACL} + if config.OVNKubernetesFeature.AllowICMPNetworkPolicy { + aclIDs = fakeController.getDefaultDenyPolicyACLIDs(namespace, libovsdbutil.ACLIngress, icmpAllowACL) + ingressICMPAllowACL := libovsdbops.BuildACL( + libovsdbutil.GetACLName(aclIDs), + nbdb.ACLDirectionToLport, + types.DefaultAllowPriority, + "outport == @"+ingressPGName+" && "+icmpAllowPolicyMatch, + nbdb.ACLActionAllow, + types.OvnACLLoggingMeter, + "", + false, + aclIDs.GetExternalIDs(), + nil, + types.DefaultACLTier, + ) + ingressICMPAllowACL.UUID = aclIDs.String() + "-UUID" + ingressACLs = append(ingressACLs, ingressICMPAllowACL) + } + + for _, acl := range ingressACLs { + testData = append(testData, acl) + } lsps := []*nbdb.LogicalSwitchPort{} for _, uuid := range params.localPortUUIDs { @@ -167,10 +220,11 @@ func 
getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * if policyTypeEgress { egressDenyPorts = lsps } + egressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLEgress), egressDenyPorts, - []*nbdb.ACL{egressDenyACL, egressAllowACL}, + egressACLs, ) egressDenyPG.UUID = egressDenyPG.Name + "-UUID" @@ -181,18 +235,11 @@ func getDefaultDenyDataHelper(policyTypeIngress, policyTypeEgress bool, params * ingressDenyPG := libovsdbutil.BuildPortGroup( fakeController.getDefaultDenyPolicyPortGroupIDs(namespace, libovsdbutil.ACLIngress), ingressDenyPorts, - []*nbdb.ACL{ingressDenyACL, ingressAllowACL}, + ingressACLs, ) ingressDenyPG.UUID = ingressDenyPG.Name + "-UUID" - return []libovsdbtest.TestData{ - egressDenyACL, - egressAllowACL, - ingressDenyACL, - ingressAllowACL, - egressDenyPG, - ingressDenyPG, - } + return append(testData, egressDenyPG, ingressDenyPG) } func getDefaultDenyData(params *netpolDataParams) []libovsdbtest.TestData { @@ -797,33 +844,38 @@ var _ = ginkgo.Describe("OVN NetworkPolicy Operations", func() { gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) }) - ginkgo.It("reconciles an existing networkPolicy with empty db", func() { - app.Action = func(*cli.Context) error { - namespace1 := *newNamespace(namespaceName1) - namespace2 := *newNamespace(namespaceName2) - namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) - namespace2AddressSetv4, _ := buildNamespaceAddressSets(namespace2.Name, nil) - // add namespaces to initial Database - initialDB.NBData = append(initialDB.NBData, namespace1AddressSetv4, namespace2AddressSetv4) - - networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, - namespace2.Name, "", true, true) - startOvn(initialDB, []corev1.Namespace{namespace1, namespace2}, []knet.NetworkPolicy{*networkPolicy}, - nil, nil) - - _, err := 
fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). - Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.DescribeTable("reconciles an existing networkPolicy with empty db", + func(allowICMPNetworkPolicy bool) { + app.Action = func(*cli.Context) error { + config.OVNKubernetesFeature.AllowICMPNetworkPolicy = allowICMPNetworkPolicy + namespace1 := *newNamespace(namespaceName1) + namespace2 := *newNamespace(namespaceName2) + namespace1AddressSetv4, _ := buildNamespaceAddressSets(namespace1.Name, nil) + namespace2AddressSetv4, _ := buildNamespaceAddressSets(namespace2.Name, nil) + // add namespaces to initial Database + initialDB.NBData = append(initialDB.NBData, namespace1AddressSetv4, namespace2AddressSetv4) + + networkPolicy := getMatchLabelsNetworkPolicy(netPolicyName1, namespace1.Name, + namespace2.Name, "", true, true) + startOvn(initialDB, []corev1.Namespace{namespace1, namespace2}, []knet.NetworkPolicy{*networkPolicy}, + nil, nil) + + _, err := fakeOvn.fakeClient.KubeClient.NetworkingV1().NetworkPolicies(networkPolicy.Namespace). 
+ Get(context.TODO(), networkPolicy.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedData := getNamespaceWithSinglePolicyExpectedData( - newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), - initialDB.NBData) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData)) - return nil - } + expectedData := getNamespaceWithSinglePolicyExpectedData( + newNetpolDataParams(networkPolicy).withPeerNamespaces(namespace2.Name), + initialDB.NBData) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedData)) + return nil + } - gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) - }) + gomega.Expect(app.Run([]string{app.Name})).To(gomega.Succeed()) + }, + ginkgo.Entry("with allow ICMP network policy disabled", false), + ginkgo.Entry("with allow ICMP network policy enabled", true), + ) ginkgo.It("reconciles an ingress networkPolicy updating an existing ACL", func() { app.Action = func(*cli.Context) error { diff --git a/go-controller/pkg/retry/obj_retry.go b/go-controller/pkg/retry/obj_retry.go index c9de84d8c2..27e93d001f 100644 --- a/go-controller/pkg/retry/obj_retry.go +++ b/go-controller/pkg/retry/obj_retry.go @@ -415,7 +415,9 @@ func (r *RetryFramework) resourceRetry(objKey string, now time.Time) { } } - klog.Infof("Retry successful for %s %s after %d failed attempt(s)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + if entry.failedAttempts > 0 { + klog.Infof("Retry successful for %s %s after %d failed attempt(s)", r.ResourceHandler.ObjType, objKey, entry.failedAttempts) + } if initObj != nil { r.ResourceHandler.RecordSuccessEvent(initObj) } @@ -489,13 +491,13 @@ func (r *RetryFramework) processObjectInTerminalState(obj interface{}, lockedKey _, loaded := r.terminatedObjects.LoadOrStore(lockedKey, true) if loaded { // object was already terminated - klog.Infof("Detected object %s of type %s in terminal state (e.g. 
completed) will be "+ + klog.V(5).Infof("Detected object %s of type %s in terminal state (e.g. completed) will be "+ "ignored as it has already been processed", lockedKey, r.ResourceHandler.ObjType) return } // The object is in a terminal state: delete it from the cluster, delete its retry entry and return. - klog.Infof("Detected object %s of type %s in terminal state (e.g. completed)"+ + klog.V(5).Infof("Detected object %s of type %s in terminal state (e.g. completed)"+ " during %s event: will remove it", lockedKey, r.ResourceHandler.ObjType, event) internalCacheEntry := r.ResourceHandler.GetInternalCacheEntry(obj) retryEntry := r.initRetryObjWithDelete(obj, lockedKey, internalCacheEntry, true) // set up the retry obj for deletion @@ -597,8 +599,6 @@ func (r *RetryFramework) WatchResourceFiltered(namespaceForFilteredHandler strin r.ResourceHandler.ObjType, err) return } - klog.V(5).Infof("Update event received for resource %s, old object is equal to new: %t", - r.ResourceHandler.ObjType, areEqual) if areEqual { return } @@ -650,7 +650,6 @@ func (r *RetryFramework) WatchResourceFiltered(namespaceForFilteredHandler strin } klog.V(5).Infof("Update event received for %s %s", r.ResourceHandler.ObjType, newKey) - r.DoWithLock(newKey, func(key string) { // STEP 1: // Delete existing (old) object if: diff --git a/go-controller/pkg/testing/util.go b/go-controller/pkg/testing/util.go index 0a49731b94..7ac3eff77f 100644 --- a/go-controller/pkg/testing/util.go +++ b/go-controller/pkg/testing/util.go @@ -19,7 +19,7 @@ func GenerateNAD(networkName, name, namespace, topology, cidr, role string) *nad return GenerateNADWithConfig(name, namespace, fmt.Sprintf( ` { - "cniVersion": "0.4.0", + "cniVersion": "1.1.0", "name": %q, "type": "ovn-k8s-cni-overlay", "topology":%q, @@ -48,7 +48,7 @@ func GenerateNADWithoutMTU(networkName, name, namespace, topology, cidr, role st return GenerateNADWithConfig(name, namespace, fmt.Sprintf( ` { - "cniVersion": "0.4.0", + "cniVersion": "1.1.0", 
"name": %q, "type": "ovn-k8s-cni-overlay", "topology":%q, diff --git a/go-controller/pkg/util/multi_network_test.go b/go-controller/pkg/util/multi_network_test.go index 2861650ba8..14071ec99e 100644 --- a/go-controller/pkg/util/multi_network_test.go +++ b/go-controller/pkg/util/multi_network_test.go @@ -317,7 +317,7 @@ func TestParseNetconf(t *testing.T) { inputNetAttachDefConfigSpec: ` { "name": "tenantred", - "cniVersion": "1.0.0", + "cniVersion": "1.1.0", "plugins": [ { "type": "ovn-k8s-cni-overlay", @@ -333,7 +333,7 @@ func TestParseNetconf(t *testing.T) { NADName: "ns1/nad1", MTU: 1400, VLANID: 10, - NetConf: cnitypes.NetConf{Name: "tenantred", CNIVersion: "1.0.0", Type: "ovn-k8s-cni-overlay"}, + NetConf: cnitypes.NetConf{Name: "tenantred", CNIVersion: "1.1.0", Type: "ovn-k8s-cni-overlay"}, }, }, { diff --git a/go-controller/pkg/util/nad.go b/go-controller/pkg/util/nad.go index 3a220e2b82..d80b56c7cd 100644 --- a/go-controller/pkg/util/nad.go +++ b/go-controller/pkg/util/nad.go @@ -35,7 +35,7 @@ func EnsureDefaultNetworkNAD(nadLister nadlisters.NetworkAttachmentDefinitionLis Namespace: config.Kubernetes.OVNConfigNamespace, }, Spec: nadtypes.NetworkAttachmentDefinitionSpec{ - Config: fmt.Sprintf("{\"cniVersion\": \"0.4.0\", \"name\": \"ovn-kubernetes\", \"type\": \"%s\"}", config.CNI.Plugin), + Config: fmt.Sprintf("{\"cniVersion\": \"%s\", \"name\": \"ovn-kubernetes\", \"type\": \"%s\"}", config.CNISpecVersion, config.CNI.Plugin), }, }, // note we don't set ourselves as field manager for this create as we diff --git a/go-controller/pkg/util/net.go b/go-controller/pkg/util/net.go index 6016a946b5..e4628b36d2 100644 --- a/go-controller/pkg/util/net.go +++ b/go-controller/pkg/util/net.go @@ -7,6 +7,7 @@ import ( "fmt" "math/big" "net" + "slices" "strconv" "strings" @@ -329,12 +330,54 @@ func GenerateRandMAC() (net.HardwareAddr, error) { func CopyIPNets(ipnets []*net.IPNet) []*net.IPNet { copy := make([]*net.IPNet, len(ipnets)) for i := range ipnets { - ipnet := 
// isIPNetEqual reports whether two IPNet pointers describe the same network.
// Two nil pointers (or the same pointer) are equal; nil never equals non-nil.
//
// Both the prefix length and the mask bit-length must match, so a /24 IPv4
// mask is not confused with a 128-bit mask that happens to have 24 leading
// ones. net.IPMask.Size reports (0, 0) for non-canonical and empty masks; in
// that case the raw mask bytes are compared so that different non-canonical
// masks are not treated as equal.
func isIPNetEqual(ipn1, ipn2 *net.IPNet) bool {
	if ipn1 == ipn2 {
		return true
	}
	if ipn1 == nil || ipn2 == nil {
		return false
	}
	ones1, bits1 := ipn1.Mask.Size()
	ones2, bits2 := ipn2.Mask.Size()
	if ones1 != ones2 || bits1 != bits2 {
		return false
	}
	// bits == 0 means Size could not interpret the mask; compare it verbatim.
	if bits1 == 0 && string(ipn1.Mask) != string(ipn2.Mask) {
		return false
	}
	return ipn1.IP.Equal(ipn2.IP)
}

// IsIPNetsEqual returns true if both IPNet slices are equal in length and values, regardless of order.
// Duplicates are significant: every entry of ipn1 must pair with a distinct
// entry of ipn2. A nil slice and an empty slice are considered equal, and nil
// entries only match other nil entries.
func IsIPNetsEqual(ipn1, ipn2 []*net.IPNet) bool {
	if len(ipn1) != len(ipn2) {
		return false
	}
	// matched[j] marks entries of ipn2 already paired with an entry of ipn1.
	matched := make([]bool, len(ipn2))
	for _, candidate := range ipn1 {
		found := false
		for j, other := range ipn2 {
			if matched[j] || !isIPNetEqual(candidate, other) {
				continue
			}
			matched[j] = true
			found = true
			break
		}
		if !found {
			return false
		}
	}
	return true
}
+func (r *openFlowStdinReader) Read(p []byte) (int, error) { + if len(p) == 0 { + return 0, nil + } + // Fast path: no flows left and no pending delimiter. + if r.flowIndex >= len(r.flows) && !r.needEOL { + return 0, io.EOF + } + + total := 0 + for total < len(p) { + if r.needEOL { + // Emit exactly one '\n' between flows. + p[total] = '\n' + total++ + r.needEOL = false + if total == len(p) { + return total, nil + } + continue + } + + if r.flowIndex >= len(r.flows) { + break + } + + flow := r.flows[r.flowIndex] + if r.flowOffset >= len(flow) { + // Current flow was fully consumed; advance and schedule delimiter if + // there is another flow. + r.flowIndex++ + r.flowOffset = 0 + r.needEOL = r.flowIndex < len(r.flows) + continue + } + + // Copy as much of the current flow as fits in caller's buffer. + copied := copy(p[total:], flow[r.flowOffset:]) + total += copied + r.flowOffset += copied + } + + if total == 0 { + return 0, io.EOF + } + return total, nil +} + // ReplaceOFFlows replaces flows in the bridge with a slice of flows func ReplaceOFFlows(bridgeName string, flows []string) (string, string, error) { args := []string{"-O", "OpenFlow13", "--bundle", "replace-flows", bridgeName, "-"} - stdin := &bytes.Buffer{} - stdin.Write([]byte(strings.Join(flows, "\n"))) + stdin := &openFlowStdinReader{flows: flows} cmd := runner.exec.Command(runner.ofctlPath, args...) 
cmd.SetStdin(stdin) diff --git a/go-controller/pkg/util/ovs_benchmark_test.go b/go-controller/pkg/util/ovs_benchmark_test.go new file mode 100644 index 0000000000..3fb8a7b514 --- /dev/null +++ b/go-controller/pkg/util/ovs_benchmark_test.go @@ -0,0 +1,82 @@ +package util + +import ( + "bytes" + "io" + "strings" + "testing" +) + +var benchmarkFlowBytesSink int64 +var benchmarkFlowCountSink int + +func BenchmarkReplaceOFFlowsInputRendering(b *testing.B) { + benchCases := []struct { + name string + flowCount int + }{ + { + name: "1k_flows", + flowCount: 1000, + }, + { + name: "5k_flows", + flowCount: 5000, + }, + } + + for _, tc := range benchCases { + flows := makeBenchmarkFlows(tc.flowCount) + totalBytes := benchmarkFlowsBytes(flows) + + b.Run(tc.name+"/join_buffer", func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(totalBytes) + for i := 0; i < b.N; i++ { + stdin := &bytes.Buffer{} + stdin.Write([]byte(strings.Join(flows, "\n"))) + written, err := io.Copy(io.Discard, stdin) + if err != nil { + b.Fatalf("failed to drain old flow payload: %v", err) + } + benchmarkFlowBytesSink = written + benchmarkFlowCountSink = stdin.Len() + } + }) + + b.Run(tc.name+"/stream_reader", func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(totalBytes) + for i := 0; i < b.N; i++ { + stdin := &openFlowStdinReader{flows: flows} + written, err := io.Copy(io.Discard, stdin) + if err != nil { + b.Fatalf("failed to drain streaming flow payload: %v", err) + } + benchmarkFlowBytesSink = written + benchmarkFlowCountSink = len(flows) + } + }) + } +} + +func makeBenchmarkFlows(flowCount int) []string { + flows := make([]string, flowCount) + // Keep each flow moderately long to emulate real replace-flows payload size. 
// benchmarkFlowsBytes returns the size in bytes of the flows once joined with
// a single '\n' between consecutive entries (no trailing newline). An empty
// slice yields 0.
func benchmarkFlowsBytes(flows []string) int64 {
	var size int64
	for i, flow := range flows {
		if i > 0 {
			size++ // one delimiter before every flow except the first
		}
		size += int64(len(flow))
	}
	return size
}
nil}}, onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string", "string", "string", "string", "string", "string"}, RetArgList: []interface{}{mockCmd}}, - onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*bytes.Buffer"}}, + onRetArgsCmdList: &ovntest.TestifyMockHelper{OnCallMethodName: "SetStdin", OnCallMethodArgType: []string{"*util.openFlowStdinReader"}}, }, } for i, tc := range tests { @@ -1722,6 +1724,44 @@ func TestReplaceOFFlows(t *testing.T) { } } +func TestOpenFlowStdinReader(t *testing.T) { + tests := []struct { + desc string + flows []string + }{ + { + desc: "empty flow list", + flows: []string{}, + }, + { + desc: "single flow", + flows: []string{"table=0,priority=0,actions=NORMAL"}, + }, + { + desc: "multiple flows", + flows: []string{"a", "b", "c"}, + }, + { + desc: "includes empty flow", + flows: []string{"a", "", "c"}, + }, + } + + for i, tc := range tests { + t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { + r := &openFlowStdinReader{flows: tc.flows} + out, err := io.ReadAll(r) + require.NoError(t, err) + assert.Equal(t, strings.Join(tc.flows, "\n"), string(out)) + + buf := make([]byte, 1) + n, eof := r.Read(buf) + assert.Equal(t, 0, n) + assert.Equal(t, io.EOF, eof) + }) + } +} + func TestGetOVNDBServerInfo(t *testing.T) { mockKexecIface := new(mock_k8s_io_utils_exec.Interface) mockExecRunner := new(mocks.ExecRunner) diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 76a8833f2a..266d05aaaf 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -366,25 +366,7 @@ func IsClusterIP(svcVIP string) bool { return false } -type UnprocessedActiveNetworkError struct { - namespace string - udnName string -} - -func (m *UnprocessedActiveNetworkError) Error() string { - return fmt.Sprintf("primary UDN %q exists in namespace %s, but NAD has not been processed yet", - 
m.udnName, m.namespace) -} - -func IsUnprocessedActiveNetworkError(err error) bool { - var unprocessedActiveNetworkError *UnprocessedActiveNetworkError - return errors.As(err, &unprocessedActiveNetworkError) -} - -func NewUnprocessedActiveNetworkError(namespace, udnName string) *UnprocessedActiveNetworkError { - return &UnprocessedActiveNetworkError{namespace: namespace, udnName: udnName} -} - +// InvalidPrimaryNetworkError indicates that the namespace requires a primary UDN, but no primary UDN exists yet type InvalidPrimaryNetworkError struct { namespace string } diff --git a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml index 5698797434..90efbecad1 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-control-plane/templates/ovnkube-control-plane.yaml @@ -186,6 +186,8 @@ spec: value: {{ hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} # end of container volumes: # TODO: Need to check why we need this? 
diff --git a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml index df2a7a1d0f..d87d1878fd 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-master/templates/deployment-ovnkube-master.yaml @@ -307,6 +307,8 @@ spec: value: {{ hasKey .Values.global "enablePersistentIPs" | ternary .Values.global.enablePersistentIPs false | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_DISABLE_REQUESTEDCHASSIS value: {{ default "false" .Values.global.disableRequestedchassis | quote }} # end of container diff --git a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml index c2503c0d1d..19ffdf112d 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-single-node-zone/templates/ovnkube-single-node-zone.yaml @@ -472,6 +472,8 @@ spec: value: {{ hasKey .Values.global "enableSvcTemplate" | ternary .Values.global.enableSvcTemplate true | quote }} - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote 
}} - name: OVN_NETWORK_QOS_ENABLE diff --git a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml index bd03a2518c..e26b24a64b 100644 --- a/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml +++ b/helm/ovn-kubernetes/charts/ovnkube-zone-controller/templates/ovnkube-zone-controller.yaml @@ -388,6 +388,8 @@ spec: value: "local" - name: OVN_ENABLE_DNSNAMERESOLVER value: {{ hasKey .Values.global "enableDNSNameResolver" | ternary .Values.global.enableDNSNameResolver false | quote }} + - name: OVN_ALLOW_ICMP_NETPOL + value: {{ hasKey .Values.global "allowICMPNetworkPolicy" | ternary .Values.global.allowICMPNetworkPolicy false | quote }} - name: OVN_OBSERV_ENABLE value: {{ hasKey .Values.global "enableObservability" | ternary .Values.global.enableObservability false | quote }} # end of container diff --git a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml index a2bec63d7e..efadaa7219 100644 --- a/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml +++ b/helm/ovn-kubernetes/templates/rbac-ovnkube-node.yaml @@ -235,3 +235,39 @@ rules: {{- if eq (hasKey .Values.global "enableInterconnect" | ternary .Values.global.enableInterconnect false) true }} - create {{- end }} + +{{- $tags := (.Values.tags | default dict) }} +{{- if (index $tags "ovnkube-node-dpu-host") }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +roleRef: + name: ovnkube-node-dpu-leases + kind: Role + apiGroup: rbac.authorization.k8s.io +subjects: + {{- if eq (hasKey .Values.global "enableOvnKubeIdentity" | ternary .Values.global.enableOvnKubeIdentity true) true }} + - kind: Group + name: system:ovn-nodes + apiGroup: rbac.authorization.k8s.io + {{- else }} + - kind: ServiceAccount + name: 
ovnkube-node + namespace: ovn-kubernetes + {{- end }} + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: ovnkube-node-dpu-leases + namespace: ovn-kubernetes +rules: + - apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: [ "get", "create", "update" ] +{{- end }} diff --git a/helm/ovn-kubernetes/values-multi-node-zone.yaml b/helm/ovn-kubernetes/values-multi-node-zone.yaml index d3b1c16755..ae73d2827a 100644 --- a/helm/ovn-kubernetes/values-multi-node-zone.yaml +++ b/helm/ovn-kubernetes/values-multi-node-zone.yaml @@ -116,6 +116,8 @@ global: lFlowCacheLimitKb: "" # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws disableSnatMultipleGws: "" # -- Controls if forwarding is allowed on OVNK controlled interfaces diff --git a/helm/ovn-kubernetes/values-no-ic.yaml b/helm/ovn-kubernetes/values-no-ic.yaml index 1ed4e30a2d..f366632023 100644 --- a/helm/ovn-kubernetes/values-no-ic.yaml +++ b/helm/ovn-kubernetes/values-no-ic.yaml @@ -104,6 +104,8 @@ global: enableLFlowCache: true # -- Configure to use DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Maximum number of logical flow cache entries ovn-controller may create when the logical flow cache is enabled # @default -- unlimited lFlowCacheLimit: "" diff --git a/helm/ovn-kubernetes/values-single-node-zone.yaml b/helm/ovn-kubernetes/values-single-node-zone.yaml index b9f4f2caf2..5b2dcf6976 100644 --- a/helm/ovn-kubernetes/values-single-node-zone.yaml +++ b/helm/ovn-kubernetes/values-single-node-zone.yaml @@ -118,6 +118,8 @@ global: enablePersistentIPs: true # -- Configure to use 
DNSNameResolver feature with ovn-kubernetes enableDNSNameResolver: false + # -- Configure to allow ICMP and ICMPv6 traffic to bypass NetworkPolicy deny rules + allowICMPNetworkPolicy: false # -- Whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws disableSnatMultipleGws: "" # -- Controls if forwarding is allowed on OVNK controlled interfaces diff --git a/mkdocs.yml b/mkdocs.yml index 3d09e08387..528753cce6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,10 +82,11 @@ nav: - Host To NodePort Hairpin: design/host-to-node-port-hairpin-trafficflow.md - ExternalIPs/LoadBalancerIngress: design/external-ip-and-loadbalancer-ingress.md - Internal Subnets: design/ovn-kubernetes-subnets.md - - Kubevirt VM Live Migration: design/live-migration.md + - Kubevirt VM Live Migration: features/live-migration.md - Getting Started: - Launching OVN-Kubernetes: installation/launching-ovn-kubernetes-on-kind.md - Launching OVN-Kubernetes Using Helm: installation/launching-ovn-kubernetes-with-helm.md + - Launching OVN-Kubernetes with DPU Acceleration: installation/launching-ovn-kubernetes-with-dpu.md - Configuration Guide: getting-started/configuration.md - CLI Guide: getting-started/cli-guide.md - Deploying Workloads on OVN-Kubernetes cluster: getting-started/example-pod-creation.md diff --git a/openshift/test/generated/zz_generated.annotations.go b/openshift/test/generated/zz_generated.annotations.go index 4929f618f5..1cd08beb94 100644 --- a/openshift/test/generated/zz_generated.annotations.go +++ b/openshift/test/generated/zz_generated.annotations.go @@ -1141,12 +1141,16 @@ var AppendedAnnotations = map[string]string{ "Multicast when multicast enabled for namespace should be able to send multicast UDP traffic between nodes": "[Disabled:Unimplemented]", + "Network Policy: ICMP bypass allows ICMP between pods with default deny policy on the default network": "[Disabled:Unimplemented]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller 
pod connected to ClusterUserDefinedNetwork CR & managed NADs cannot be deleted when being used": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD according to spec in each target namespace and report active namespaces": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller should create NAD in new created namespaces that apply to namespace-selector": "[Suite:openshift/conformance/parallel]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller should delete NAD when target namespace is terminating": "[Suite:openshift/conformance/parallel]", + "Network Segmentation ClusterUserDefinedNetwork CRD Controller when CR is deleted, should delete all managed NAD in each target namespace": "[Suite:openshift/conformance/parallel]", "Network Segmentation ClusterUserDefinedNetwork CRD Controller when namespace-selector is mutated should create NAD in namespaces that apply to mutated namespace-selector": "[Suite:openshift/conformance/parallel]", @@ -1337,6 +1341,10 @@ var AppendedAnnotations = map[string]string{ "Network Segmentation: Localnet using ClusterUserDefinedNetwork CR, pods in different namespaces, should communicate over localnet topology": "[Disabled:Unimplemented]", + "Network Segmentation: Network Policies on a user defined primary network ICMP should bypass default deny policy for UDNs when enabled in L2 dualstack primary UDN": "[Suite:openshift/conformance/parallel]", + + "Network Segmentation: Network Policies on a user defined primary network ICMP should bypass default deny policy for UDNs when enabled in L3 dualstack primary UDN": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: Network Policies on a user defined primary network allow ingress traffic to one pod from a particular namespace in L2 primary UDN": "[Disabled:Unimplemented]", "Network Segmentation: Network Policies on a user defined primary network 
allow ingress traffic to one pod from a particular namespace in L3 primary UDN": "[Disabled:Unimplemented]", @@ -1365,6 +1373,8 @@ var AppendedAnnotations = map[string]string{ "Network Segmentation: Preconfigured Layer2 UDN unmasked reserved / infrastructure subnets are not allowed Layer2 with unmasked IPv6 reserved subnets": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: integration should recover ovnkube pods after restart with primary and secondary UDN resources": "[Suite:openshift/conformance/parallel]", + "Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN with custom network, cluster-networked pods, NodePort service": "[Suite:openshift/conformance/parallel]", "Network Segmentation: services on a user defined primary network should be reachable through their cluster IP, node port and load balancer L2 primary UDN, cluster-networked pods, NodePort service": "[Disabled:Unimplemented]", diff --git a/openshift/test/infraprovider/openshift.go b/openshift/test/infraprovider/openshift.go index ab4bf119a7..cb3920a580 100644 --- a/openshift/test/infraprovider/openshift.go +++ b/openshift/test/infraprovider/openshift.go @@ -153,6 +153,10 @@ func (c *contextOpenshift) GetExternalContainerLogs(container api.ExternalContai panic("not implemented") } +func (o openshift) ListNetworks() ([]string, error) { + panic("not implemented") +} + func (c contextOpenshift) CreateNetwork(name string, subnets ...string) (api.Network, error) { panic("not implemented") } diff --git a/test/conformance/go.mod b/test/conformance/go.mod index c8e5e1c2fa..db85159bc0 100644 --- a/test/conformance/go.mod +++ b/test/conformance/go.mod @@ -9,7 +9,7 @@ require ( k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.1 - sigs.k8s.io/network-policy-api v0.1.8 + sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 ) require ( diff 
--git a/test/conformance/go.sum b/test/conformance/go.sum index 32cc425e5f..140d9575cf 100644 --- a/test/conformance/go.sum +++ b/test/conformance/go.sum @@ -182,8 +182,8 @@ sigs.k8s.io/controller-runtime v0.22.1 h1:Ah1T7I+0A7ize291nJZdS1CabF/lB4E++WizgV sigs.k8s.io/controller-runtime v0.22.1/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/network-policy-api v0.1.8 h1:p/VY4aX6LqohGx4sH1X3jdQh6BZ/Gb+8DoQhHKC1fZQ= -sigs.k8s.io/network-policy-api v0.1.8/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= +sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00 h1:k9sO9mBPtR4hRBiTQbk2hLVdDXBEdM4m5TEjZetT360= +sigs.k8s.io/network-policy-api v0.1.9-0.20260225114943-e80807c44a00/go.mod h1:QIWX6Th2h0SmCwOwa1+9Urs0W+WDJGL5rujAPUemdkk= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= diff --git a/test/e2e/acl_logging.go b/test/e2e/acl_logging.go index c5c129769b..f07a81e3c4 100644 --- a/test/e2e/acl_logging.go +++ b/test/e2e/acl_logging.go @@ -190,18 +190,20 @@ var _ = Describe("ACL Logging for AdminNetworkPolicy and BaselineAdminNetworkPol nsNames [4]string ) BeforeEach(func() { + nsNames[0] = fr.Namespace.Name + suffix := framework.RandomSuffix() + nsNames[1] = fmt.Sprintf("anp-peer-restricted-%s", suffix) + nsNames[2] = fmt.Sprintf("anp-peer-open-%s", suffix) + nsNames[3] = fmt.Sprintf("anp-peer-unknown-%s", suffix) + By("creating an admin network policy") - err := makeAdminNetworkPolicy(anpName, "10", fr.Namespace.Name) + err := makeAdminNetworkPolicy(anpName, "10", fr.Namespace.Name, nsNames[1], nsNames[2], nsNames[3]) 
Expect(err).NotTo(HaveOccurred()) By("configuring the ACL logging level for the ANP") Expect(setANPACLLogSeverity(anpName, initialDenyACLSeverity, initialAllowACLSeverity, initialPassACLSeverity)).To(Succeed()) By("creating peer namespaces that are selected by the admin network policy") - nsNames[0] = fr.Namespace.Name - nsNames[1] = "anp-peer-restricted" - nsNames[2] = "anp-peer-open" - nsNames[3] = "anp-peer-unknown" for _, ns := range nsNames[1:] { _, err = e2ekubectl.RunKubectl("default", "create", "ns", ns) Expect(err).NotTo(HaveOccurred()) @@ -309,7 +311,7 @@ var _ = Describe("ACL Logging for AdminNetworkPolicy and BaselineAdminNetworkPol }, maxPokeRetries*pokeInterval, pokeInterval).Should(BeTrue()) By("creating a baseline admin network policy") - err = makeBaselineAdminNetworkPolicy(fr.Namespace.Name) + err = makeBaselineAdminNetworkPolicy(fr.Namespace.Name, nsNames[1], nsNames[3]) Expect(err).NotTo(HaveOccurred()) By("configuring the ACL logging level for the BANP") @@ -956,7 +958,7 @@ func makeDenyAllPolicy(f *framework.Framework, ns string, policyName string) (*k return f.ClientSet.NetworkingV1().NetworkPolicies(ns).Create(context.TODO(), policy, metav1.CreateOptions{}) } -func makeAdminNetworkPolicy(anpName, priority, anpSubjectNS string) error { +func makeAdminNetworkPolicy(anpName, priority, anpSubjectNS, restrictedPeerNS, openPeerNS, unknownPeerNS string) error { anpYaml := "anp.yaml" var anpConfig = fmt.Sprintf(`apiVersion: policy.networking.k8s.io/v1alpha1 kind: AdminNetworkPolicy @@ -974,20 +976,20 @@ spec: to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-restricted + kubernetes.io/metadata.name: %s - name: "deny-to-open" action: "Deny" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-open + kubernetes.io/metadata.name: %s - name: "pass-to-unknown" action: "Pass" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-unknown -`, anpName, priority, anpSubjectNS) + 
kubernetes.io/metadata.name: %s +`, anpName, priority, anpSubjectNS, restrictedPeerNS, openPeerNS, unknownPeerNS) if err := os.WriteFile(anpYaml, []byte(anpConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) @@ -1003,7 +1005,7 @@ spec: return err } -func makeBaselineAdminNetworkPolicy(banpSubjectNS string) error { +func makeBaselineAdminNetworkPolicy(banpSubjectNS, restrictedPeerNS, unknownPeerNS string) error { banpYaml := "banp.yaml" var banpConfig = fmt.Sprintf(`apiVersion: policy.networking.k8s.io/v1alpha1 kind: BaselineAdminNetworkPolicy @@ -1020,14 +1022,14 @@ spec: to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-restricted + kubernetes.io/metadata.name: %s - name: "deny-to-unknown" action: "Deny" to: - namespaces: matchLabels: - kubernetes.io/metadata.name: anp-peer-unknown -`, banpSubjectNS) + kubernetes.io/metadata.name: %s +`, banpSubjectNS, restrictedPeerNS, unknownPeerNS) if err := os.WriteFile(banpYaml, []byte(banpConfig), 0644); err != nil { framework.Failf("Unable to write CRD config to disk: %v", err) diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index 6e7d75f147..b795e73bef 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -2160,35 +2160,24 @@ spec: providerPrimaryNetwork, err := infraprovider.Get().PrimaryNetwork() framework.ExpectNoError(err, "failed to get providers primary network") externalContainerPrimary := infraapi.ExternalContainer{Name: "external-container-for-egressip-mtu-test", Image: images.AgnHost(), - Network: providerPrimaryNetwork, CmdArgs: []string{"pause"}, ExtPort: externalContainerPrimaryPort} + Network: providerPrimaryNetwork, RuntimeArgs: []string{"--sysctl", "net.ipv4.ip_no_pmtu_disc=2"}, + CmdArgs: []string{"netexec", httpPort, udpPort}, ExtPort: externalContainerPrimaryPort} externalContainerPrimary, err = providerCtx.CreateExternalContainer(externalContainerPrimary) framework.ExpectNoError(err, "failed to create external container: 
%s", externalContainerPrimary.String()) - // First disable PMTUD - _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=2"}) - framework.ExpectNoError(err, "disabling PMTUD in the external kind container failed: %v", err) - providerCtx.AddCleanUpFn(func() error { - _, err = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"sysctl", "-w", "net.ipv4.ip_no_pmtu_disc=0"}) - return err - }) - - go func() { - _, _ = infraprovider.Get().ExecExternalContainerCommand(externalContainerPrimary, []string{"/agnhost", "netexec", httpPort, udpPort}) - }() - ginkgo.By("Checking connectivity to the external kind container and verify that the source IP is the egress IP") var curlErr error - _ = wait.PollUntilContextTimeout( + err = wait.PollUntilContextTimeout( context.Background(), retryInterval, retryTimeout, true, func(ctx context.Context) (bool, error) { - curlErr := curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) + curlErr = curlAgnHostClientIPFromPod(podNamespace.Name, pod1Name, egressIP1.String(), externalContainerPrimary.GetIPv4(), externalContainerPrimary.GetPortStr()) return curlErr == nil, nil }, ) - framework.ExpectNoError(curlErr, "connectivity check to the external kind container failed: %v", curlErr) + framework.ExpectNoError(err, "connectivity check to the external kind container failed: %v", curlErr) // We will ask the server to reply with a UDP packet bigger than the pod // network MTU. 
Since PMTUD has been disabled on the server, the reply diff --git a/test/e2e/infraprovider/api/api.go b/test/e2e/infraprovider/api/api.go index 2a38ef6595..99030444c2 100644 --- a/test/e2e/infraprovider/api/api.go +++ b/test/e2e/infraprovider/api/api.go @@ -18,6 +18,8 @@ type Provider interface { // PrimaryNetwork returns OVN-Kubernetes primary infrastructure network information PrimaryNetwork() (Network, error) + // ListNetworks returns the names of all networks + ListNetworks() ([]string, error) // GetNetwork returns a network GetNetwork(name string) (Network, error) // GetExternalContainerNetworkInterface fetches network interface information from the external container attached to a specific network diff --git a/test/e2e/infraprovider/providers/kind/kind.go b/test/e2e/infraprovider/providers/kind/kind.go index 532a45fe5d..31c28a04c5 100644 --- a/test/e2e/infraprovider/providers/kind/kind.go +++ b/test/e2e/infraprovider/providers/kind/kind.go @@ -68,6 +68,10 @@ func (k *kind) GetNetwork(name string) (api.Network, error) { return getNetwork(name) } +func (k *kind) ListNetworks() ([]string, error) { + return listNetworks() +} + func (k *kind) GetExternalContainerNetworkInterface(container api.ExternalContainer, network api.Network) (api.NetworkInterface, error) { return getNetworkInterface(container.Name, network.Name()) } @@ -607,6 +611,8 @@ const ( inspectNetworkMACKeyStr = "{{ with index .NetworkSettings.Networks %q }}{{ .MacAddress }}{{ end }}" inspectNetworkContainersKeyStr = "{{ range $key, $value := .Containers }}{{ printf \"%s\\n\" $value.Name}}{{ end }}'" emptyValue = "" + // Docker 29+ returns "invalid IP" for IP fields + emptyIPValue = "invalid IP" ) func isNetworkAttachedToContainer(networkName, containerName string) bool { @@ -627,13 +633,27 @@ func doesContainerNameExist(name string) (bool, error) { return state != "", nil } +func listNetworks() ([]string, error) { + output, err := exec.Command(containerengine.Get().String(), "network", "ls", 
"--format", nameFormat).CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to list networks: %w", err) + } + var networks []string + for _, name := range strings.Split(strings.TrimSpace(string(output)), "\n") { + if name != "" { + networks = append(networks, name) + } + } + return networks, nil +} + func doesNetworkExist(networkName string) (bool, error) { - dataBytes, err := exec.Command(containerengine.Get().String(), "network", "ls", "--format", nameFormat).CombinedOutput() + networks, err := listNetworks() if err != nil { - return false, fmt.Errorf("failed to list networks: %w", err) + return false, err } - for _, existingNetworkName := range strings.Split(strings.Trim(string(dataBytes), "\n"), "\n") { - if existingNetworkName == networkName { + for _, name := range networks { + if name == networkName { return true, nil } } @@ -715,7 +735,7 @@ func getNetworkInterface(containerName, networkName string) (api.NetworkInterfac } valueStr := strings.Trim(string(value), "\n") valueStr = strings.Trim(valueStr, "'") - if valueStr == emptyValue { + if valueStr == emptyValue || valueStr == emptyIPValue { return "", nil } return valueStr, nil diff --git a/test/e2e/network_policy_icmp.go b/test/e2e/network_policy_icmp.go new file mode 100644 index 0000000000..510394a2fe --- /dev/null +++ b/test/e2e/network_policy_icmp.go @@ -0,0 +1,71 @@ +package e2e + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + + "k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = ginkgo.Describe("Network Policy: ICMP bypass", feature.NetworkPolicy, func() { + f := wrappedTestFramework("network-policy-icmp") + + ginkgo.BeforeEach(func() { + if !isICMPNetworkPolicyBypassEnabled() { + ginkgo.Skip("Allow ICMP bypass with NetworkPolicy is not enabled, skipping ICMP bypass network policy tests") + } + }) + + ginkgo.It("allows ICMP between 
pods with default deny policy on the default network", func() { + namespace := f.Namespace.Name + + ginkgo.By("creating a \"default deny\" network policy") + _, err := makeDenyAllPolicy(f, namespace, "deny-all") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("creating server and client pods") + serverPodName := "icmp-server" + clientPodName := "icmp-client" + serverCmd := []string{"/bin/bash", "-c", "/agnhost netexec --http-port 8000"} + clientCmd := []string{"/agnhost", "pause"} + + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), f.ClientSet, 2) + framework.ExpectNoError(err, "") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + serverNode := nodes.Items[0].Name + clientNode := nodes.Items[1].Name + + serverPod, err := createGenericPod(f, serverPodName, serverNode, namespace, serverCmd) + framework.ExpectNoError(err, "failed to create server pod") + _, err = createGenericPod(f, clientPodName, clientNode, namespace, clientCmd) + framework.ExpectNoError(err, "failed to create client pod") + + clientConfig := podConfiguration{name: clientPodName, namespace: namespace} + serverConfig := podConfiguration{name: serverPodName, namespace: namespace} + + ginkgo.By("verifying TCP is denied by the default deny policy") + gomega.Eventually(func() error { + return pokePod(f, clientPodName, serverPod.Status.PodIP) + }, 1*time.Minute, 6*time.Second).ShouldNot(gomega.Succeed()) + gomega.Consistently(func() error { + return pokePod(f, clientPodName, serverPod.Status.PodIP) + }, 15*time.Second, 5*time.Second).ShouldNot(gomega.Succeed()) + + ginkgo.By("verifying ICMP is allowed between pods") + serverIPs, err := podIPsFromStatus(f.ClientSet, namespace, serverPodName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return pingServerPodFromClient(f.ClientSet, serverConfig, clientConfig, serverIP) + }, 1*time.Minute, 
6*time.Second).Should(gomega.Succeed()) + } + }) +}) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 0aefc2236c..6917ab44f5 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -47,6 +47,7 @@ import ( const openDefaultPortsAnnotation = "k8s.ovn.org/open-default-ports" const RequiredUDNNamespaceLabel = "k8s.ovn.org/primary-user-defined-network" const OvnPodAnnotationName = "k8s.ovn.org/pod-networks" +const expectedUDNCNIVersion = "1.1.0" var _ = Describe("Network Segmentation", feature.NetworkSegmentation, func() { f := wrappedTestFramework("network-segmentation") @@ -1317,6 +1318,40 @@ spec: } }) + It("should delete NAD when target namespace is terminating", func() { + testTerminatingNs := f.Namespace.Name + "terminating" + + By("add new target namespace to CR namespace-selector") + patch := fmt.Sprintf(`[{"op": "add", "path": "./spec/namespaceSelector/matchExpressions/0/values/-", "value": "%s"}]`, testTerminatingNs) + _, err := e2ekubectl.RunKubectl("", "patch", clusterUserDefinedNetworkResource, testClusterUdnName, "--type=json", "-p="+patch) + Expect(err).NotTo(HaveOccurred()) + + By("create the target namespace") + _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testTerminatingNs, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("verify NAD is created in the namespace") + Eventually(func() error { + _, err := nadClient.NetworkAttachmentDefinitions(testTerminatingNs).Get(context.Background(), testClusterUdnName, metav1.GetOptions{}) + return err + }, time.Second*15, time.Second*1).Should(Succeed(), "NAD should be created in target namespace") + + By("delete the namespace to trigger termination") + err = cs.CoreV1().Namespaces().Delete(context.Background(), testTerminatingNs, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + 
+ By("verify NAD is deleted from the terminating namespace") + Eventually(func() bool { + _, err := nadClient.NetworkAttachmentDefinitions(testTerminatingNs).Get(context.Background(), testClusterUdnName, metav1.GetOptions{}) + return err != nil && kerrors.IsNotFound(err) + }, time.Second*30, time.Second*1).Should(BeTrue(), + "NAD should be deleted when namespace is terminating") + }) + It("should create NAD in new created namespaces that apply to namespace-selector", func() { testNewNs := f.Namespace.Name + "green" @@ -2093,7 +2128,7 @@ func assertL2SecondaryNetAttachDefManifest(nadClient nadclient.K8sCniCncfIoV1Int expectedNetworkName := namespace + "_" + udnName expectedNadName := namespace + "/" + udnName ExpectWithOffset(1, nad.Spec.Config).To(MatchJSON(`{ - "cniVersion":"1.0.0", + "cniVersion":"` + expectedUDNCNIVersion + `", "type": "ovn-k8s-cni-overlay", "name": "` + expectedNetworkName + `", "netAttachDefName": "` + expectedNadName + `", @@ -2158,7 +2193,7 @@ func assertClusterNADManifest(nadClient nadclient.K8sCniCncfIoV1Interface, names expectedNetworkName := "cluster_udn_" + udnName expectedNadName := namespace + "/" + udnName ExpectWithOffset(1, nad.Spec.Config).To(MatchJSON(`{ - "cniVersion":"1.0.0", + "cniVersion":"` + expectedUDNCNIVersion + `", "type": "ovn-k8s-cni-overlay", "name": "` + expectedNetworkName + `", "netAttachDefName": "` + expectedNadName + `", diff --git a/test/e2e/network_segmentation_default_network_annotation.go b/test/e2e/network_segmentation_default_network_annotation.go index 4e42658588..fce9005126 100644 --- a/test/e2e/network_segmentation_default_network_annotation.go +++ b/test/e2e/network_segmentation_default_network_annotation.go @@ -177,11 +177,6 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat Expect(err).NotTo(HaveOccurred(), "Should create UserDefinedNetwork") Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, udn.Namespace, udn.Name), 5*time.Second, 
time.Second).Should(Succeed()) - By("Creating a pod without the default-network annotation") - podWithoutAnnotation := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-without-annotation", nil, nil, nil) - podWithoutAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} - podWithoutAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithoutAnnotation) - By("Creating a pod with the default-network annotation") nse := []nadapi.NetworkSelectionElement{{ @@ -200,6 +195,11 @@ var _ = Describe("Network Segmentation: Default network multus annotation", feat podWithAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} podWithAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithAnnotation) + By("Creating a pod without the default-network annotation") + podWithoutAnnotation := e2epod.NewAgnhostPod(f.Namespace.Name, "pod-without-annotation", nil, nil, nil) + podWithoutAnnotation.Spec.Containers[0].Command = []string{"sleep", "infinity"} + podWithoutAnnotation = e2epod.NewPodClient(f).CreateSync(context.TODO(), podWithoutAnnotation) + By("Attempting to add the default-network annotation to the pod without annotation") podWithoutAnnotation.Annotations = map[string]string{ "v1.multus-cni.io/default-network": string(marshalledNSE), diff --git a/test/e2e/network_segmentation_integration.go b/test/e2e/network_segmentation_integration.go new file mode 100644 index 0000000000..f2563980bb --- /dev/null +++ b/test/e2e/network_segmentation_integration.go @@ -0,0 +1,304 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + mnpapi "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" + mnpclient "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1beta1" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/test/e2e/deploymentconfig" + "github.com/ovn-org/ovn-kubernetes/test/e2e/feature" + "github.com/ovn-org/ovn-kubernetes/test/e2e/ipalloc" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" +) + +var _ = Describe("Network Segmentation: integration", feature.NetworkSegmentation, func() { + f := wrappedTestFramework("network-segmentation-integration") + f.SkipNamespaceCreation = true + + var cs clientset.Interface + + BeforeEach(func() { + cs = f.ClientSet + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) + }) + + It("should recover ovnkube pods after restart with primary and secondary UDN resources", func() { + const ( + primaryUDNName = "primary-udn" + secondaryUDNName = "secondary-udn" + egressIPName = "udn-egressip" + udnPodName = "udn-egress-pod" + udnServiceName = "udn-service" + serviceTargetPort = 80 + nodeHostnameKey = "kubernetes.io/hostname" + egressPodLabelKey = "udn-egress-pod" + egressPodLabelVal = "enabled" + egressNSLabelKey = "udn-egress-namespace" + egressNSLabelValue = "enabled" + ) + DeferCleanup(func() { + e2ekubectl.RunKubectlOrDie("", "delete", "eip", egressIPName, "--ignore-not-found=true") + }) + + primaryNamespace := f.Namespace.Name + + By("creating a primary UDN and waiting until it is ready") + cleanupPrimaryUDN, err := createManifest(primaryNamespace, newPrimaryUserDefinedNetworkManifest(cs, primaryUDNName)) + Expect(err).NotTo(HaveOccurred()) + defer cleanupPrimaryUDN() + 
Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, primaryNamespace, primaryUDNName), 30*time.Second, time.Second).Should(Succeed()) + + By("creating a secondary UDN and waiting until it is ready") + cleanupSecondaryUDN, err := createManifest(primaryNamespace, newL2SecondaryUDNManifest(secondaryUDNName)) + Expect(err).NotTo(HaveOccurred()) + defer cleanupSecondaryUDN() + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, primaryNamespace, secondaryUDNName), 30*time.Second, time.Second).Should(Succeed()) + + By("labeling the primary namespace so it matches the EgressIP namespace selector") + primaryNSObj, err := cs.CoreV1().Namespaces().Get(context.Background(), primaryNamespace, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + if primaryNSObj.Labels == nil { + primaryNSObj.Labels = map[string]string{} + } + primaryNSObj.Labels[egressNSLabelKey] = egressNSLabelValue + _, err = cs.CoreV1().Namespaces().Update(context.Background(), primaryNSObj, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("selecting one schedulable node for both pod placement and EgressIP assignment") + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 1) + Expect(err).NotTo(HaveOccurred()) + Expect(nodes.Items).NotTo(BeEmpty()) + targetNode := nodes.Items[0].Name + + By(fmt.Sprintf("labeling node %s as egress assignable", targetNode)) + labelNodeForEgress(f, targetNode) + DeferCleanup(func() { + e2ekubectl.RunKubectlOrDie("default", "label", "node", targetNode, "k8s.ovn.org/egress-assignable-") + }) + + By("creating an EgressIP object selected by the primary UDN namespace and pod label") + var egressIP string + if isIPv4Supported(cs) { + egressIPv4, allocErr := ipalloc.NewPrimaryIPv4() + Expect(allocErr).NotTo(HaveOccurred()) + egressIP = egressIPv4.String() + } else { + egressIPv6, allocErr := ipalloc.NewPrimaryIPv6() + Expect(allocErr).NotTo(HaveOccurred()) + egressIP = egressIPv6.String() + } + cleanupEIP, err := createManifest("", 
createEIPManifest( + egressIPName, + map[string]string{egressPodLabelKey: egressPodLabelVal}, + map[string]string{egressNSLabelKey: egressNSLabelValue}, + egressIP, + )) + Expect(err).NotTo(HaveOccurred()) + defer cleanupEIP() + + By("creating a pod, service and network policy in the primary UDN namespace") + udnPodCfg := *podConfig( + udnPodName, + withCommand(func() []string { + return httpServerContainerCmd(serviceTargetPort) + }), + withLabels(map[string]string{egressPodLabelKey: egressPodLabelVal}), + withNodeSelector(map[string]string{nodeHostnameKey: targetNode}), + withNetworkAttachment([]nadapi.NetworkSelectionElement{ + {Name: secondaryUDNName}, + }), + ) + udnPodCfg.namespace = primaryNamespace + udnPod := runUDNPod(cs, primaryNamespace, udnPodCfg, nil) + Expect(udnPod).NotTo(BeNil()) + var secondaryAttachmentStatus []nadapi.NetworkStatus + Eventually(func() ([]nadapi.NetworkStatus, error) { + udnPod, err = cs.CoreV1().Pods(primaryNamespace).Get(context.Background(), udnPod.Name, metav1.GetOptions{}) + if err != nil { + return nil, err + } + secondaryAttachmentStatus, err = podNetworkStatus(udnPod, func(status nadapi.NetworkStatus) bool { + return status.Name == namespacedName(primaryNamespace, secondaryUDNName) + }) + return secondaryAttachmentStatus, err + }, 30*time.Second, time.Second).Should(HaveLen(1)) + + By("ensuring EgressIP is assigned to the same node as the pod") + Expect(waitForEgressIPAssignedNode(egressIPName, targetNode)).To(Succeed()) + + By("creating a multi network policy for the secondary UDN") + mnpCli, err := mnpclient.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred()) + const secondaryUDNMNPName = "secondary-udn-default-deny" + secondaryUDNMNP := &mnpapi.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: secondaryUDNMNPName, + Annotations: map[string]string{ + PolicyForAnnotation: secondaryUDNName, + }, + }, + Spec: mnpapi.MultiNetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: 
map[string]string{egressPodLabelKey: egressPodLabelVal}, + }, + PolicyTypes: []mnpapi.MultiPolicyType{ + mnpapi.PolicyTypeIngress, + mnpapi.PolicyTypeEgress, + }, + }, + } + _, err = mnpCli.MultiNetworkPolicies(primaryNamespace).Create(context.Background(), secondaryUDNMNP, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + DeferCleanup(func() { + _ = mnpCli.MultiNetworkPolicies(primaryNamespace).Delete(context.Background(), secondaryUDNMNPName, metav1.DeleteOptions{}) + }) + + _, err = cs.CoreV1().Services(primaryNamespace).Create(context.Background(), &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: udnServiceName, + }, + Spec: v1.ServiceSpec{ + Selector: map[string]string{egressPodLabelKey: egressPodLabelVal}, + Ports: []v1.ServicePort{ + { + Name: "http", + Port: serviceTargetPort, + Protocol: v1.ProtocolTCP, + TargetPort: intstr.FromInt(serviceTargetPort), + }, + }, + }, + }, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + _, err = makeDenyAllPolicy(f, primaryNamespace, "deny-all") + Expect(err).NotTo(HaveOccurred()) + + By("restarting each ovnkube pod and ensuring all pods recover without crash loops") + Expect(restartAllOVNKubePodsAndAssertHealthy(f)).To(Succeed()) + }) +}) + +func restartAllOVNKubePodsAndAssertHealthy(f *framework.Framework) error { + ovnNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + pods, err := f.ClientSet.CoreV1().Pods(ovnNamespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list ovnkube pods in namespace %s: %w", ovnNamespace, err) + } + + restartedPods := 0 + for i := range pods.Items { + pod := pods.Items[i] + if !strings.HasPrefix(pod.Name, "ovnkube-") || pod.Status.Phase != v1.PodRunning { + continue + } + restartedPods++ + framework.Logf("restarting ovnkube pod %s/%s", pod.Namespace, pod.Name) + if err := deletePodWithWait(context.Background(), f.ClientSet, &pod); err != nil { + return fmt.Errorf("failed restarting ovnkube pod 
%s/%s: %w", pod.Namespace, pod.Name, err) + } + } + if restartedPods == 0 { + return fmt.Errorf("no running ovnkube pods found in namespace %s", ovnNamespace) + } + + if err := waitOVNKubernetesHealthy(f); err != nil { + return fmt.Errorf("ovn-kubernetes did not become healthy after restarting %d pods: %w", restartedPods, err) + } + + return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) { + if err := assertOVNKubePodsReadyAndNotCrashLooping(f.ClientSet, ovnNamespace); err != nil { + framework.Logf("ovnkube pod readiness/crashloop check still failing: %v", err) + return false, nil + } + return true, nil + }) +} + +func assertOVNKubePodsReadyAndNotCrashLooping(cs clientset.Interface, namespace string) error { + pods, err := cs.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed listing ovnkube pods: %w", err) + } + + found := 0 + for _, pod := range pods.Items { + if !strings.HasPrefix(pod.Name, "ovnkube-") { + continue + } + found++ + if pod.Status.Phase != v1.PodRunning { + return fmt.Errorf("pod %s is not running (phase=%s)", pod.Name, pod.Status.Phase) + } + + ready := false + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodReady && condition.Status == v1.ConditionTrue { + ready = true + break + } + } + if !ready { + return fmt.Errorf("pod %s is not ready", pod.Name) + } + + for _, status := range append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...) 
{ + if status.State.Waiting != nil && status.State.Waiting.Reason == "CrashLoopBackOff" { + return fmt.Errorf("pod %s container %s is in CrashLoopBackOff", pod.Name, status.Name) + } + } + } + + if found == 0 { + return fmt.Errorf("no ovnkube pods found in namespace %s", namespace) + } + return nil +} + +func waitForEgressIPAssignedNode(egressIPName, nodeName string) error { + return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) { + egressIPStdout, err := e2ekubectl.RunKubectl("", "get", "eip", egressIPName, "-o", "json") + if err != nil { + framework.Logf("failed to fetch EgressIP %s status: %v", egressIPName, err) + return false, nil + } + + var eip egressIP + if err := json.Unmarshal([]byte(egressIPStdout), &eip); err != nil { + return false, fmt.Errorf("failed to unmarshal EgressIP %s status: %w", egressIPName, err) + } + + if len(eip.Status.Items) == 0 { + framework.Logf("EgressIP %s has no status items yet", egressIPName) + return false, nil + } + + for _, status := range eip.Status.Items { + if status.Node == nodeName { + return true, nil + } + } + framework.Logf("EgressIP %s not assigned to node %s yet (statuses: %+v)", egressIPName, nodeName, eip.Status.Items) + return false, nil + }) +} diff --git a/test/e2e/network_segmentation_policy.go b/test/e2e/network_segmentation_policy.go index 44f47598b9..0fc8216c59 100644 --- a/test/e2e/network_segmentation_policy.go +++ b/test/e2e/network_segmentation_policy.go @@ -207,6 +207,123 @@ var _ = ginkgo.Describe("Network Segmentation: Network Policies", feature.Networ ), ) + ginkgo.DescribeTable( + "ICMP should bypass default deny policy for UDNs when enabled", + func( + netConfigParams networkAttachmentConfigParams, + clientPodConfig podConfiguration, + serverPodConfig podConfiguration, + ) { + if !isICMPNetworkPolicyBypassEnabled() { + ginkgo.Skip("ICMP Network Policy bypass is not enabled, skipping ICMP bypass network policy tests") + } + + ginkgo.By("Creating the attachment configuration") 
+ netConfig := newNetworkAttachmentConfig(netConfigParams) + netConfig.namespace = f.Namespace.Name + netConfig.cidr = filterCIDRsAndJoin(cs, netConfig.cidr) + _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create( + context.Background(), + generateNAD(netConfig, f.ClientSet), + metav1.CreateOptions{}, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("creating client/server pods") + serverPodConfig.namespace = f.Namespace.Name + clientPodConfig.namespace = f.Namespace.Name + nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.TODO(), cs, 2) + framework.ExpectNoError(err, "") + if len(nodes.Items) < 2 { + ginkgo.Skip("requires at least 2 Nodes") + } + serverPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[0].GetName()} + clientPodConfig.nodeSelector = map[string]string{nodeHostnameKey: nodes.Items[1].GetName()} + runUDNPod(cs, f.Namespace.Name, serverPodConfig, nil) + runUDNPod(cs, f.Namespace.Name, clientPodConfig, nil) + + ginkgo.By("creating a \"default deny\" network policy") + _, err = makeDenyAllPolicy(f, f.Namespace.Name, "deny-all") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + var serverIPs []string + for i, cidr := range strings.Split(netConfig.cidr, ",") { + if cidr == "" { + continue + } + serverIP, err := getPodAnnotationIPsForAttachmentByIndex( + cs, + f.Namespace.Name, + serverPodConfig.name, + namespacedName(f.Namespace.Name, netConfig.name), + i, + ) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + serverIPs = append(serverIPs, serverIP) + } + gomega.Expect(serverIPs).NotTo(gomega.BeEmpty()) + + ginkgo.By("asserting the *client* pod can ping the server pod despite the default deny policy") + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return pingServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP) + }, 1*time.Minute, 6*time.Second).Should(gomega.Succeed()) + } + + ginkgo.By("asserting the *client* pod can not reach the 
server pod HTTP endpoint due to default deny policy") + for _, serverIP := range serverIPs { + gomega.Eventually(func() error { + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 1*time.Minute, 6*time.Second).ShouldNot(gomega.Succeed()) + gomega.Consistently(func() error { + return reachServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 15*time.Second, 5*time.Second).ShouldNot(gomega.Succeed()) + } + }, + ginkgo.Entry( + "in L2 dualstack primary UDN", + networkAttachmentConfigParams{ + name: nadName, + topology: "layer2", + cidr: joinStrings(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + role: "primary", + }, + *podConfig( + "client-pod", + withCommand(func() []string { + return []string{"/agnhost", "pause"} + }), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + ), + ), + ginkgo.Entry( + "in L3 dualstack primary UDN", + networkAttachmentConfigParams{ + name: nadName, + topology: "layer3", + cidr: joinStrings(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + role: "primary", + }, + *podConfig( + "client-pod", + withCommand(func() []string { + return []string{"/agnhost", "pause"} + }), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + ), + ), + ) + ginkgo.DescribeTable( "allow ingress traffic to one pod from a particular namespace", func( diff --git a/test/e2e/route_advertisements.go b/test/e2e/route_advertisements.go index c9335c2a95..11826272e8 100644 --- a/test/e2e/route_advertisements.go +++ b/test/e2e/route_advertisements.go @@ -898,814 +898,860 @@ var _ = ginkgo.DescribeTableSubtree("BGP: isolation between advertised networks" var cudnA, cudnB *udnv1.ClusterUserDefinedNetwork var ra *rav1.RouteAdvertisements var hostNetworkPort int - ginkgo.BeforeEach(func() { - ginkgo.By("Configuring primary UDN namespaces") - var err error - 
udnNamespaceA, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - f.Namespace = udnNamespaceA - udnNamespaceB, err = f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.Context("", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { + ginkgo.BeforeAll(func() { + ginkgo.By("Configuring primary UDN namespaces") + var err error + // Create namespaces directly via the API instead of f.CreateNamespace() + // to avoid framework cleaning them up in AfterEach + udnNamespaceA, err = f.ClientSet.CoreV1().Namespaces().Create(context.TODO(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: f.BaseName + "-", + Labels: map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }, + }, + }, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + f.Namespace = udnNamespaceA + udnNamespaceB, err = f.ClientSet.CoreV1().Namespaces().Create(context.TODO(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: f.BaseName + "-", + Labels: map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }, + }, + }, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Configuring networks") - cudnATemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{udnNamespaceA.Name}, - }}} - cudnBTemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{udnNamespaceB.Name}, - }}} + ginkgo.By("Configuring 
networks") + cudnATemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{udnNamespaceA.Name}, + }}} + cudnBTemplate.Spec.NamespaceSelector = metav1.LabelSelector{MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "kubernetes.io/metadata.name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{udnNamespaceB.Name}, + }}} - // set a common label used to advertise both networks with one RA - cudnATemplate.Labels["advertised-networks-isolation"] = "" - cudnBTemplate.Labels["advertised-networks-isolation"] = "" + // set a common label used to advertise both networks with one RA + cudnATemplate.Labels["advertised-networks-isolation"] = "" + cudnBTemplate.Labels["advertised-networks-isolation"] = "" - udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - if cudnATemplate.Spec.Network.Layer3 != nil { - cudnATemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnATemplate.Spec.Network.Layer3.Subnets) - } - if cudnATemplate.Spec.Network.Layer2 != nil { - cudnATemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnATemplate.Spec.Network.Layer2.Subnets) - } - if cudnBTemplate.Spec.Network.Layer3 != nil { - cudnBTemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnBTemplate.Spec.Network.Layer3.Subnets) - } - if cudnBTemplate.Spec.Network.Layer2 != nil { - cudnBTemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnBTemplate.Spec.Network.Layer2.Subnets) - } + if cudnATemplate.Spec.Network.Layer3 != nil { + cudnATemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnATemplate.Spec.Network.Layer3.Subnets) + } + if cudnATemplate.Spec.Network.Layer2 != 
nil { + cudnATemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnATemplate.Spec.Network.Layer2.Subnets) + } + if cudnBTemplate.Spec.Network.Layer3 != nil { + cudnBTemplate.Spec.Network.Layer3.Subnets = filterL3Subnets(f.ClientSet, cudnBTemplate.Spec.Network.Layer3.Subnets) + } + if cudnBTemplate.Spec.Network.Layer2 != nil { + cudnBTemplate.Spec.Network.Layer2.Subnets = filterDualStackCIDRs(f.ClientSet, cudnBTemplate.Spec.Network.Layer2.Subnets) + } - cudnA, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnATemplate, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + cudnA, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnATemplate, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - cudnB, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnBTemplate, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + cudnB, err = udnClient.K8sV1().ClusterUserDefinedNetworks().Create(context.Background(), cudnBTemplate, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Waiting for networks to be ready") - gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnA.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) - gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnB.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) + ginkgo.By("Waiting for networks to be ready") + gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnA.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) + gomega.Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, cudnB.Name), 5*time.Second, time.Second).Should(gomega.Succeed()) - ginkgo.By("Selecting 3 schedulable nodes") - nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) - // create host networked pod - ginkgo.By("Creating host network pods on each node") - // get random port in case the test retries and port is already in use on host node - min := 25000 - max := 25999 - hostNetworkPort = rand.Intn(max-min+1) + min - framework.Logf("Random host networked port chosen: %d", hostNetworkPort) - for _, node := range nodes.Items { - // this creates a udp / http netexec listener which is able to receive the "hostname" - // command. We use this to validate that each endpoint is received at least once - args := []string{ - "netexec", - fmt.Sprintf("--http-port=%d", hostNetworkPort), - fmt.Sprintf("--udp-port=%d", hostNetworkPort), + ginkgo.By("Selecting 3 schedulable nodes") + nodes, err = e2enode.GetReadySchedulableNodes(context.TODO(), f.ClientSet) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically(">", 2)) + // create host networked pod + ginkgo.By("Creating host network pods on each node") + // get random port in case the test retries and port is already in use on host node + min := 25000 + max := 25999 + hostNetworkPort = rand.Intn(max-min+1) + min + framework.Logf("Random host networked port chosen: %d", hostNetworkPort) + + ginkgo.By("Setting up pods and services") + + // Create all pod specs upfront as distinct objects. 
+ var hostNetPods []*corev1.Pod + for _, node := range nodes.Items { + p := e2epod.NewAgnhostPod(f.Namespace.Name, node.Name+"-hostnet-ep", nil, nil, nil, + "netexec", + fmt.Sprintf("--http-port=%d", hostNetworkPort), + fmt.Sprintf("--udp-port=%d", hostNetworkPort)) + p.Spec.NodeName = node.Name + p.Spec.HostNetwork = true + hostNetPods = append(hostNetPods, e2epod.NewPodClient(f).Create(context.TODO(), p)) } - // create host networked Pods - _, err := createPod(f, node.Name+"-hostnet-ep", node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *corev1.Pod) { - p.Spec.Containers[0].Args = args - p.Spec.HostNetwork = true - }) + podNetASpecs := []*corev1.Pod{ + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec"), + } + for _, p := range podNetASpecs { + p.Spec.NodeName = nodes.Items[0].Name + p.Labels = map[string]string{"network": cudnA.Name} + } + podNetASpecs[2].Spec.NodeName = nodes.Items[1].Name - framework.ExpectNoError(err) - } + podNetBSpec := e2epod.NewAgnhostPod(udnNamespaceB.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetBSpec.Spec.NodeName = nodes.Items[1].Name + podNetBSpec.Labels = map[string]string{"network": cudnB.Name} - ginkgo.By("Setting up pods and services") - podsNetA = []*corev1.Pod{} - pod := e2epod.NewAgnhostPod(udnNamespaceA.Name, fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[0].Name, cudnA.Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") - 
pod.Spec.NodeName = nodes.Items[0].Name - pod.Labels = map[string]string{"network": cudnA.Name} - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) - - pod.Name = fmt.Sprintf("pod-2-%s-net-%s", nodes.Items[0].Name, cudnA.Name) - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) - - pod.Name = fmt.Sprintf("pod-3-%s-net-%s", nodes.Items[1].Name, cudnA.Name) - pod.Spec.NodeName = nodes.Items[1].Name - podsNetA = append(podsNetA, e2epod.NewPodClient(f).CreateSync(context.TODO(), pod)) - - svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, pod.Labels) - svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} - familyPolicy := corev1.IPFamilyPolicyPreferDualStack - svc.Spec.IPFamilyPolicy = &familyPolicy - svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetA, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podNetDefaultSpec := e2epod.NewAgnhostPod("default", fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name), nil, nil, []corev1.ContainerPort{{ContainerPort: 8080}}, "netexec") + podNetDefaultSpec.Spec.NodeName = nodes.Items[1].Name + podNetDefaultSpec.Labels = map[string]string{"network": "default"} - pod.Name = fmt.Sprintf("pod-1-%s-net-%s", nodes.Items[1].Name, cudnB.Name) - pod.Namespace = udnNamespaceB.Name - pod.Labels = map[string]string{"network": cudnB.Name} - podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).CreateSync(context.TODO(), pod) - framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) + // Submit all pods to the API without waiting for readiness. 
+ podsNetA = []*corev1.Pod{} + for _, p := range podNetASpecs { + podsNetA = append(podsNetA, e2epod.NewPodClient(f).Create(context.TODO(), p)) + } + podNetB = e2epod.PodClientNS(f, udnNamespaceB.Name).Create(context.TODO(), podNetBSpec) + podNetDefault = e2epod.PodClientNS(f, "default").Create(context.TODO(), podNetDefaultSpec) - svc.Name = fmt.Sprintf("service-%s", cudnB.Name) - svc.Namespace = pod.Namespace - svc.Spec.Selector = pod.Labels - svcNodePortNetB, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Create services (don't need pods to be ready). + familyPolicy := corev1.IPFamilyPolicyPreferDualStack - pod.Name = fmt.Sprintf("pod-1-%s-net-default", nodes.Items[1].Name) - pod.Namespace = "default" - pod.Labels = map[string]string{"network": "default"} - podNetDefault = e2epod.PodClientNS(f, "default").CreateSync(context.TODO(), pod) + svc := e2eservice.CreateServiceSpec(fmt.Sprintf("service-%s", cudnA.Name), "", false, map[string]string{"network": cudnA.Name}) + svc.Spec.Ports = []corev1.ServicePort{{Port: 8080}} + svc.Spec.IPFamilyPolicy = &familyPolicy + svc.Spec.Type = corev1.ServiceTypeNodePort + svcNodePortNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svc.Name = "service-default" - svc.Namespace = "default" - svc.Spec.Selector = pod.Labels - svc.Spec.Type = corev1.ServiceTypeNodePort - svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services(pod.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc.Name = fmt.Sprintf("service-%s", cudnB.Name) + svc.Namespace = udnNamespaceB.Name + svc.Spec.Selector = map[string]string{"network": cudnB.Name} + svcNodePortNetB, err = f.ClientSet.CoreV1().Services(udnNamespaceB.Name).Create(context.Background(), 
svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create one nodePort service with externalTrafficPolicy=Local in default namespace - svc.Name = "nodeport-default-etp-local" - svc.Spec.Type = corev1.ServiceTypeNodePort - svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal - svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services(svc.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svc.Name = "service-default" + svc.Namespace = "default" + svc.Spec.Selector = map[string]string{"network": "default"} + svc.Spec.Type = corev1.ServiceTypeNodePort + svcNodePortNetDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA - svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) - svc.Namespace = udnNamespaceA.Name - svc.Spec.Selector = map[string]string{"network": cudnA.Name} - svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(svc.Namespace).Create(context.Background(), svc, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // create one nodePort service with externalTrafficPolicy=Local in default namespace + svc.Name = "nodeport-default-etp-local" + svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal + svcNodePortETPLocalDefault, err = f.ClientSet.CoreV1().Services("default").Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Expose networks") - ra = &rav1.RouteAdvertisements{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "advertised-networks-isolation-ra", - }, - Spec: rav1.RouteAdvertisementsSpec{ - NetworkSelectors: apitypes.NetworkSelectors{ - apitypes.NetworkSelector{ - NetworkSelectionType: 
apitypes.ClusterUserDefinedNetworks, - ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ - NetworkSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{"advertised-networks-isolation": ""}, + // create one nodePort service with externalTrafficPolicy=Local in udnNamespaceA + svc.Name = fmt.Sprintf("nodeport-etp-local-%s", cudnA.Name) + svc.Namespace = udnNamespaceA.Name + svc.Spec.Selector = map[string]string{"network": cudnA.Name} + svcNodePortETPLocalNetA, err = f.ClientSet.CoreV1().Services(udnNamespaceA.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Wait for all pods to be ready (they've been scheduling in parallel). + for _, p := range append(hostNetPods, append(podsNetA, podNetB, podNetDefault)...) { + framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(context.TODO(), f.ClientSet, p.Name, p.Namespace, framework.PodStartTimeout)) + } + // Re-get pods to have updated status (e.g. pod IPs). 
+ for i, p := range podsNetA { + podsNetA[i], err = f.ClientSet.CoreV1().Pods(p.Namespace).Get(context.TODO(), p.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + } + podNetB, err = f.ClientSet.CoreV1().Pods(podNetB.Namespace).Get(context.TODO(), podNetB.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + framework.Logf("created pod %s/%s", podNetB.Namespace, podNetB.Name) + podNetDefault, err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Get(context.TODO(), podNetDefault.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Expose networks") + ra = &rav1.RouteAdvertisements{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "advertised-networks-isolation-ra", + }, + Spec: rav1.RouteAdvertisementsSpec{ + NetworkSelectors: apitypes.NetworkSelectors{ + apitypes.NetworkSelector{ + NetworkSelectionType: apitypes.ClusterUserDefinedNetworks, + ClusterUserDefinedNetworkSelector: &apitypes.ClusterUserDefinedNetworkSelector{ + NetworkSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"advertised-networks-isolation": ""}, + }, }, }, }, + NodeSelector: metav1.LabelSelector{}, + FRRConfigurationSelector: metav1.LabelSelector{}, + Advertisements: []rav1.AdvertisementType{ + rav1.PodNetwork, + }, }, - NodeSelector: metav1.LabelSelector{}, - FRRConfigurationSelector: metav1.LabelSelector{}, - Advertisements: []rav1.AdvertisementType{ - rav1.PodNetwork, - }, - }, - } - - raClient, err := raclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } - ra, err = raClient.K8sV1().RouteAdvertisements().Create(context.TODO(), ra, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + raClient, err := raclientset.NewForConfig(f.ClientConfig()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("ensure route advertisement matching both networks was created successfully") - gomega.Eventually(func() string { - ra, err := 
raClient.K8sV1().RouteAdvertisements().Get(context.TODO(), ra.Name, metav1.GetOptions{}) - if err != nil { - return "" - } - condition := meta.FindStatusCondition(ra.Status.Conditions, "Accepted") - if condition == nil { - return "" - } - return condition.Reason - }, 30*time.Second, time.Second).Should(gomega.Equal("Accepted")) + ra, err = raClient.K8sV1().RouteAdvertisements().Create(context.TODO(), ra, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("ensure routes from UDNs are learned by the external FRR router") - serverContainerIPs := getBGPServerContainerIPs(f) - for _, serverContainerIP := range serverContainerIPs { - for _, node := range nodes.Items { - if cudnA.Spec.Network.Topology == udnv1.NetworkTopologyLayer3 { - checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnATemplate.Name) - checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnBTemplate.Name) - } else { - checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnATemplate.Spec.Network.Layer2.Subnets) - checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnBTemplate.Spec.Network.Layer2.Subnets) + ginkgo.By("ensure route advertisement matching both networks was created successfully") + gomega.Eventually(func() string { + ra, err := raClient.K8sV1().RouteAdvertisements().Get(context.TODO(), ra.Name, metav1.GetOptions{}) + if err != nil { + return "" + } + condition := meta.FindStatusCondition(ra.Status.Conditions, "Accepted") + if condition == nil { + return "" + } + return condition.Reason + }, 30*time.Second, time.Second).Should(gomega.Equal("Accepted")) + + ginkgo.By("ensure routes from UDNs are learned by the external FRR router") + serverContainerIPs := getBGPServerContainerIPs(f) + for _, serverContainerIP := range serverContainerIPs { + for _, node := range nodes.Items { + if cudnA.Spec.Network.Topology == udnv1.NetworkTopologyLayer3 { + 
checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnATemplate.Name) + checkL3NodePodRoute(node, serverContainerIP, routerContainerName, types.CUDNPrefix+cudnBTemplate.Name) + } else { + checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnATemplate.Spec.Network.Layer2.Subnets) + checkL2NodePodRoute(node, serverContainerIP, routerContainerName, cudnBTemplate.Spec.Network.Layer2.Subnets) + } } } - } - }) - - ginkgo.AfterEach(func() { - gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) - gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + }) - udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - if cudnB != nil { - err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnB.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnB.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }, time.Second*60).Should(gomega.BeTrue()) - cudnB = nil - } - if cudnA != nil { - err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnA.Name, metav1.DeleteOptions{}) + ginkgo.AfterAll(func() { + if udnNamespaceA != nil { + gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceA.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + } + if udnNamespaceB != nil { + gomega.Expect(f.ClientSet.CoreV1().Pods(udnNamespaceB.Name).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(gomega.Succeed()) + } + udnClient, err := udnclientset.NewForConfig(f.ClientConfig()) 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() bool { - _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnA.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }, time.Second*60).Should(gomega.BeTrue()) - cudnA = nil - } + if cudnB != nil { + err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnB.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnB.Name, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }, time.Second*60).Should(gomega.BeTrue()) + cudnB = nil + } + if cudnA != nil { + err = udnClient.K8sV1().ClusterUserDefinedNetworks().Delete(context.TODO(), cudnA.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + _, err := udnClient.K8sV1().ClusterUserDefinedNetworks().Get(context.TODO(), cudnA.Name, metav1.GetOptions{}) + return apierrors.IsNotFound(err) + }, time.Second*60).Should(gomega.BeTrue()) + cudnA = nil + } - if podNetDefault != nil { - err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Delete(context.Background(), podNetDefault.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - podNetDefault = nil - } + if podNetDefault != nil { + err = f.ClientSet.CoreV1().Pods(podNetDefault.Namespace).Delete(context.Background(), podNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + podNetDefault = nil + } - if svcNodePortNetDefault != nil { - err = f.ClientSet.CoreV1().Services(svcNodePortNetDefault.Namespace).Delete(context.Background(), svcNodePortNetDefault.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svcNodePortNetDefault = nil - } - if svcNodePortETPLocalDefault != nil { - err = 
f.ClientSet.CoreV1().Services(svcNodePortETPLocalDefault.Namespace).Delete(context.Background(), svcNodePortETPLocalDefault.Name, metav1.DeleteOptions{}) + if svcNodePortNetDefault != nil { + err = f.ClientSet.CoreV1().Services(svcNodePortNetDefault.Namespace).Delete(context.Background(), svcNodePortNetDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svcNodePortNetDefault = nil + } + if svcNodePortETPLocalDefault != nil { + err = f.ClientSet.CoreV1().Services(svcNodePortETPLocalDefault.Namespace).Delete(context.Background(), svcNodePortETPLocalDefault.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + svcNodePortETPLocalDefault = nil + } + + raClient, err := raclientset.NewForConfig(f.ClientConfig()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - svcNodePortETPLocalDefault = nil - } - raClient, err := raclientset.NewForConfig(f.ClientConfig()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + if ra != nil { + err = raClient.K8sV1().RouteAdvertisements().Delete(context.TODO(), ra.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ra = nil + } - if ra != nil { - err = raClient.K8sV1().RouteAdvertisements().Delete(context.TODO(), ra.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ra = nil - } - }) + // Delete the namespaces manually since they were created directly + // via the API (not via f.CreateNamespace) to avoid framework's + // AfterEach cleanup. 
+ if udnNamespaceA != nil { + err = f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), udnNamespaceA.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnNamespaceA = nil + } + if udnNamespaceB != nil { + err = f.ClientSet.CoreV1().Namespaces().Delete(context.Background(), udnNamespaceB.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + udnNamespaceB = nil + } + }) - ginkgo.DescribeTable("connectivity between networks", - func(connInfo func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool)) { - // checkConnectivity performs a curl command from a specified client (pod or node) - // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the - // client. - var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) { - curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "2", "--insecure", targetAddress} - var out string - var err error - if clientNamespace != "" { - framework.Logf("Attempting connectivity from pod: %s/%s -> %s", clientNamespace, clientName, targetAddress) - stdout, stderr, err := e2epodoutput.RunHostCmdWithFullOutput(clientNamespace, clientName, strings.Join(curlCmd, " ")) - out = stdout + "\n" + stderr - if err != nil { - return out, fmt.Errorf("connectivity check failed from Pod %s/%s to %s: %w", clientNamespace, clientName, targetAddress, err) - } - } else { - framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress) - out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd) - if err != nil { - // out is empty on error and error contains out... 
- return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err) + ginkgo.DescribeTable("connectivity between networks", + func(connInfo func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool)) { + // checkConnectivity performs a curl command from a specified client (pod or node) + // to targetAddress. If clientNamespace is empty the function assumes clientName is a node that will be used as the + // client. + var checkConnectivity = func(clientName, clientNamespace, targetAddress string) (string, error) { + curlCmd := []string{"curl", "-g", "-q", "-s", "--max-time", "1", "--insecure", targetAddress} + var out string + var err error + if clientNamespace != "" { + framework.Logf("Attempting connectivity from pod: %s/%s -> %s", clientNamespace, clientName, targetAddress) + stdout, stderr, err := e2epodoutput.RunHostCmdWithFullOutput(clientNamespace, clientName, strings.Join(curlCmd, " ")) + out = stdout + "\n" + stderr + if err != nil { + return out, fmt.Errorf("connectivity check failed from Pod %s/%s to %s: %w", clientNamespace, clientName, targetAddress, err) + } + } else { + framework.Logf("Attempting connectivity from node: %s -> %s", clientName, targetAddress) + out, err = infraprovider.Get().ExecK8NodeCommand(clientName, curlCmd) + if err != nil { + // out is empty on error and error contains out... 
+ return err.Error(), fmt.Errorf("connectivity check failed from node %s to %s: %w", clientName, targetAddress, err) + } } - } - client := clientName - if clientNamespace != "" { - client = clientNamespace + "/" + client - } - framework.Logf("Connectivity check successful:'%s' -> %s", client, targetAddress) - return out, nil - } - for _, ipFamily := range getSupportedIPFamiliesSlice(f.ClientSet) { - clientName, clientNamespace, dst, expectedOutput, expectErr := connInfo(ipFamily) - asyncAssertion := gomega.Eventually - timeout := time.Second * 30 - if expectErr { - // When the connectivity check is expected to fail it should be failing consistently - asyncAssertion = gomega.Consistently - timeout = time.Second * 15 + client := clientName + if clientNamespace != "" { + client = clientNamespace + "/" + client + } + framework.Logf("Connectivity check successful:'%s' -> %s", client, targetAddress) + return out, nil } - asyncAssertion(func() error { - out, err := checkConnectivity(clientName, clientNamespace, dst) - if expectErr != (err != nil) { - return fmt.Errorf("expected connectivity check to return error(%t), got %v, output %v", expectErr, err, out) + for _, ipFamily := range getSupportedIPFamiliesSlice(f.ClientSet) { + clientName, clientNamespace, dst, expectedOutput, expectErr := connInfo(ipFamily) + asyncAssertion := gomega.Eventually + timeout := time.Second * 30 + if expectErr { + // When the connectivity check is expected to fail it should be failing consistently + asyncAssertion = gomega.Consistently + timeout = time.Second * 5 } - if expectedOutput != "" { - if !strings.Contains(out, expectedOutput) { - return fmt.Errorf("expected connectivity check to contain %q, got %q", expectedOutput, out) + asyncAssertion(func() error { + out, err := checkConnectivity(clientName, clientNamespace, dst) + if expectErr != (err != nil) { + return fmt.Errorf("expected connectivity check to return error(%t), got %v, output %v", expectErr, err, out) } - } - return nil - }, 
timeout).Should(gomega.BeNil()) - } - }, - ginkgo.Entry("pod to pod on the same network and same node should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[1] are on the same node - clientPod := podsNetA[0] - srvPod := podsNetA[1] + if expectedOutput != "" { + if !strings.Contains(out, expectedOutput) { + return fmt.Errorf("expected connectivity check to contain %q, got %q", expectedOutput, out) + } + } + return nil + }, timeout).Should(gomega.BeNil()) + } + }, + ginkgo.Entry("pod to pod on the same network and same node should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[1] are on the same node + clientPod := podsNetA[0] + srvPod := podsNetA[1] - clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false - }), - ginkgo.Entry("pod to pod on the same network and different nodes should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[2] are on different nodes - clientPod := podsNetA[0] - srvPod := podsNetA[2] - - clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, 
cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false - }), - ginkgo.Entry("pod to pod connectivity on different networks and same node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[2] and podNetB are on the same node - clientPod := podsNetA[2] - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - var ( - curlOutput string - curlErr bool - ) - // Test behavior depends on the ADVERTISED_UDN_ISOLATION_MODE environment variable: - // - "loose": Pod connectivity is allowed, test expects success - // - anything else (including unset): Treated as "strict", pod connectivity is blocked - if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) framework.ExpectNoError(err) + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false + }), + ginkgo.Entry("pod to pod on the same network and different nodes should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[2] are on different nodes + clientPod := podsNetA[0] + srvPod := podsNetA[2] - // With the above underlay routing configuration client pod can reach 
server pod. - curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() - curlErr = false - } else { - curlOutput = curlConnectionTimeoutCode - curlErr = true - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlOutput, curlErr - }), - - ginkgo.Entry("pod to pod connectivity on different networks and different nodes", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podNetB are on different nodes - clientPod := podsNetA[0] - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - var ( - curlOutput string - curlErr bool - ) - if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) framework.ExpectNoError(err) + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String(), false + }), + ginkgo.Entry("pod to pod connectivity on different networks and same node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[2] and podNetB are on the same node + clientPod := podsNetA[2] + srvPod := podNetB + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + var ( + curlOutput string + curlErr bool + ) + // Test behavior depends on the 
ADVERTISED_UDN_ISOLATION_MODE environment variable: + // - "loose": Pod connectivity is allowed, test expects success + // - anything else (including unset): Treated as "strict", pod connectivity is blocked + if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { + clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + + // With the above underlay routing configuration client pod can reach server pod. + curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() + curlErr = false + } else { + curlOutput = curlConnectionTimeoutCode + curlErr = true + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlOutput, curlErr + }), - curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() - curlErr = false - } else { - curlOutput = curlConnectionTimeoutCode - curlErr = true - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlOutput, curlErr - }), - ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault and podNetB are on the same node - clientPod := podNetDefault - srvPod := podNetB - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", - 
func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault and podsNetA[0] are on different nodes - clientPod := podNetDefault - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the default network should not be able to access a UDN service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("pod in the UDN should be able to access a service in the same network", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", "", false - }), - ginkgo.Entry("pod in the UDN should not be able to access a default network service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - err := true - out := curlConnectionTimeoutCode - if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { - // FIXME: prevent looping of traffic in L2 UDNs - // bad behaviour: packet is looping from management port -> breth0 -> GR -> management port -> breth0 and so on - // which is a never ending loop - // this causes curl timeout with code 7 host unreachable instead 
of code 28 - out = "" - } - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetDefault.Spec.ClusterIPs), "8080") + "/clientip", out, err - }), - ginkgo.Entry("pod in the UDN should be able to access kapi in default network service", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, "https://kubernetes.default/healthz", "", false - }), - ginkgo.Entry("pod in the UDN should be able to access kapi service cluster IP directly", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // Get kubernetes service from default namespace - kubernetesService, err := f.ClientSet.CoreV1().Services("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{}) - framework.ExpectNoError(err, "should be able to get kubernetes service") - - // NOTE: See https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2438-dual-stack-apiserver - // Today the kubernetes.default service is single-stack and cannot be dual-stack. 
- if isDualStackCluster(nodes) && ipFamily == utilnet.IPv6 { - e2eskipper.Skipf("Dual stack kubernetes.default service is not supported in kubernetes") - } - // Get the cluster IP for the specified IP family - clusterIP := getFirstIPStringOfFamily(ipFamily, kubernetesService.Spec.ClusterIPs) - gomega.Expect(clusterIP).NotTo(gomega.BeEmpty(), fmt.Sprintf("no cluster IP available for IP family %v", ipFamily)) - - // Access the kubernetes API at the cluster IP directly on port 443 - return podsNetA[0].Name, podsNetA[0].Namespace, fmt.Sprintf("https://%s/healthz", net.JoinHostPort(clusterIP, "443")), "", false - }), - ginkgo.Entry("pod in the UDN should not be able to access a service in a different UDN", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetB.Spec.ClusterIPs), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("host to a local UDN pod should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientNode := podsNetA[0].Spec.NodeName - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("host to a different node UDN pod should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] and podsNetA[2] are on different nodes - clientNode := podsNetA[2].Spec.NodeName - srvPod := podsNetA[0] - - srvPodStatus, err := getPodAnnotationForAttachment(srvPod, 
namespacedName(srvPod.Namespace, cudnATemplate.Name)) - framework.ExpectNoError(err) - return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", - curlConnectionTimeoutCode, true - }), - ginkgo.Entry("UDN pod to local node should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - // FIXME: add the host process socket to the VRF for this test to work. - // This scenario is something that is not supported yet. So the test will continue to fail. - // This works the same on both normal UDNs and advertised UDNs. - // So because the process is not bound to the VRF, packet reaches the host but kernel sends a RESET. So its not code 28 but code7. 
- // 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264, - // offset 0, flags [DF], proto TCP (6), length 60) - // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280, - // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 - // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264, - // offset 0, flags [DF], proto TCP (6), length 60) - // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, - // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 - // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, - // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 - // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, - // offset 0, flags [DF], proto TCP (6), length 40) - // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0 - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true - }), - ginkgo.Entry("UDN pod to a different node should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := 
getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } + ginkgo.Entry("pod to pod connectivity on different networks and different nodes", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podNetB are on different nodes + clientPod := podsNetA[0] + srvPod := podNetB - clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - clientNodeIPv4, clientNodeIPv6 := getNodeAddresses(clientNode) - clientNodeIP := clientNodeIPv4 - if ipFamily == utilnet.IPv6 { - clientNodeIP = clientNodeIPv6 - } - // pod -> node traffic should use the node's IP as the source for advertised UDNs. - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in default network should not work", - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort - - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in default network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. - // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort - - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by pods in podsNetA. - // We want to hit the nodeport on the same node. - // client is on nodes[0]. Let's hit nodeport on nodes[0]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetA.Spec.Ports[0].NodePort - - // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. - // Just check that the connection is successful. - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by pods in podsNetA. - // We want to hit the nodeport on a different node. - // client is on nodes[0]. Let's hit nodeport on nodes[2]. - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetA.Spec.Ports[0].NodePort - - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in different UDN network should not work", - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks. 
- // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. - // Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB - // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> - // enters the host and hits IPTables rules to DNAT to clusterIP:Port of service of networkB. - // Then it hits the pkt_mark flows on breth0 and get's sent into networkB's patchport where it hits the GR. - // On the GR we DNAT to backend pod and SNAT to joinIP. - // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply - // fails as it doesn't know how to reach this masqueradeIP. - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetB.Spec.Ports[0].NodePort - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in different UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - // The service is backed by podNetB. - // We want to hit the nodeport on a different node from the client. - // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
- node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePort := svcNodePortNetB.Spec.Ports[0].NodePort - - // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", "", false - }), - - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in same UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 
and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + var ( + curlOutput string + curlErr bool + ) + if os.Getenv("ADVERTISED_UDN_ISOLATION_MODE") == "loose" { + clientPodStatus, err := getPodAnnotationForAttachment(clientPod, namespacedName(clientPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + + curlOutput = getFirstCIDROfFamily(ipFamily, clientPodStatus.IPs).IP.String() + curlErr = false + } else { + curlOutput = curlConnectionTimeoutCode + curlErr = true + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlOutput, curlErr + }), + ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on the same node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podNetB are on the same node + clientPod := podNetDefault + srvPod := podNetB + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnBTemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + 
ginkgo.Entry("pod in the default network should not be able to access an advertised UDN pod on a different node", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault and podsNetA[0] are on different nodes + clientPod := podNetDefault + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the default network should not be able to access a UDN service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podNetDefault.Name, podNetDefault.Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("pod in the UDN should be able to access a service in the same network", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetA.Spec.ClusterIPs), "8080") + "/clientip", "", false + }), + ginkgo.Entry("pod in the UDN should not be able to access a default network service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + err := true + out := curlConnectionTimeoutCode + if cudnATemplate.Spec.Network.Topology == udnv1.NetworkTopologyLayer2 { + // FIXME: prevent looping of traffic in L2 UDNs + // bad behaviour: packet is looping from management port -> breth0 -> GR -> management port -> 
breth0 and so on + // which is a never ending loop + // this causes curl timeout with code 7 host unreachable instead of code 28 + out = "" + } + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetDefault.Spec.ClusterIPs), "8080") + "/clientip", out, err + }), + ginkgo.Entry("pod in the UDN should be able to access kapi in default network service", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, "https://kubernetes.default/healthz", "", false + }), + ginkgo.Entry("pod in the UDN should be able to access kapi service cluster IP directly", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // Get kubernetes service from default namespace + kubernetesService, err := f.ClientSet.CoreV1().Services("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{}) + framework.ExpectNoError(err, "should be able to get kubernetes service") + + // NOTE: See https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2438-dual-stack-apiserver + // Today the kubernetes.default service is single-stack and cannot be dual-stack. 
+ if isDualStackCluster(nodes) && ipFamily == utilnet.IPv6 { + e2eskipper.Skipf("Dual stack kubernetes.default service is not supported in kubernetes") + } + // Get the cluster IP for the specified IP family + clusterIP := getFirstIPStringOfFamily(ipFamily, kubernetesService.Spec.ClusterIPs) + gomega.Expect(clusterIP).NotTo(gomega.BeEmpty(), fmt.Sprintf("no cluster IP available for IP family %v", ipFamily)) + + // Access the kubernetes API at the cluster IP directly on port 443 + return podsNetA[0].Name, podsNetA[0].Namespace, fmt.Sprintf("https://%s/healthz", net.JoinHostPort(clusterIP, "443")), "", false + }), + ginkgo.Entry("pod in the UDN should not be able to access a service in a different UDN", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + return podsNetA[0].Name, podsNetA[0].Namespace, net.JoinHostPort(getFirstIPStringOfFamily(ipFamily, svcNodePortNetB.Spec.ClusterIPs), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("host to a local UDN pod should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientNode := podsNetA[0].Spec.NodeName + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("host to a different node UDN pod should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podsNetA[0] and podsNetA[2] are on different nodes + clientNode := podsNetA[2].Spec.NodeName + srvPod := podsNetA[0] + + srvPodStatus, err := getPodAnnotationForAttachment(srvPod, 
namespacedName(srvPod.Namespace, cudnATemplate.Name)) + framework.ExpectNoError(err) + return clientNode, "", net.JoinHostPort(getFirstCIDROfFamily(ipFamily, srvPodStatus.IPs).IP.String(), "8080") + "/clientip", + curlConnectionTimeoutCode, true + }), + ginkgo.Entry("UDN pod to local node should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + // FIXME: add the host process socket to the VRF for this test to work. + // This scenario is something that is not supported yet. So the test will continue to fail. + // This works the same on both normal UDNs and advertised UDNs. + // So because the process is not bound to the VRF, packet reaches the host but kernel sends a RESET. So its not code 28 but code7. 
+ // 10:59:55.351067 319594f193d4d_3 P ifindex 191 0a:58:5d:5d:01:05 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 64, id 57264, + // offset 0, flags [DF], proto TCP (6), length 60) + // 93.93.1.5.36363 > 172.18.0.2.25022: Flags [S], cksum 0x0aa5 (incorrect -> 0xe0b7), seq 3879759281, win 65280, + // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 + // 10:59:55.352404 ovn-k8s-mp87 In ifindex 186 0a:58:5d:5d:01:01 ethertype IPv4 (0x0800), length 80: (tos 0x0, ttl 63, id 57264, + // offset 0, flags [DF], proto TCP (6), length 60) + // 169.154.169.12.36363 > 172.18.0.2.25022: Flags [S], cksum 0xe0b7 (correct), seq 3879759281, win 65280, + // options [mss 1360,sackOK,TS val 3006752321 ecr 0,nop,wscale 7], length 0 + // 10:59:55.352461 ovn-k8s-mp87 Out ifindex 186 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, + // offset 0, flags [DF], proto TCP (6), length 40) + // 172.18.0.2.25022 > 169.154.169.12.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 3879759282, win 0, length 0 + // 10:59:55.352927 319594f193d4d_3 Out ifindex 191 0a:58:5d:5d:01:02 ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, + // offset 0, flags [DF], proto TCP (6), length 40) + // 172.18.0.2.25022 > 93.93.1.5.36363: Flags [R.], cksum 0x609d (correct), seq 0, ack 1, win 0, length 0 + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/hostname", "", true + }), + ginkgo.Entry("UDN pod to a different node should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] and podsNetA[2] are on different nodes so we can pick the node of podsNetA[2] as the different node destination + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := 
getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in different UDN network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in different UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. 
- if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + clientNode, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + clientNodeIPv4, clientNodeIPv6 := getNodeAddresses(clientNode) + clientNodeIP := clientNodeIPv4 + if ipFamily == utilnet.IPv6 { + clientNodeIP = clientNodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in default network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetB - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in default network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podsNetA[0] is on nodes[0]. 
We need a different node. podNetDefault is on nodes[1]. - // So we hit nodeport on nodes[1]. - clientPod := podsNetA[0] - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podNetDefault.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", "", false - }), - ginkgo.Entry("[ETP=LOCAL] Default network pod to same node nodeport service in UDN network should not work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // pod -> node traffic should use the node's IP as the source for advertised UDNs. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(hostNetworkPort)) + "/clientip", clientNodeIP, false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in default network should not work", + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5410 + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need the same node. Let's hit the nodeport on nodes[0]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in default network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // The service is backed by podNetDefault. Let's hit the nodeport on nodes[2]. + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetDefault.Spec.Ports[0].NodePort + + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on the same node. + // client is on nodes[0]. Let's hit nodeport on nodes[0]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetA.Spec.Ports[0].NodePort + + // The service can be backed by any of the pods in podsNetA, so we can't reliably check the output hostname. + // Just check that the connection is successful. + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by pods in podsNetA. + // We want to hit the nodeport on a different node. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetA.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to the same node nodeport service in different UDN network should not work", // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 - clientPod := podNetDefault - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true - }), - ginkgo.Entry("[ETP=LOCAL] Default network pod to different node nodeport service in UDN network should work", - func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { - // podNetDefault is on nodes[1]. We need a different node. podsNetA[0] is on nodes[0]. - // So we hit nodeport on nodes[0]. 
- clientPod := podNetDefault - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) - framework.ExpectNoError(err) - nodeIPv4, nodeIPv6 := getNodeAddresses(node) - nodeIP := nodeIPv4 - if ipFamily == utilnet.IPv6 { - nodeIP = nodeIPv6 - } - nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort - out := "" - errBool := false - - // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 - // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. - // This should avoid the mentioned issue. - if IsGatewayModeLocal(f.ClientSet) { - // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 - // its supposed to fail with 56 error code which is fine - // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. - out = curlConnectionTimeoutCode - errBool = true - if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { - out = curlConnectionResetCode + // This traffic flow is expected to work eventually but doesn't work today on Layer3 (v4 and v6) and Layer2 (v4 and v6) networks. + // Reason it doesn't work today is because UDN networks don't have MAC bindings for masqueradeIPs of other networks. + // Traffic flow: UDN pod in network A -> samenode nodeIP:nodePort service of networkB + // UDN pod in networkA -> ovn-switch -> ovn-cluster-router (SNAT to masqueradeIP of networkA) -> mpX interface -> + // enters the host and hits IPTables rules to DNAT to clusterIP:Port of service of networkB. + // Then it hits the pkt_mark flows on breth0 and get's sent into networkB's patchport where it hits the GR. + // On the GR we DNAT to backend pod and SNAT to joinIP. + // Reply: Pod replies and now OVN in networkB tries to ARP for the masqueradeIP of networkA which is the source and simply + // fails as it doesn't know how to reach this masqueradeIP. 
+ func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[0].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 } - } - return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool - }), - ) + nodePort := svcNodePortNetB.Spec.Ports[0].NodePort + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=Cluster] UDN pod to a different node nodeport service in different UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + // The service is backed by podNetB. + // We want to hit the nodeport on a different node from the client. + // client is on nodes[0]. Let's hit nodeport on nodes[2]. 
+ node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), nodes.Items[2].Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePort := svcNodePortNetB.Spec.Ports[0].NodePort + + // sourceIP will be joinSubnetIP for nodeports, so only using hostname endpoint + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePort)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", "", false + }), + + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in same UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[2].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + out := "" + errBool := false + // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 + // There is a new option on ovn 25.03 
and further called "ct-commit-all" that can be set for each LR. + // This should avoid the mentioned issue. + if IsGatewayModeLocal(f.ClientSet) { + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 + // its supposed to fail with 56 error code which is fine + // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. + out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in different UDN network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in different UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 
+ if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + out := "" + errBool := false + + // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 + // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. + // This should avoid the mentioned issue. + if IsGatewayModeLocal(f.ClientSet) { + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 + // its supposed to fail with 56 error code which is fine + // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. + out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to the same node nodeport service in default network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetB + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] UDN pod to a different node nodeport service in default network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst 
string, expectedOutput string, expectErr bool) { + // podsNetA[0] is on nodes[0]. We need a different node. podNetDefault is on nodes[1]. + // So we hit nodeport on nodes[1]. + clientPod := podsNetA[0] + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podNetDefault.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortB := svcNodePortETPLocalDefault.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortB)) + "/hostname", "", false + }), + ginkgo.Entry("[ETP=LOCAL] Default network pod to same node nodeport service in UDN network should not work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // FIXME: This test should work: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5419 + clientPod := podNetDefault + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), clientPod.Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", curlConnectionTimeoutCode, true + }), + ginkgo.Entry("[ETP=LOCAL] Default network pod to different node nodeport service in UDN network should work", + func(ipFamily utilnet.IPFamily) (clientName string, clientNamespace string, dst string, expectedOutput string, expectErr bool) { + // podNetDefault is on nodes[1]. We need a different node. podsNetA[0] is on nodes[0]. + // So we hit nodeport on nodes[0]. 
+ clientPod := podNetDefault + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), podsNetA[0].Spec.NodeName, metav1.GetOptions{}) + framework.ExpectNoError(err) + nodeIPv4, nodeIPv6 := getNodeAddresses(node) + nodeIP := nodeIPv4 + if ipFamily == utilnet.IPv6 { + nodeIP = nodeIPv6 + } + nodePortA := svcNodePortETPLocalNetA.Spec.Ports[0].NodePort + out := "" + errBool := false + + // FIXME https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5531#issuecomment-3749407414 + // There is a new option on ovn 25.03 and further called "ct-commit-all" that can be set for each LR. + // This should avoid the mentioned issue. + if IsGatewayModeLocal(f.ClientSet) { + // FIXME: https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5846 + // its supposed to fail with 56 error code which is fine + // but due to this fwmark bug it ends up failing wtih 28 error code that's not expected. + out = curlConnectionTimeoutCode + errBool = true + if ipFamily == utilnet.IPv4 || (ipFamily == utilnet.IPv6 && !isIPv4Supported(f.ClientSet)) { + out = curlConnectionResetCode + } + } + return clientPod.Name, clientPod.Namespace, net.JoinHostPort(nodeIP, fmt.Sprint(nodePortA)) + "/hostname", out, errBool + }), + ) + }) }, ginkgo.Entry("Layer3", @@ -1879,6 +1925,19 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA // isolation doesn't cut it. macvlan driver might be a better option. bgpServerSubnetIPv4 = "172.38.0.0/16" bgpServerSubnetIPv6 = "fc00:f853:ccd:38::/64" + // Additional subnets used in nested "When there is other network" tests + otherBGPPeerSubnetIPv4 = "172.136.0.0/16" + otherBGPPeerSubnetIPv6 = "fc00:f853:ccd:136::/64" + otherBGPServerSubnetIPv4 = "172.138.0.0/16" + otherBGPServerSubnetIPv6 = "fc00:f853:ccd:138::/64" + ) + + // staleSubnets lists all subnets that may be left behind if a test times out during cleanup. 
+ staleSubnets := sets.New( + bgpPeerSubnetIPv4, bgpPeerSubnetIPv6, + bgpServerSubnetIPv4, bgpServerSubnetIPv6, + otherBGPPeerSubnetIPv4, otherBGPPeerSubnetIPv6, + otherBGPServerSubnetIPv4, otherBGPServerSubnetIPv6, ) f := wrappedTestFramework(baseName) @@ -1898,6 +1957,21 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA testNetworkName = testBaseName bgpServerName = testNetworkName + "-bgpserver" + // Clean up any stale networks from previous test attempts that may have failed during cleanup. + networkNames, err := infraprovider.Get().ListNetworks() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, name := range networkNames { + network, err := infraprovider.Get().GetNetwork(name) + if err != nil { + continue + } + v4, v6, _ := network.IPv4IPv6Subnets() + if staleSubnets.Has(v4) || staleSubnets.Has(v6) { + framework.Logf("Cleaning up stale network %q with subnets %s/%s", name, v4, v6) + gomega.Expect(ictx.DeleteNetwork(network)).To(gomega.Succeed()) + } + } + // we will create a agnhost server on an extra network peered with BGP ginkgo.By("Running a BGP network with an agnhost server") bgpPeerCIDRs := []string{bgpPeerSubnetIPv4, bgpPeerSubnetIPv6} @@ -2233,12 +2307,8 @@ var _ = ginkgo.Describe("BGP: For a VRF-Lite configured network", feature.RouteA ginkgo.Describe("When there is other network", func() { const ( - otherBGPPeerSubnetIPv4 = "172.136.0.0/16" - otherBGPPeerSubnetIPv6 = "fc00:f853:ccd:136::/64" - otherBGPServerSubnetIPv4 = "172.138.0.0/16" - otherBGPServerSubnetIPv6 = "fc00:f853:ccd:138::/64" - otherUDNCIDRv4 = "103.203.0.0/16" - otherUDNCIDRv6 = "2014:200:200::0/60" + otherUDNCIDRv4 = "103.203.0.0/16" + otherUDNCIDRv6 = "2014:200:200::0/60" ) var ( diff --git a/test/e2e/util.go b/test/e2e/util.go index 5a9715d4a8..df01c07b24 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1440,6 +1440,12 @@ func isNetworkSegmentationEnabled() bool { return present && val == "true" } +func 
isICMPNetworkPolicyBypassEnabled() bool { + ovnKubeNamespace := deploymentconfig.Get().OVNKubernetesNamespace() + val := getTemplateContainerEnv(ovnKubeNamespace, "daemonset/ovnkube-node", getNodeContainerName(), "OVN_ALLOW_ICMP_NETPOL") + return val == "true" +} + func isLocalGWModeEnabled() bool { val, present := os.LookupEnv("OVN_GATEWAY_MODE") return present && val == "local"