From 5717018f227bc2374d9739605fb7c0f491293e10 Mon Sep 17 00:00:00 2001 From: Mathieu CARBONNEAUX Date: Wed, 31 Dec 2025 19:36:24 +0100 Subject: [PATCH 1/2] add support for externalIPs and loadBalancerIP in services. --- README.md | 320 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ entry | 28 ++++- 2 files changed, 346 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5179fc1..08eb50e 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,326 @@ iptables to forward the request to the cluster IP. The regular k8s scheduler wil find a free host port. If there are no free host ports, the service load balancer will stay in pending. +## Configuration + +The load balancer behavior can be configured using environment variables: + +- `SRC_IPS`: Comma-separated list of external IPs or loadBalancerIP to listen on. If set, the load balancer will only accept traffic destined for these specific IPs. If not set, the load balancer will listen on all interfaces (default behavior). +- `SRC_RANGES`: Comma-separated list of source IP ranges to accept traffic from (implements `.spec.loadBalancerSourceRanges`). +- `DEST_IPS`: Comma-separated list of destination cluster IPs to forward traffic to. +- `DEST_PROTO`: Protocol (tcp/udp). +- `DEST_PORT`: Destination port on the cluster IP. +- `SRC_PORT`: Source port to listen on. + +## Repository Structure + +### This Repository (klipper-lb) + +This repository contains **only the load balancer runtime image** - the container that runs in DaemonSet pods on each node. + +**What's here:** +- `entry`: Shell script that configures iptables rules based on environment variables +- `Dockerfile`: Builds the minimal Alpine-based container image +- Runtime logic for DNAT/SNAT iptables configuration + +**What it does:** +- Reads environment variables (`SRC_IPS`, `DEST_IPS`, `SRC_PORT`, etc.) +- Configures iptables PREROUTING rules to forward traffic +- Runs as a privileged pod with `NET_ADMIN` capability +- One instance per node, per service (DaemonSet model) + +### K3s Repository (Controller) + +The **ServiceLB controller** lives in the K3s repository at [`pkg/cloudprovider/`](https://github.com/k3s-io/k3s/tree/master/pkg/cloudprovider). + +**What's there:** +- `servicelb.go`: Controller that watches Services and manages DaemonSets +- `loadbalancer.go`: Cloud provider interface implementation +- Logic for creating/updating/deleting DaemonSets per LoadBalancer service +- Status management (populating `status.loadBalancer.ingress`) + +**What it does:** +- Watches for LoadBalancer services +- Creates one DaemonSet per service +- Populates DaemonSet environment variables from Service spec +- Updates Service status with node IPs + +### How They Work Together + +``` +┌─────────────────────────────────────────────────────────────┐ +│ K3s Repository (github.com/k3s-io/k3s) │ +│ │ +│ pkg/cloudprovider/servicelb.go │ +│ ┌────────────────────────────────────────┐ │ +│ │ ServiceLB Controller │ │ +│ │ - Watches LoadBalancer Services │ │ +│ │ - Creates DaemonSet per service │ │ +│ │ - Sets ENV vars (SRC_IPS, DEST_IPS...) │ │ +│ └────────────────┬───────────────────────┘ │ +│ │ │ +└───────────────────┼─────────────────────────────────────────┘ + │ + │ Creates DaemonSet with + │ image: rancher/klipper-lb:vX.Y.Z + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ This Repository (github.com/k3s-io/klipper-lb) │ +│ │ +│ entry script │ +│ ┌────────────────────────────────────────┐ │ +│ │ Load Balancer Runtime │ │ +│ │ - Reads ENV vars from DaemonSet │ │ +│ │ - Configures iptables PREROUTING │ │ +│ │ - Sets up DNAT rules │ │ +│ └────────────────────────────────────────┘ │ +│ │ +│ Runs as: DaemonSet pod on each node │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Development Workflow + +**To modify load balancer behavior (iptables rules):** +- Edit files in **this repository** (klipper-lb) +- Build new Docker image +- Update K3s to use new image version + +**To modify controller behavior (DaemonSet creation, ENV vars):** +- Edit files in **K3s repository** (`pkg/cloudprovider/`) +- Rebuild K3s binary +- May require new klipper-lb image if new ENV vars are added + +### Example: Adding SRC_IPS Support + +This demonstrates the two-repository workflow: + +**Step 1: klipper-lb (this repo)** +- Modify `entry` script to read `SRC_IPS` environment variable +- Add `-d ` flag to iptables rules when `SRC_IPS` is set +- Build and publish new image: `rancher/klipper-lb:v0.5.0` + +**Step 2: K3s repository** +- Modify `pkg/cloudprovider/servicelb.go` +- Update `newDaemonSet()` to set `SRC_IPS` env var from `service.spec.externalIPs` +- Update default image to `rancher/klipper-lb:v0.5.0` +- Rebuild K3s + +Both changes are needed for the feature to work end-to-end. + +## Architecture + +### Overview + +K3s ServiceLB uses a **declarative** architecture based on Kubernetes. There is **no direct communication** between the controller and klipper-lb pods. Everything goes through the Kubernetes API. + +### Communication Flow + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Kubernetes API Server │ +└────────────┬────────────────────────────────┬───────────────────┘ + │ │ + │ Watch Events │ Watch Events + ▼ ▼ +┌────────────────────────┐ ┌──────────────────────────┐ +│ ServiceLB Controller │ │ Kubelet on each Node │ +│ (pkg/cloudprovider) │ │ │ +└────────────┬───────────┘ └──────────┬───────────────┘ + │ │ + │ Create/Update DaemonSet │ Deploy Pods + ▼ ▼ +┌────────────────────────┐ ┌──────────────────────────┐ +│ DaemonSet │────────>│ klipper-lb Pods │ +│ svclb-myservice-xyz │ │ (with ENV variables) │ +└────────────────────────┘ └──────────────────────────┘ + │ + │ Configure iptables + ▼ + ┌──────────────────────────┐ + │ iptables PREROUTING │ + │ DNAT rules │ + └──────────────────────────┘ +``` + +### DaemonSet Architecture + +#### One DaemonSet per Service + +klipper-lb creates **one DaemonSet per LoadBalancer service**, which means **multiple pods per service** (one pod per eligible node). + +Each service gets a unique DaemonSet named `svclb--`. + +**Example with 2 services on 3 nodes:** + +``` +Service "web" Service "api" +┌─────────────────────┐ ┌─────────────────────┐ +│ DaemonSet: │ │ DaemonSet: │ +│ svclb-web-abc12345 │ │ svclb-api-def67890 │ +└─────────────────────┘ └─────────────────────┘ + │ │ + ┌────┴────┬────────┐ ┌───────┴────┬────────┐ + │ │ │ │ │ │ +┌───▼──┐ ┌──▼───┐ ┌──▼───┐ ┌──▼───┐ ┌──▼───┐ ┌──▼───┐ +│Node1 │ │Node2 │ │Node3 │ │Node1 │ │Node2 │ │Node3 │ +│pod-1 │ │pod-2 │ │pod-3 │ │pod-1 │ │pod-2 │ │pod-3 │ +└──────┘ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ +``` + +#### Destination IP Mapping + +The **DEST_IPS** (destination IPs) depend on `externalTrafficPolicy`: + +**Mode 1: externalTrafficPolicy: Cluster (default)** + +Traffic is forwarded to the service **ClusterIP**: + +```yaml +# Service web with ClusterIP 10.43.0.50 +Service web: + ClusterIP: 10.43.0.50 + externalIPs: [192.168.1.100] + Port: 80 + +# All klipper-lb pods (Node1, Node2, Node3) have: +DEST_IPS=10.43.0.50 +DEST_PORT=80 + +# iptables rule on each node: +iptables -t nat -A PREROUTING -d 192.168.1.100 -p TCP --dport 80 \ + -j DNAT --to-destination 10.43.0.50:80 +``` + +Traffic can arrive on **any node** and will be forwarded to the ClusterIP. Kubernetes (kube-proxy) then load-balances to the backend pods. + +**Mode 2: externalTrafficPolicy: Local** + +Traffic is forwarded to the **local node IPs**: + +```yaml +# Service web with externalTrafficPolicy: Local +Service web: + ClusterIP: 10.43.0.50 + externalIPs: [192.168.1.100] + Port: 80 + NodePort: 30080 + externalTrafficPolicy: Local + +# On Node1 (IP: 10.0.0.1): +DEST_IPS=10.0.0.1 +DEST_PORT=30080 + +# iptables rule on Node1: +iptables -t nat -A PREROUTING -d 192.168.1.100 -p TCP --dport 80 \ + -j DNAT --to-destination 10.0.0.1:30080 +``` + +Traffic is forwarded to the **NodePort of the local node**, and Kubernetes routes only to pods **local to that node** (no inter-node hop). + +#### Complete Mapping Flow + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Service "web" │ +│ - ClusterIP: 10.43.0.50:80 │ +│ - externalIPs: [192.168.1.100] │ +│ - externalTrafficPolicy: Cluster (default) │ +└─────────────────────────────────────────────────────────────┘ + │ + ┌─────────────┴──────────────┐ + │ DaemonSet: svclb-web-xyz │ + └─────────────┬──────────────┘ + │ + ┌───────────────────┼───────────────────┐ + │ │ │ + ┌───▼─────┐ ┌────▼────┐ ┌────▼────┐ + │ Node1 │ │ Node2 │ │ Node3 │ + │ pod-web │ │ pod-web │ │ pod-web │ + └─────────┘ └─────────┘ └─────────┘ + │ │ │ + iptables iptables iptables + │ │ │ + PREROUTING PREROUTING PREROUTING + -d 192.168.1.100 -d 192.168.1.100 -d 192.168.1.100 + -p TCP -p TCP -p TCP + --dport 80 --dport 80 --dport 80 + -j DNAT -j DNAT -j DNAT + --to 10.43.0.50:80 --to 10.43.0.50:80 --to 10.43.0.50:80 + │ │ │ + └───────────────────┼───────────────────┘ + │ + ┌───────▼────────┐ + │ ClusterIP │ + │ 10.43.0.50:80 │ + └────────────────┘ + │ + kube-proxy IPTABLES + │ + ┌───────────────────┼───────────────────┐ + │ │ │ + ┌───▼────┐ ┌────▼────┐ ┌────▼────┐ + │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ + │ web │ │ web │ │ web │ + └────────┘ └─────────┘ └─────────┘ +``` + +#### Key Points + +1. **One DaemonSet per service** → Each LoadBalancer service has its own DaemonSet +2. **One pod per node** → The DaemonSet deploys one klipper-lb pod on each eligible node +3. **Destination IP mapping**: + - **Cluster mode**: `DEST_IPS = ClusterIP` → Traffic load-balanced across all pods + - **Local mode**: `DEST_IPS = Node IP` + `DEST_PORT = NodePort` → Traffic to local pods only + +This architecture provides high availability: if one node fails, the others continue to serve traffic. + +### How It Works + +#### Change Detection + +The ServiceLB controller watches several Kubernetes resources: + +1. **Services** (via cloud provider interface): + - Creation of a Service with type `LoadBalancer` + - Modification of an existing Service + - Deletion of a Service + +2. **Pods**: When a klipper-lb pod starts and gets an IP (used to update the Service status) + +3. **Nodes**: When node labels change (triggers update of DaemonSet NodeSelectors) + +4. **EndpointSlices**: For `ExternalTrafficPolicy: Local` (allows including only nodes with ready pods) + +#### Pod Lifecycle + +When a DaemonSet is created/modified: + +1. **Kubelet** on each eligible node detects the change +2. **If environment variables have changed** → pods are restarted +3. Each klipper-lb pod starts with the new ENV variables +4. The `entry` script executes, reads environment variables, and configures iptables rules + +#### Communication Model + +Everything goes through the Kubernetes API: +``` +Service change → Controller detects → Update DaemonSet → +Kubernetes restarts pods → New pods read ENV → Configure iptables +``` + +The controller **cannot**: +- Send commands directly to pods +- Modify iptables rules on the fly +- Communicate via network with pods + +This architecture is **robust** because: +- Survives controller restarts +- Desired state stored in the Kubernetes API +- Kubernetes guarantees convergence to desired state +- No network dependency between controller and pods + ## Building `make` diff --git a/entry b/entry index 7d8b35e..e70116d 100755 --- a/entry +++ b/entry @@ -68,13 +68,37 @@ start_proxy() { if echo ${dest_ip} | grep -Eq ":"; then if [ $(cat /proc/sys/net/ipv6/conf/all/forwarding) == 1 ]; then ip6tables -t filter -A FORWARD -d ${dest_ip}/128 -p ${DEST_PROTO} --dport ${DEST_PORT} -j DROP - ip6tables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to [${dest_ip}]:${DEST_PORT} + + # If SRC_IPS is set, only listen on specified IPs + if [ -n "${SRC_IPS}" ]; then + for src_ip in ${SRC_IPS//,/ }; do + if echo ${src_ip} | grep -Eq ":"; then + ip6tables -t nat -I PREROUTING -d ${src_ip} -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to [${dest_ip}]:${DEST_PORT} + fi + done + else + # Default behavior: listen on all interfaces + ip6tables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to [${dest_ip}]:${DEST_PORT} + fi + ip6tables -t nat -I POSTROUTING -d ${dest_ip}/128 -p ${DEST_PROTO} -j MASQUERADE fi else if [ $(cat /proc/sys/net/ipv4/ip_forward) == 1 ]; then iptables -t filter -A FORWARD -d ${dest_ip}/32 -p ${DEST_PROTO} --dport ${DEST_PORT} -j DROP - iptables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to ${dest_ip}:${DEST_PORT} + + # If SRC_IPS is set, only listen on specified IPs + if [ -n "${SRC_IPS}" ]; then + for src_ip in ${SRC_IPS//,/ }; do + if ! echo ${src_ip} | grep -Eq ":"; then + iptables -t nat -I PREROUTING -d ${src_ip} -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to ${dest_ip}:${DEST_PORT} + fi + done + else + # Default behavior: listen on all interfaces + iptables -t nat -I PREROUTING -p ${DEST_PROTO} --dport ${SRC_PORT} -j DNAT --to ${dest_ip}:${DEST_PORT} + fi + iptables -t nat -I POSTROUTING -d ${dest_ip}/32 -p ${DEST_PROTO} -j MASQUERADE fi fi From 84567575c8542daa3078ac98f0d7e0e5e8419133 Mon Sep 17 00:00:00 2001 From: Mathieu CARBONNEAUX Date: Wed, 31 Dec 2025 19:59:01 +0100 Subject: [PATCH 2/2] add support for externalIPs and loadBalancerIP in services. --- IMPLEMENTATION_GUIDE.md | 194 ++++++++++++++++++++ README.md | 359 +++++++++++++++++++++++++++++++++---- k3s-servicelb-srcips.patch | 51 ++++++ 3 files changed, 568 insertions(+), 36 deletions(-) create mode 100644 IMPLEMENTATION_GUIDE.md create mode 100644 k3s-servicelb-srcips.patch diff --git a/IMPLEMENTATION_GUIDE.md b/IMPLEMENTATION_GUIDE.md new file mode 100644 index 0000000..2258bb2 --- /dev/null +++ b/IMPLEMENTATION_GUIDE.md @@ -0,0 +1,194 @@ +# Implementation Guide for SRC_IPS Support in klipper-lb + +This guide explains how to implement full support for `loadBalancerIP` and `externalIPs` in klipper-lb and K3s. + +## Overview + +Currently, klipper-lb listens on **all interfaces** even when `externalIPs` are specified. This implementation allows: + +1. Listening on all interfaces by default (current behavior) +2. Listening only on specified IPs when `loadBalancerIP`, `externalIPs`, or `status.loadBalancer.ingress` are configured + +## Required Modifications + +### 1. klipper-lb (this repository) ✅ DONE + +**File**: `entry` + +**Changes**: Modified the `start_proxy()` function to: +- Check if the `SRC_IPS` environment variable is set +- If set: create PREROUTING rules with the `-d` flag for each IP +- If not set: default behavior (no `-d` flag, listens on all interfaces) + +**Support**: IPv4 and IPv6 + +### 2. K3s ServiceLB Controller ⏳ TODO + +**Repository**: https://github.com/k3s-io/k3s + +**File**: `pkg/cloudprovider/servicelb.go` + +**Function to modify**: `newDaemonSet()` (line ~432) + +#### Required changes: + +1. **Collect source IPs** (after line 441): + ```go + // Collect source IPs from externalIPs, loadBalancerIP, and status ingress IPs + var srcIPs []string + + // Add externalIPs from service spec + if len(svc.Spec.ExternalIPs) > 0 { + srcIPs = append(srcIPs, svc.Spec.ExternalIPs...) + } + + // Add deprecated loadBalancerIP if set + if svc.Spec.LoadBalancerIP != "" { + srcIPs = append(srcIPs, svc.Spec.LoadBalancerIP) + } + + // Add IPs from status.loadBalancer.ingress + for _, ingress := range svc.Status.LoadBalancer.Ingress { + if ingress.IP != "" { + srcIPs = append(srcIPs, ingress.IP) + } + } + ``` + +2. **Add the SRC_IPS environment variable** (after line 539): + ```go + // Add SRC_IPS environment variable if any source IPs are configured + if len(srcIPs) > 0 { + container.Env = append(container.Env, core.EnvVar{ + Name: "SRC_IPS", + Value: strings.Join(srcIPs, ","), + }) + } + ``` + +## Applying the Changes + +### For klipper-lb + +The changes are already applied in this repository. To use this modified version: + +1. Build the Docker image: + ```bash + docker build -t your-registry/klipper-lb:custom . + ``` + +2. Configure K3s to use your image: + ```bash + k3s server --servicelb-image=your-registry/klipper-lb:custom + ``` + +### For K3s + +1. Clone the K3s repository: + ```bash + git clone https://github.com/k3s-io/k3s.git + cd k3s + ``` + +2. Apply the patch: + ```bash + cd /path/to/klipper-lb + patch -p1 < k3s-servicelb-srcips.patch + ``` + + Or manually modify the `pkg/cloudprovider/servicelb.go` file according to the instructions above. + +3. Build K3s: + ```bash + make + ``` + +## Usage Examples + +### Example 1: Using externalIPs + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + externalIPs: + - 192.168.1.100 + - 192.168.1.101 + ports: + - port: 80 + targetPort: 8080 + selector: + app: my-app +``` + +Result: klipper-lb will listen **only** on 192.168.1.100:80 and 192.168.1.101:80 + +### Example 2: Using loadBalancerIP (deprecated but supported) + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + loadBalancerIP: 192.168.1.100 + ports: + - port: 80 + targetPort: 8080 + selector: + app: my-app +``` + +Result: klipper-lb will listen **only** on 192.168.1.100:80 + +### Example 3: Default behavior (no IPs specified) + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: 8080 + selector: + app: my-app +``` + +Result: klipper-lb will listen on **all interfaces** (current behavior) + +## Environment Variables + +The klipper-lb container now accepts these variables: + +| Variable | Description | K3s Source | +|----------|-------------|------------| +| `SRC_IPS` | Comma-separated list of IPs to listen on | `externalIPs`, `loadBalancerIP`, or `status.loadBalancer.ingress` | +| `SRC_PORT` | Source port to listen on | `service.spec.ports[].port` | +| `SRC_RANGES` | Allowed source IP ranges | `service.spec.loadBalancerSourceRanges` | +| `DEST_IPS` | Destination IPs (cluster IPs) | `service.spec.clusterIPs` or `status.hostIPs` | +| `DEST_PORT` | Destination port | `service.spec.ports[].port` or `nodePort` | +| `DEST_PROTO` | Protocol (TCP/UDP) | `service.spec.ports[].protocol` | + +## Testing + +To test that the modifications work correctly: + +1. Create a service with `externalIPs` +2. Check the iptables rules in the klipper-lb pod: + ```bash + kubectl exec -n kube-system -- iptables -t nat -L PREROUTING -n -v + ``` +3. Verify that the DNAT rules contain `-d ` for the specified IPs + +## References + +- klipper-lb repository: https://github.com/k3s-io/klipper-lb +- K3s repository: https://github.com/k3s-io/k3s +- K3s ServiceLB documentation: https://docs.k3s.io/networking/networking-services diff --git a/README.md b/README.md index 08eb50e..a549711 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,319 @@ will stay in pending. ## Configuration -The load balancer behavior can be configured using environment variables: +The load balancer behavior is configured via environment variables that are automatically set by the K3s ServiceLB controller when creating DaemonSet pods. -- `SRC_IPS`: Comma-separated list of external IPs or loadBalancerIP to listen on. If set, the load balancer will only accept traffic destined for these specific IPs. If not set, the load balancer will listen on all interfaces (default behavior). -- `SRC_RANGES`: Comma-separated list of source IP ranges to accept traffic from (implements `.spec.loadBalancerSourceRanges`). -- `DEST_IPS`: Comma-separated list of destination cluster IPs to forward traffic to. -- `DEST_PROTO`: Protocol (tcp/udp). -- `DEST_PORT`: Destination port on the cluster IP. -- `SRC_PORT`: Source port to listen on. +### Environment Variables + +| Variable | Description | K3s Source | Example | +|----------|-------------|------------|---------| +| `SRC_IPS` | Comma-separated list of IPs to listen on. If set, klipper-lb only accepts traffic destined for these specific IPs (using `-d` in iptables). If not set, listens on all interfaces. | `service.spec.externalIPs`, `service.spec.loadBalancerIP`, or `service.status.loadBalancer.ingress` | `192.168.1.100,192.168.1.101` | +| `SRC_PORT` | Source port to listen on | `service.spec.ports[].port` | `80` | +| `SRC_RANGES` | Comma-separated list of source IP ranges allowed to connect (implements `.spec.loadBalancerSourceRanges`). Uses `-s` in iptables to filter source addresses. | `service.spec.loadBalancerSourceRanges` | `10.0.0.0/8,192.168.0.0/16` | +| `DEST_IPS` | Destination IPs to forward traffic to (DNAT target). Either cluster IPs or node IPs depending on `externalTrafficPolicy`. | `service.spec.clusterIPs` (Cluster mode) or `status.hostIPs` (Local mode) | `10.43.0.50` | +| `DEST_PORT` | Destination port to forward traffic to | `service.spec.ports[].port` (Cluster mode) or `service.spec.ports[].nodePort` (Local mode) | `8080` | +| `DEST_PROTO` | Protocol for forwarding | `service.spec.ports[].protocol` | `TCP` or `UDP` | + +### How Environment Variables are Set + +These variables are **not** set manually. They are automatically populated by the K3s ServiceLB controller when it creates the DaemonSet for your LoadBalancer service. + +**Example DaemonSet Pod Env:** +```yaml +env: + - name: SRC_IPS + value: "192.168.1.100" + - name: SRC_PORT + value: "80" + - name: SRC_RANGES + value: "0.0.0.0/0" + - name: DEST_PROTO + value: "TCP" + - name: DEST_PORT + value: "8080" + - name: DEST_IPS + value: "10.43.0.50" +``` + +**Resulting iptables rule:** +```bash +iptables -t nat -A PREROUTING -d 192.168.1.100 -s 0.0.0.0/0 -p TCP --dport 80 \ + -j DNAT --to-destination 10.43.0.50:8080 +``` + +## Service Configuration + +### Overview + +There are three ways to configure a LoadBalancer service with klipper-lb, each with different behavior regarding which IPs the load balancer listens on. + +### Option 1: Using `externalIPs` (Recommended with SRC_IPS) + +Specify **multiple external IPs** that should be used for the service. This is the key difference from `loadBalancerIP` which only accepts a single IP. + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + externalIPs: + - 192.168.1.100 + - 192.168.1.101 + - 192.168.1.102 + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + selector: + app: my-app +``` + +**What happens:** +- ✅ **With SRC_IPS support**: klipper-lb listens **only** on 192.168.1.100:80, 192.168.1.101:80, and 192.168.1.102:80 +- ❌ **Without SRC_IPS** (current upstream): klipper-lb listens on **all IPs** on port 80 (security issue) +- The specified IPs must already exist on the nodes or be routed to them +- Traffic to other IPs on port 80 is ignored (with SRC_IPS support) +- You can specify a **single IP** or **multiple IPs** (unlike `loadBalancerIP`) + +**Use cases:** +- You have specific external IPs assigned to your nodes +- You're using a VIP system (Keepalived) and want failover IPs +- You want to expose services on specific IPs only +- Multiple services on different IPs +- **Load balancing across multiple IPs** (e.g., DNS round-robin) + +**Status field:** +```yaml +status: + loadBalancer: + ingress: + - ip: + - ip: + - ip: +``` + +**iptables rules (with SRC_IPS):** +```bash +# Each node gets these rules (one per externalIP): +iptables -t nat -A PREROUTING -d 192.168.1.100 -p TCP --dport 80 -j DNAT --to :8080 +iptables -t nat -A PREROUTING -d 192.168.1.101 -p TCP --dport 80 -j DNAT --to :8080 +iptables -t nat -A PREROUTING -d 192.168.1.102 -p TCP --dport 80 -j DNAT --to :8080 +``` + +### Option 2: Using `loadBalancerIP` (Deprecated in k8s but Supported) + +Specify a single IP for the load balancer. This field is deprecated in Kubernetes but still supported. + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + loadBalancerIP: 192.168.1.100 + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + selector: + app: my-app +``` + +**What happens:** +- ✅ **With SRC_IPS support**: klipper-lb listens **only** on 192.168.1.100:80 +- ❌ **Without SRC_IPS**: klipper-lb listens on **all IPs** on port 80 +- Same behavior as `externalIPs` with a single IP +- The IP must already exist on the nodes or be routed to them + +**Use cases:** +- Legacy configurations using the old Kubernetes API +- Simple single-IP deployments +- Migration from cloud providers that used this field + +**Note:** Prefer using `externalIPs` for new deployments as `loadBalancerIP` is deprecated. + +**Status field:** +```yaml +status: + loadBalancer: + ingress: + - ip: + - ip: + - ip: +``` + +**iptables rules (with SRC_IPS):** +```bash +iptables -t nat -A PREROUTING -d 192.168.1.100 -p TCP --dport 80 -j DNAT --to :8080 +``` + +### Option 3: Default (No IPs specified) + +Create a LoadBalancer service without specifying any external IPs. + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + selector: + app: my-app +``` + +**What happens:** +- klipper-lb listens on **all interfaces** on port 80 (both with and without SRC_IPS support) +- No `-d ` filter in iptables rules +- Traffic to ANY IP on port 80 will be forwarded to the service +- The service is accessible via all node IPs + +**Use cases:** +- Simple deployments where you want the service accessible on all node IPs +- Development/testing environments +- When you don't care which IP is used to access the service +- Default K3s behavior + +**Status field:** +```yaml +status: + loadBalancer: + ingress: + - ip: + - ip: + - ip: +``` + +**iptables rules:** +```bash +# No -d flag, matches all destination IPs +iptables -t nat -A PREROUTING -p TCP --dport 80 -j DNAT --to :8080 +``` + +### Comparison Table + +| Configuration | IPs Supported | SRC_IPS ENV | iptables Filter | Security | Use Case | +|---------------|---------------|-------------|-----------------|------------------------------------|-----------------------------------| +| `externalIPs: [192.168.1.100, 192.168.1.101, 192.168.1.102]` | **Multiple** ✅ | `192.168.1.100,192.168.1.101,192.168.1.102` | `-d ` (one rule per IP) | ✅ Listen only on the specified ips | Multiple IPs | +| `loadBalancerIP: 192.168.1.100` | **Single only** | `192.168.1.100` | `-d 192.168.1.100` | ✅ Listen only on the specified ip | K8s Legacy single IP (deprecated) | +| No IPs specified | N/A | (not set) | No `-d` flag (matches **any** IP) | ⚠️ Listen on all ips | k3s Legacy, simple | + + +### Combined Configuration + +You can combine multiple options, though this is uncommon: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-service +spec: + type: LoadBalancer + loadBalancerIP: 192.168.1.100 + externalIPs: + - 192.168.1.101 + - 192.168.1.102 + ports: + - port: 80 + targetPort: 8080 +``` + +**What happens:** +- All IPs are collected: `[192.168.1.100, 192.168.1.101, 192.168.1.102]` +- klipper-lb listens on all three IPs +- Useful for migration scenarios or complex setups + +### IP Requirements + +**Important:** The IPs you specify must be reachable on the nodes. klipper-lb does **not** assign IPs; it only listens on them. + +**How to configure IPs on nodes:** + +1. **Manual assignment** (loopback): + ```bash + # On each node + ip addr add 192.168.1.100/32 dev lo + ``` + +2. **Manual assignment** (interface): + ```bash + # On each node + ip addr add 192.168.1.100/24 dev eth0 + ``` + +3. **VIP with Keepalived** (VRRP): + - uses VRRP protocol + - IP is active on one node (master), standby on others + + +4. **No configuration** (default): + - Don't specify any IPs + - Service accessible via node IPs automatically + +### Example Workflows + +#### Workflow 1: Multiple Services, Different IPs + +```yaml +# Service 1 - Web on .100 +apiVersion: v1 +kind: Service +metadata: + name: web +spec: + type: LoadBalancer + externalIPs: + - 192.168.1.100 + ports: + - port: 80 + targetPort: 8080 + selector: + app: web +--- +# Service 2 - API on .101 +apiVersion: v1 +kind: Service +metadata: + name: api +spec: + type: LoadBalancer + externalIPs: + - 192.168.1.101 + ports: + - port: 80 + targetPort: 3000 + selector: + app: api +``` + +Result: Web accessible on 192.168.1.100:80, API on 192.168.1.101:80 (same port, different IPs) + +#### Workflow 2: Simple Default + +```yaml +# No IP configuration needed +apiVersion: v1 +kind: Service +metadata: + name: web +spec: + type: LoadBalancer + ports: + - port: 80 + targetPort: 8080 + selector: + app: web +``` + +Result: Accessible on all node IPs on port 80 ## Repository Structure @@ -91,35 +396,6 @@ The **ServiceLB controller** lives in the K3s repository at [`pkg/cloudprovider/ └─────────────────────────────────────────────────────────────┘ ``` -### Development Workflow - -**To modify load balancer behavior (iptables rules):** -- Edit files in **this repository** (klipper-lb) -- Build new Docker image -- Update K3s to use new image version - -**To modify controller behavior (DaemonSet creation, ENV vars):** -- Edit files in **K3s repository** (`pkg/cloudprovider/`) -- Rebuild K3s binary -- May require new klipper-lb image if new ENV vars are added - -### Example: Adding SRC_IPS Support - -This demonstrates the two-repository workflow: - -**Step 1: klipper-lb (this repo)** -- Modify `entry` script to read `SRC_IPS` environment variable -- Add `-d ` flag to iptables rules when `SRC_IPS` is set -- Build and publish new image: `rancher/klipper-lb:v0.5.0` - -**Step 2: K3s repository** -- Modify `pkg/cloudprovider/servicelb.go` -- Update `newDaemonSet()` to set `SRC_IPS` env var from `service.spec.externalIPs` -- Update default image to `rancher/klipper-lb:v0.5.0` -- Rebuild K3s - -Both changes are needed for the feature to work end-to-end. - ## Architecture ### Overview @@ -336,6 +612,17 @@ This architecture is **robust** because: `make` +## References + +### Official Documentation +- [K3s ServiceLB Documentation](https://docs.k3s.io/networking/networking-services) - Official K3s documentation on ServiceLB +- [Kubernetes Services](https://kubernetes.io/docs/concepts/services-networking/service/) - Kubernetes Service concepts + +### Source Code +- [klipper-lb Repository](https://github.com/k3s-io/klipper-lb) - This repository (load balancer runtime) +- [K3s Repository](https://github.com/k3s-io/k3s) - K3s main repository +- [K3s ServiceLB Controller](https://github.com/k3s-io/k3s/tree/master/pkg/cloudprovider) - Controller source code + ## License Copyright (c) 2024 [K3s Authors](http://github.com/k3s-io) diff --git a/k3s-servicelb-srcips.patch b/k3s-servicelb-srcips.patch new file mode 100644 index 0000000..3c2a87d --- /dev/null +++ b/k3s-servicelb-srcips.patch @@ -0,0 +1,51 @@ +--- a/pkg/cloudprovider/servicelb.go ++++ b/pkg/cloudprovider/servicelb.go +@@ -432,6 +432,7 @@ func (k *k3s) newDaemonSet(svc *core.Service) (*apps.DaemonSet, error) { + name := generateName(svc) + oneInt := intstr.FromInt(1) + priorityClassName := k.getPriorityClassName(svc) + localTraffic := servicehelper.RequestsOnlyLocalTraffic(svc) + sourceRangesSet, err := servicehelper.GetLoadBalancerSourceRanges(svc) + if err != nil { +@@ -439,6 +440,24 @@ func (k *k3s) newDaemonSet(svc *core.Service) (*apps.DaemonSet, error) { + } + sourceRanges := strings.Join(sourceRangesSet.StringSlice(), ",") + securityContext := &core.PodSecurityContext{} ++ ++ // Collect source IPs from externalIPs, loadBalancerIP, and status ingress IPs ++ var srcIPs []string ++ ++ // Add externalIPs from service spec ++ if len(svc.Spec.ExternalIPs) > 0 { ++ srcIPs = append(srcIPs, svc.Spec.ExternalIPs...) ++ } ++ ++ // Add deprecated loadBalancerIP if set ++ if svc.Spec.LoadBalancerIP != "" { ++ srcIPs = append(srcIPs, svc.Spec.LoadBalancerIP) ++ } ++ ++ // Add IPs from status.loadBalancer.ingress ++ for _, ingress := range svc.Status.LoadBalancer.Ingress { ++ if ingress.IP != "" { ++ srcIPs = append(srcIPs, ingress.IP) ++ } ++ } + + for _, ipFamily := range svc.Spec.IPFamilies { + switch ipFamily { +@@ -539,6 +558,14 @@ func (k *k3s) newDaemonSet(svc *core.Service) (*apps.DaemonSet, error) { + Value: string(port.Protocol), + }, + } ++ ++ // Add SRC_IPS environment variable if any source IPs are configured ++ if len(srcIPs) > 0 { ++ container.Env = append(container.Env, core.EnvVar{ ++ Name: "SRC_IPS", ++ Value: strings.Join(srcIPs, ","), ++ }) ++ } + + if localTraffic { + container.Env = append(container.Env,