diff --git a/xdp-forward/Makefile b/xdp-forward/Makefile index 17aa4317..3bde295c 100644 --- a/xdp-forward/Makefile +++ b/xdp-forward/Makefile @@ -1,5 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 +ifdef VLANS_PATCHED +CFLAGS += -DVLANS_PATCHED +BPF_CFLAGS += -DVLANS_PATCHED +endif + +ifdef VLANS_USERSPACE +CFLAGS += -DVLANS_USERSPACE +BPF_CFLAGS += -DVLANS_USERSPACE +endif + XDP_TARGETS := xdp_forward.bpf xdp_flowtable.bpf xdp_flowtable_sample.bpf BPF_SKEL_TARGETS := $(XDP_TARGETS) diff --git a/xdp-forward/README.org b/xdp-forward/README.org index ab30906b..a15ac328 100644 --- a/xdp-forward/README.org +++ b/xdp-forward/README.org @@ -164,6 +164,33 @@ EOF #xdp-forward load -f flowtable n0 n1 #+end_src + +* VLAN support +VLAN support adds ~4% overhead to packet processing, so it is disabled +by default. To enable it, build with the flag for the desired mode. + +** Userspace mode +This can be enabled by compiling =xdp-forward= with the =-DVLANS_USERSPACE= flag: + +#+begin_src sh +VLANS_USERSPACE=1 make +#+end_src + +In this mode, the userspace program queries netlink for VLAN devices on top of +the given physical interfaces. The result is passed to the XDP program via a +BPF map, which is not updated at runtime. This means that if a VLAN interface +is changed later, the change is not propagated to =xdp-forward=. + +** Kernel mode +This mode requires a specific kernel patch that extends the FIB lookup to support +VLANs. The patch is not yet upstream, so this mode has to be enabled manually +with the compilation flag =-DVLANS_PATCHED=: + +#+begin_src sh +VLANS_PATCHED=1 make +#+end_src + + * SEE ALSO =libxdp(3)= for details on the XDP loading semantics and kernel compatibility requirements. 
diff --git a/xdp-forward/xdp-forward.c b/xdp-forward/xdp-forward.c
index 0b02c068..6fb4c89d 100644
--- a/xdp-forward/xdp-forward.c
+++ b/xdp-forward/xdp-forward.c
@@ -10,6 +10,8 @@
 #include "logging.h"
 #include "compat.h"
 
+#include "xdp-userspace-vlans.c"
+
 #include "xdp_forward.skel.h"
 #include "xdp_flowtable.skel.h"
 #include "xdp_flowtable_sample.skel.h"
@@ -145,6 +147,9 @@ static int do_load(const void *cfg, __unused const char *pin_root_path)
 	struct xdp_program *xdp_prog = NULL;
 	const struct load_opts *opt = cfg;
 	struct bpf_program *prog = NULL;
+#ifdef VLANS_USERSPACE
+	struct bpf_map *vlan_map_obj = NULL;
+#endif
 	struct bpf_map *map = NULL;
 	struct bpf_object *obj;
 	int ret = EXIT_FAILURE;
@@ -189,6 +194,9 @@ static int do_load(const void *cfg, __unused const char *pin_root_path)
 			goto end;
 		}
 		map = xdp_forward_skel->maps.xdp_tx_ports;
+#ifdef VLANS_USERSPACE
+		vlan_map_obj = xdp_forward_skel->maps.vlan_map;
+#endif
 		obj = xdp_forward_skel->obj;
 		skel = (void *)xdp_forward_skel;
 	}
@@ -240,6 +248,23 @@ static int do_load(const void *cfg, __unused const char *pin_root_path)
 				strerror(errno));
 			goto end_detach;
 		}
+#ifdef VLANS_USERSPACE
+		/* Publish the VLAN devices stacked on this interface to the BPF
+		 * program; a negative count (lookup failure) skips the loop. */
+		struct vlan_info vlan_list[MAX_VLANS_PER_IFACE];
+		int vlans = find_vlan_interfaces(iface->ifindex, vlan_list);
+		if (vlan_map_obj) {
+			for (int i = 0; i < vlans; i++) {
+				ret = bpf_map_update_elem(bpf_map__fd(vlan_map_obj),
+					&(vlan_list[i].vlan_ifindex), &vlan_list[i], 0);
+				if (ret) {
+					pr_warn("Failed to update VLAN map value: %s\n",
+						strerror(errno));
+					goto end_detach;
+				}
+			}
+		}
+#endif
 		pr_info("Loaded on interface %s\n", iface->ifname);
 	}
 
diff --git a/xdp-forward/xdp-userspace-vlans.c b/xdp-forward/xdp-userspace-vlans.c
new file mode 100644
index 00000000..791e31e5
--- /dev/null
+++ b/xdp-forward/xdp-userspace-vlans.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include
+#include
+#include
+#include
+
+/* Capacity of the vlan_list array callers pass to find_vlan_interfaces(). */
+#define MAX_VLANS_PER_IFACE 16
+
+/* One VLAN device stacked on a physical interface. The BPF program declares
+ * the same struct for its vlan_map values, so the two must stay in sync.
+ */
+struct vlan_info {
+	__u16 vlan_id; // VLAN ID
+	int phys_ifindex; // Physical interface index
+	int vlan_ifindex; // VLAN interface index
+};
+
+/* Open a NETLINK_ROUTE socket and bind it; returns the fd or -1 on error. */
+static int init_netlink_socket(void)
+{
+	struct sockaddr_nl addr;
+	int sock;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		perror("Failed to open netlink socket");
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.nl_family = AF_NETLINK;
+	addr.nl_groups = 0;
+
+	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		perror("Failed to bind netlink socket");
+		close(sock);
+		return -1;
+	}
+	return sock;
+}
+
+/* Send an RTM_GETLINK dump request; returns 0 on success, -1 on error. */
+static int send_getlink_request(int sock)
+{
+	struct sockaddr_nl addr;
+	struct {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} req;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.nl_family = AF_NETLINK;
+
+	memset(&req, 0, sizeof(req));
+	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nlh.nlmsg_type = RTM_GETLINK;
+	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	req.nlh.nlmsg_seq = 1;
+	req.nlh.nlmsg_pid = getpid();
+	req.ifm.ifi_family = AF_PACKET; // request all interfaces
+
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) {
+		perror("Failed to send netlink message");
+		return -1;
+	}
+	return 0;
+}
+
+/* Walk the attributes nested in IFLA_INFO_DATA and return the IFLA_VLAN_ID
+ * value, or 0 when no such attribute is present.
+ */
+static int parse_vlan_id_from_data(struct rtattr *li_attr)
+{
+	struct rtattr *vlan_attr;
+	int vlan_remaining = RTA_PAYLOAD(li_attr);
+	int vlan_id = 0;
+
+	for (vlan_attr = (struct rtattr *)RTA_DATA(li_attr);
+	     RTA_OK(vlan_attr, vlan_remaining);
+	     vlan_attr = RTA_NEXT(vlan_attr, vlan_remaining)) {
+		if (vlan_attr->rta_type == IFLA_VLAN_ID) {
+			vlan_id = *(uint16_t *)RTA_DATA(vlan_attr);
+			break; // vid found
+		}
+	}
+	return vlan_id;
+}
+
+/* Walk the attributes nested in IFLA_LINKINFO: set *is_vlan when the kind
+ * string starts with "vlan" and *vlan_id when a non-zero VLAN ID is found.
+ */
+static void parse_link_info_attr(struct rtattr *attr, int *is_vlan,
+				 int *vlan_id)
+{
+	struct rtattr *li_attr;
+	int li_remaining = RTA_PAYLOAD(attr);
+
+	for (li_attr = (struct rtattr *)RTA_DATA(attr);
+	     RTA_OK(li_attr, li_remaining);
+	     li_attr = RTA_NEXT(li_attr, li_remaining)) {
+		if (li_attr->rta_type == IFLA_INFO_KIND) {
+			char *kind = RTA_DATA(li_attr);
+			if (strncmp(kind, "vlan", 4) == 0)
+				*is_vlan = 1;
+
+		} else if (li_attr->rta_type == IFLA_INFO_DATA) {
+			int vid = parse_vlan_id_from_data(li_attr);
+			if (vid) // first valid vid is 1
+				*vlan_id = vid;
+		}
+	}
+}
+
+/* Extract VLAN-ness, VLAN ID and parent ifindex (IFLA_LINK) from one link
+ * message. Outputs are reset to 0 / 0 / -1 before parsing.
+ */
+static void parse_interface_attributes(struct nlmsghdr *nlmsg, int *is_vlan,
+				       int *vlan_id, int *link_ifindex)
+{
+	struct ifinfomsg *ifinfo = NLMSG_DATA(nlmsg);
+	struct rtattr *attr;
+	int remaining =
+		nlmsg->nlmsg_len - NLMSG_LENGTH(sizeof(struct ifinfomsg));
+
+	*is_vlan = 0;
+	*vlan_id = 0;
+	*link_ifindex = -1;
+
+	for (attr = IFLA_RTA(ifinfo); RTA_OK(attr, remaining);
+	     attr = RTA_NEXT(attr, remaining)) {
+		if (attr->rta_type == IFLA_LINKINFO)
+			parse_link_info_attr(attr, is_vlan, vlan_id);
+		else if (attr->rta_type == IFLA_LINK)
+			*link_ifindex = *(int *)RTA_DATA(attr);
+	}
+}
+
+/* Append one VLAN to vlan_list. The list is a MAX_VLANS_PER_IFACE-sized
+ * array in the caller, so anything beyond that is dropped with a warning
+ * instead of being written past the end of the array.
+ */
+static void handle_found_vlan(struct ifinfomsg *ifinfo, int vlan_id,
+			      int link_ifindex, struct vlan_info *vlan_list,
+			      int *found_vlans)
+{
+	if (*found_vlans < MAX_VLANS_PER_IFACE) {
+		printf("%d\t%d\n", vlan_id, ifinfo->ifi_index);
+		vlan_list[*found_vlans].vlan_id = vlan_id;
+		vlan_list[*found_vlans].phys_ifindex = link_ifindex;
+		vlan_list[*found_vlans].vlan_ifindex = ifinfo->ifi_index;
+		(*found_vlans)++;
+	} else
+		fprintf(stderr, "Warning: VLAN list capacity exceeded.\n");
+}
+
+/* Handle one netlink message. Returns NLMSG_DONE when the dump is finished
+ * (or an ACK arrived), NLMSG_ERROR on a netlink error, 0 otherwise.
+ */
+static int process_netlink_message(struct nlmsghdr *nlmsg, int target_ifindex,
+				   struct vlan_info *vlan_list,
+				   int *found_vlans)
+{
+	struct ifinfomsg *ifinfo;
+	int is_vlan, vlan_id, link_ifindex;
+
+	if (nlmsg->nlmsg_type == NLMSG_DONE)
+		return NLMSG_DONE; // all msgs processed
+
+	if (nlmsg->nlmsg_type == NLMSG_ERROR) {
+		struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nlmsg);
+		// If the error field is zero, it's an ACK, not an error. Ignore.
+		if (err->error == 0)
+			return NLMSG_DONE;
+
+		errno = -err->error;
+		perror("Netlink error");
+		return NLMSG_ERROR;
+	}
+
+	if (nlmsg->nlmsg_type != RTM_NEWLINK)
+		return 0;
+
+	ifinfo = NLMSG_DATA(nlmsg);
+	parse_interface_attributes(nlmsg, &is_vlan, &vlan_id, &link_ifindex);
+
+	if (is_vlan && link_ifindex == target_ifindex && vlan_id)
+		handle_found_vlan(ifinfo, vlan_id, link_ifindex, vlan_list,
+				  found_vlans);
+
+	return 0;
+}
+
+/* Read the dump replies and collect VLANs whose parent is target_ifindex.
+ * Returns the number of entries stored in vlan_list, or -1 on error.
+ */
+static int receive_and_process_responses(int sock, int target_ifindex,
+					 struct vlan_info *vlan_list)
+{
+	char buf[8192];
+	int found_vlans = 0;
+	int status;
+
+	printf("VLAN interfaces using physical ifindex %d:\n", target_ifindex);
+	printf("VLAN ID\tVLAN ifindex\n");
+
+	while (1) {
+		int len = recv(sock, buf, sizeof(buf), 0);
+		if (len < 0) {
+			perror("Failed to receive netlink message");
+			return -1;
+		}
+
+		struct nlmsghdr *nlmsg;
+		for (nlmsg = (struct nlmsghdr *)buf; NLMSG_OK(nlmsg, len);
+		     nlmsg = NLMSG_NEXT(nlmsg, len)) {
+			status = process_netlink_message(
+				nlmsg, target_ifindex, vlan_list, &found_vlans);
+
+			if (status == NLMSG_DONE)
+				return found_vlans;
+
+			if (status == NLMSG_ERROR)
+				return -1;
+		}
+		if (len > 0 && !NLMSG_OK(nlmsg, len))
+			fprintf(stderr,
+				"Warning: Potentially incomplete netlink message processed.\n");
+	}
+	return found_vlans;
+}
+
+int find_vlan_interfaces(int target_ifindex, struct vlan_info *vlan_list)
+{
+	/**
+	 * find_vlan_interfaces - Find VLAN interfaces linked
+	 * to a given physical interface as well as VLAN id
+	 * assigned to that VLAN interface. netlink socket is
+	 * used.
+	 * This function is called for each physical interface,
+	 * where our program should be attached, their VLAN
+	 * interfaces are found and added to the map.
+	 */
+	int sock;
+	int result = -1;
+
+	sock = init_netlink_socket();
+	if (sock < 0)
+		return -1;
+
+	if (send_getlink_request(sock) < 0) {
+		close(sock);
+		return -1;
+	}
+
+	result = receive_and_process_responses(sock, target_ifindex, vlan_list);
+
+	close(sock);
+	return result; // Returns count of found VLANs or -1 on error
+}
diff --git a/xdp-forward/xdp_forward.bpf.c b/xdp-forward/xdp_forward.bpf.c
index ae2b0dcd..36209bc7 100644
--- a/xdp-forward/xdp_forward.bpf.c
+++ b/xdp-forward/xdp_forward.bpf.c
@@ -4,6 +4,7 @@
 
 #include
 #include
+#include
 #include
 #include
 
@@ -12,6 +13,22 @@
 
 #define IPV6_FLOWINFO_MASK bpf_htons(0x0FFFFFFF)
 
+#define BPF_FIB_LOOKUP_VLAN (1U << 6)
+#define BPF_FIB_LOOKUP_RESOLVE_VLAN (1U << 7)
+
+struct vlan_info { // keep in sync with struct vlan_info in xdp-userspace-vlans.c
+	__u16 vlan_id; // VLAN ID
+	int phys_ifindex; // Physical interface index
+	int vlan_ifindex; // VLAN interface index
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH); // it's read only, no need for locks at bpf side
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(struct vlan_info));
+	__uint(max_entries, 16*64); // 16 vlans per interface, 64 interfaces
+} vlan_map SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
 	__uint(key_size, sizeof(int));
@@ -29,11 +46,37 @@ static __always_inline int ip_decrease_ttl(struct iphdr *iph)
 	return --iph->ttl;
 }
 
+#ifdef VLANS_USERSPACE
+static __always_inline int set_vlan_params(struct bpf_fib_lookup *fib_params, __u32 vlan_ifindex)
+{
+	/**
+	 * set_vlan_params - When unpatched kernel is used, routing
+	 * lookup for VLANed networks returns ifindex of VLAN interface.
+	 * XDP doesn't support VLAN interfaces, physical needs to be used.
+	 * This function looks up the physical interface and VLAN id
+	 * in the map (provided by userspace) and sets them in the
+	 * fib_params struct, which are set to 0 by bpf_fib_lookup().
+	 */
+	struct vlan_info *vinfo;
+	vinfo = bpf_map_lookup_elem(&vlan_map, &vlan_ifindex);
+	if (!vinfo)
+		return -1;
+
+	fib_params->ifindex = vinfo->phys_ifindex;
+	fib_params->h_vlan_TCI = vinfo->vlan_id;
+
+	return 0;
+}
+#endif
+
 static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, __u32 flags)
 {
 	void *data_end = (void *)(long)ctx->data_end;
 	void *data = (void *)(long)ctx->data;
 	struct bpf_fib_lookup fib_params;
+#if defined(VLANS_USERSPACE) || defined(VLANS_PATCHED)
+	struct vlan_hdr *vhdr = NULL;
+#endif
 	struct ethhdr *eth = data;
 	struct ipv6hdr *ip6h;
 	struct iphdr *iph;
@@ -48,6 +91,22 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, __u32 flags)
 	__builtin_memset(&fib_params, 0, sizeof(fib_params));
 
 	h_proto = eth->h_proto;
+#if defined(VLANS_USERSPACE) || defined(VLANS_PATCHED)
+	if (h_proto == bpf_htons(ETH_P_8021Q)) {
+		vhdr = data + nh_off;
+		if (vhdr + 1 > data_end)
+			return XDP_DROP;
+
+		fib_params.h_vlan_proto = bpf_ntohs(h_proto);
+		fib_params.h_vlan_TCI = bpf_ntohs(vhdr->h_vlan_TCI);
+
+		h_proto = vhdr->h_vlan_encapsulated_proto;
+		nh_off += sizeof(struct vlan_hdr);
+	}
+#endif
+#ifdef VLANS_PATCHED
+	flags |= BPF_FIB_LOOKUP_RESOLVE_VLAN; // works for all the inf type combinations
+#endif
 	if (h_proto == bpf_htons(ETH_P_IP)) {
 		iph = data + nh_off;
 
@@ -116,6 +175,10 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, __u32 flags)
 		 * If not supported will fail with:
 		 *  cannot pass map_type 14 into func bpf_map_lookup_elem#1:
 		 */
+#ifdef VLANS_USERSPACE
+		set_vlan_params(&fib_params, fib_params.ifindex);
+#endif
+
 		if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
 			return XDP_PASS;
 
@@ -124,6 +187,51 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, __u32 flags)
 		else if (h_proto == bpf_htons(ETH_P_IPV6))
 			ip6h->hop_limit--;
 
+#if defined(VLANS_USERSPACE) || defined(VLANS_PATCHED)
+		if (vhdr && fib_params.h_vlan_TCI) {
+			// case: tagged inf to tagged inf, requires just rewritting vlan hdr
+			vhdr->h_vlan_TCI = bpf_htons(fib_params.h_vlan_TCI); // TODO: why??? shouldnt h_vlan_TCI be in network order?
+			// h_vlan_encapsulated_proto already holds the inner protocol; leave it untouched
+
+		} else if (vhdr && !fib_params.h_vlan_TCI) {
+			// case: tagged inf to untagged inf, requires removing vlan hdr
+			__be16 inner_proto = vhdr->h_vlan_encapsulated_proto;
+
+			if (bpf_xdp_adjust_head(ctx, sizeof(struct vlan_hdr)))
+				return XDP_PASS; // can't remove header
+
+			data = (void *)(long)ctx->data; // ptrs are now invalid, re-evaluate
+			data_end = (void *)(long)ctx->data_end;
+
+			if (data + sizeof(struct ethhdr) > data_end)
+				return XDP_PASS;
+
+			eth = data;
+			eth->h_proto = inner_proto;
+
+		} else if (!vhdr && fib_params.h_vlan_TCI) {
+			// case: untagged inf to tagged inf, requires adding vlan hdr
+			__be16 orig_proto = eth->h_proto;
+
+			// Negative value adds space at the beginning
+			if (bpf_xdp_adjust_head(ctx, -((__s32)sizeof(struct vlan_hdr))))
+				return XDP_PASS; // can't add header
+
+			data = (void *)(long)ctx->data; // ptrs are now invalid, re-evaluate
+			data_end = (void *)(long)ctx->data_end;
+
+			if (data + sizeof(struct ethhdr) + sizeof(struct vlan_hdr) > data_end)
+				return XDP_PASS; // not enough space for vhdr, let kernel process it
+
+			eth = data;
+			eth->h_proto = bpf_htons(ETH_P_8021Q);
+
+			vhdr = data + sizeof(struct ethhdr);
+			vhdr->h_vlan_TCI = bpf_htons(fib_params.h_vlan_TCI);
+			vhdr->h_vlan_encapsulated_proto = orig_proto;
+		}
+#endif
+
 		__builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
 		__builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
 		return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);