diff --git a/test/plugins/linux/linux.py b/test/plugins/linux/linux.py index e81277087f..225378d7ba 100644 --- a/test/plugins/linux/linux.py +++ b/test/plugins/linux/linux.py @@ -620,6 +620,34 @@ def test_linux_specific_pscallstack(self, volatility, python): ) +class TestLinuxIptables: + def test_linux_generic_iptables(self, image, volatility, python): + rc, out, _err = test_volatility.runvol_plugin( + "linux.iptables.IPTables", image, volatility, python + ) + + # The test image may have no active iptables rules. + # This validates that plugin requirements are met and exceptions are not raised. + assert rc == 0 + assert b"NetNS" in out + assert b"Chain" in out + assert b"Target" in out + + +class TestLinuxIptablesNft: + def test_linux_generic_iptables_nft(self, image, volatility, python): + rc, out, _err = test_volatility.runvol_plugin( + "linux.iptables_nft.IPTablesNFT", image, volatility, python + ) + + # The test image may have no active nftables rules. + # This validates that plugin requirements are met and exceptions are not raised. + assert rc == 0 + assert b"NetNS" in out + assert b"Chain" in out + assert b"Target" in out + + class TestLinuxSockscan: def test_linux_sockscan(self, volatility, python): # designed for linux-sample-1.dmp SHA1:1C3A4627EDCA94A7ADE3414592BEF0E62D7D3BB6 diff --git a/volatility3/framework/plugins/linux/iptables.py b/volatility3/framework/plugins/linux/iptables.py new file mode 100644 index 0000000000..5e0e376b09 --- /dev/null +++ b/volatility3/framework/plugins/linux/iptables.py @@ -0,0 +1,2872 @@ +# This file is Copyright 2024 Volatility Foundation and licensed under the +# Volatility Software License 1.0 which is available at +# https://www.volatilityfoundation.org/license/vsl-v1.0 +# +"""Linux iptables / ip6tables rule extraction plugin for Volatility3. + +Reconstructs netfilter x_tables rules from a Linux memory image. + +WHY NOT net.xt.tables +===================== +On most modern distros (Ubuntu, Debian, etc.) 
iptables support is compiled as +kernel modules (ip_tables.ko / ip6_tables.ko). The module registers its +per-namespace data through net_generic(), so there is no ``net.xt`` member in +the compiled-in ``struct net`` — and therefore no ``netns_xt`` type in the ISF. + +ACTUAL PATH +=========== +The netfilter hook infrastructure IS compiled into the kernel and IS in the ISF: + + net_namespace_list + └─ struct net.nf (netns_nf — always in ISF) + └─ hooks_ipv4[0..4] / hooks_ipv6[0..4] + └─ nf_hook_entries (always in ISF) + └─ hooks[i].priv (void * → xt_table *) + +When ip_tables.ko registers a hook it stores a pointer to the xt_table as the +hook's ``priv`` field. We iterate over every hook entry, try to interpret +``priv`` as an xt_table by validating the ``name`` field, and if it matches a +known iptables table name ("filter", "nat", "mangle", "raw", "security") we +parse the table rules. + +STRUCT LAYOUTS (NOT IN ISF — HARDCODED FOR LINUX 5.4 x86-64) +============================================================== +struct xt_table (include/linux/netfilter/x_tables.h): + [ 0:16] struct list_head list + [16:20] unsigned int valid_hooks (followed by 4 bytes of padding) + [24:32] struct xt_table_info *private + [32:40] struct module *me + [40:41] u8 af + [41:44] padding + [44:48] int priority + [48:56] int (*table_init)(struct net *) + [56:88] char name[XT_TABLE_MAXNAMELEN] (32 bytes) + +struct xt_table_info (include/linux/netfilter/x_tables.h): + [ 0: 4] unsigned int size ← total bytes of entries blob + [ 4: 8] unsigned int number + [ 8:12] unsigned int initial_number + [12:32] unsigned int hook_entry[5] ← byte offsets of chain starts + [32:52] unsigned int underflow[5] + [52:56] unsigned int stacksize + [56:64] void ***jumpstack + [64: ] unsigned char entries[] ← blob starts here + +ipt_entry (include/uapi/linux/netfilter_ipv4/ip_tables.h): + [ 0:84] struct ipt_ip + [84:88] unsigned int nfcache + [88:90] __u16 target_offset + [90:92] __u16 next_offset + [92:96] unsigned int comefrom + [96:112] struct xt_counters 
(pcnt:u64, bcnt:u64) + [112: ] unsigned char elems[] sizeof = 112 + +ip6t_entry (include/uapi/linux/netfilter_ipv6/ip6_tables.h): + [ 0:136] struct ip6t_ip6 + [136:140] unsigned int nfcache + [140:142] __u16 target_offset + [142:144] __u16 next_offset + [144:148] unsigned int comefrom + [148:152] padding (4 bytes for 8-byte alignment) + [152:168] struct xt_counters + [168: ] unsigned char elems[] sizeof = 168 + +Extension match/target struct layouts fetched from: + https://git.netfilter.org/ / github.com/torvalds/linux +""" + +import base64 +import logging +import re +import socket +import struct +import urllib.error +import urllib.request +from typing import Dict, Iterator, List, NamedTuple, Optional, Set, Tuple + +from volatility3.framework import constants, exceptions, interfaces, renderers +from volatility3.framework.configuration import requirements +from volatility3.framework.interfaces import plugins +from volatility3.framework.layers import scanners +from volatility3.framework.symbols.linux import network + +vollog = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Kernel version detection +# +# The hardcoded struct layouts below target Linux 4.x – 6.x x86-64 where +# xt_table.valid_hooks is at offset 16 (right after list_head) and name[] is +# the last member (offset 48 on 4.0 – 4.14, 56 from 4.15 on). Kernels older +# than 4.0 are not covered by the fallback layouts — they are not supported. +# +# To verify or extend layouts for a specific kernel: +# 1. Run the banners plugin to identify the exact kernel version: +# python3 vol.py -f image.lime banners +# 2. For Ubuntu, download the matching linux-modules package. 
Example for +# Ubuntu 20.04 focal (kernel 5.4.0-42): +# https://launchpad.net/ubuntu/focal/+source/linux +# → find build 5.4.0-42.46 → linux-modules-5.4.0-42-generic .deb +# Extract with: +# dpkg-deb -x linux-modules-5.4.0-42-generic_*.deb /tmp/modules +# The xt_*.ko files are under: +# /tmp/modules/lib/modules/5.4.0-42-generic/kernel/net/netfilter/ +# 3. Generate ISF from a .ko file using dwarf2json: +# dwarf2json linux --elf /tmp/modules/lib/modules/.../xt_conntrack.ko \ +# > xt_conntrack.json +# This lets you inspect exact struct offsets for match/target extensions. +# --------------------------------------------------------------------------- + + +# --------------------------------------------------------------------------- +# Netfilter constants +# --------------------------------------------------------------------------- + +NFPROTO_IPV4 = 2 +NFPROTO_IPV6 = 10 + +NF_INET_HOOKS: Tuple[str, ...] = ( + "PREROUTING", "INPUT", "FORWARD", "OUTPUT", "POSTROUTING", +) + +STANDARD_VERDICTS: Dict[int, str] = { + -1: "DROP", # -NF_DROP - 1 (NF_DROP = 0) + -2: "ACCEPT", # -NF_ACCEPT - 1 (NF_ACCEPT = 1) + -5: "RETURN", # XT_RETURN = -NF_REPEAT - 1 +} + +KNOWN_TABLE_NAMES: Set[str] = {"filter", "nat", "mangle", "raw", "security"} + +MATCH_HEADER_SIZE = 32 # xt_entry_match / xt_entry_target user-union header +XT_FUNCTION_MAXNAMELEN = 30 # name[29] + implicit NUL +IFNAMSIZ = 16 + +# --------------------------------------------------------------------------- +# Dynamic struct layout resolution +# +# The plugin fetches include/linux/netfilter/x_tables.h from the matching +# kernel tag on GitHub, parses struct xt_table using x86-64 ABI alignment +# rules, and computes the exact byte offset of every field. A hard-coded +# fallback table covers the offline / fetch-failure case. 
+# --------------------------------------------------------------------------- + +class XtTableLayout(NamedTuple): + name_off: int # offset of name[32] inside xt_table + valid_hooks_off: int # offset of valid_hooks + private_off: int # offset of *private (xt_table_info) + me_off: int # offset of *me (struct module) + af_off: int # offset of af (u_int8_t) + read_size: int # bytes to read for full validation + + +class XtTableInfoLayout(NamedTuple): + size_off: int # offset of unsigned int size + hook_entry_off: int # offset of hook_entry[NF_INET_NUMHOOKS] + underflow_off: int # offset of underflow[NF_INET_NUMHOOKS] + entries_off: int # offset of entries[] flex array (= sizeof xt_table_info) + + +# Fallback table: (major, minor_min, minor_max) → XtTableLayout +# `table_init` was added in kernel 4.15; its 8-byte fn-pointer shifts name[]. +_XT_TABLE_LAYOUT_FALLBACKS: List[Tuple] = [ + ((4, 0, 14), XtTableLayout(name_off=48, valid_hooks_off=16, private_off=24, me_off=32, af_off=40, read_size=84)), + ((4, 15, 99), XtTableLayout(name_off=56, valid_hooks_off=16, private_off=24, me_off=32, af_off=40, read_size=92)), + ((5, 0, 99), XtTableLayout(name_off=56, valid_hooks_off=16, private_off=24, me_off=32, af_off=40, read_size=92)), + ((6, 0, 99), XtTableLayout(name_off=56, valid_hooks_off=16, private_off=24, me_off=32, af_off=40, read_size=92)), +] + +_layout_cache: Dict[Tuple[int, int], XtTableLayout] = {} +_XT_TABLE_INFO_FALLBACK = XtTableInfoLayout( + size_off=0, hook_entry_off=12, underflow_off=32, entries_off=64 +) +_table_info_layout_cache: Dict[Tuple[int, int], XtTableInfoLayout] = {} +_GITHUB_RAW = "https://raw.githubusercontent.com/torvalds/linux" +_XT_TABLE_HEADER = "include/linux/netfilter/x_tables.h" + +# --------------------------------------------------------------------------- +# x86-64 C struct layout parser +# --------------------------------------------------------------------------- + +# (size_bytes, natural_alignment) for scalar C types on x86-64 
+_C_TYPE_SIZES: Dict[str, Tuple[int, int]] = { + "char": (1, 1), + "unsigned char": (1, 1), + "u8": (1, 1), "u_int8_t": (1, 1), "__u8": (1, 1), + "short": (2, 2), + "unsigned short": (2, 2), + "u16": (2, 2), "u_int16_t": (2, 2), "__u16": (2, 2), + "int": (4, 4), + "unsigned int": (4, 4), + "u32": (4, 4), "u_int32_t": (4, 4), "__u32": (4, 4), + "spinlock_t": (4, 4), + "atomic_t": (4, 4), + "long": (8, 8), + "unsigned long": (8, 8), + "long long": (8, 8), + "unsigned long long": (8, 8), + "u64": (8, 8), "u_int64_t": (8, 8), "__u64": (8, 8), + # Known compound types + "struct list_head": (16, 8), # 2 × pointer + "struct hlist_head": ( 8, 8), # 1 × pointer + "struct hlist_node": (16, 8), # 2 × pointer +} + + +def _field_size_align(decl: str) -> Tuple[int, int]: + """Return (size, alignment) for a C field declaration on x86-64.""" + decl = decl.strip() + # Function pointer: return_type (*name)(params) + if re.search(r'\(\s*\*', decl): + return 8, 8 + # Any plain pointer: type *name + if '*' in decl: + return 8, 8 + # Array: type name[N] or type name[MACRO] + m = re.search(r'\[\s*(\w+)\s*\]', decl) + if m: + count_str = m.group(1) + # Element declaration = everything before the last identifier + bracket + elem_decl = re.sub(r'\s*\w+\s*\[\w+\].*', '', decl).strip() + try: + count = int(count_str) + except ValueError: + count = 32 # XT_TABLE_MAXNAMELEN and similar macros + esz, ealign = _field_size_align(elem_decl) + return esz * count, ealign + # Scalar — try known types (longest key first to avoid prefix collisions) + for type_str, (sz, al) in sorted(_C_TYPE_SIZES.items(), key=lambda x: -len(x[0])): + if type_str in decl: + return sz, al + # Unknown struct / enum / typedef → assume pointer-sized + return 8, 8 + + +def _parse_struct_offsets(body: str) -> Dict[str, int]: + """Compute field offsets for a C struct on x86-64 from its body text. + + Handles simple fields, pointer fields, function-pointer fields, and + one-dimensional arrays. 
Applies standard x86-64 ABI padding. + Returns {field_name: byte_offset}. + """ + # Strip C comments + body = re.sub(r'/\*.*?\*/', '', body, flags=re.DOTALL) + body = re.sub(r'//[^\n]*', '', body) + + offsets: Dict[str, int] = {} + cur = 0 + + for stmt in body.split(';'): + stmt = stmt.strip() + if not stmt: + continue + + # Function-pointer field: ret_type (*name)(params) + m = re.search(r'\(\s*\*\s*(\w+)\s*\)', stmt) + if m: + field_name = m.group(1) + else: + # Normal / array field — last identifier, optionally followed by [...] + m = re.search(r'(\w+)\s*(?:\[\w+\])?\s*(?:__[a-z_]+(?:\([^)]*\))?)?\s*$', stmt) + if not m: + continue + field_name = m.group(1) + if field_name in ('const', 'volatile', 'static', 'unsigned', 'signed'): + continue + + size, align = _field_size_align(stmt) + cur = (cur + align - 1) & ~(align - 1) # align up + offsets[field_name] = cur + cur += size + + return offsets + + +def _parse_struct_fields(body: str, macros: Optional[Dict[str, int]] = None) -> Dict[str, Tuple[int, int]]: + """Like _parse_struct_offsets but returns {field_name: (offset, size)}. + + The extra size information lets the generic decoder know how many bytes to + read for each field without needing a separate type-lookup pass. 
+ """ + if macros is None: + macros = {} + body = re.sub(r'/\*.*?\*/', '', body, flags=re.DOTALL) + body = re.sub(r'//[^\n]*', '', body) + + result: Dict[str, Tuple[int, int]] = {} + cur = 0 + + for stmt in body.split(';'): + stmt = stmt.strip() + if not stmt: + continue + # Substitute known macros so the array-size parser sees a literal number + for macro, val in macros.items(): + stmt = re.sub(r'\b' + re.escape(macro) + r'\b', str(val), stmt) + + m = re.search(r'\(\s*\*\s*(\w+)\s*\)', stmt) + if m: + field_name = m.group(1) + else: + m = re.search(r'(\w+)\s*(?:\[\w+\])?\s*(?:__[a-z_]+(?:\([^)]*\))?)?\s*$', stmt) + if not m: + continue + field_name = m.group(1) + if field_name in ('const', 'volatile', 'static', 'unsigned', 'signed'): + continue + + size, align = _field_size_align(stmt) + cur = (cur + align - 1) & ~(align - 1) + result[field_name] = (cur, size) + cur += size + + return result + + +# --------------------------------------------------------------------------- +# Tier 3: dynamic extension struct fetch & generic decoder +# --------------------------------------------------------------------------- + +# Registry: extension name (match or target) → +# (uapi_header_path, [struct_name_templates_in_priority_order]) +# Templates may use {rev} which is substituted with the match/target revision. 
+_EXT_HEADERS: Dict[str, Tuple[str, List[str]]] = { + # ---- matches ---- + "connmark": ("include/uapi/linux/netfilter/xt_connmark.h", + ["xt_connmark_mtinfo{rev}", "xt_connmark_info"]), + "conntrack": ("include/uapi/linux/netfilter/xt_conntrack.h", + ["xt_conntrack_mtinfo{rev}", "xt_conntrack_mtinfo"]), + "iprange": ("include/uapi/linux/netfilter/xt_iprange.h", + ["xt_iprange_mtinfo", "xt_iprange_info"]), + "hashlimit": ("include/uapi/linux/netfilter/xt_hashlimit.h", + ["xt_hashlimit_mtinfo{rev}", "xt_hashlimit_mtinfo"]), + "recent": ("include/uapi/linux/netfilter/xt_recent.h", + ["xt_recent_mtinfo_v{rev}", "xt_recent_mtinfo"]), + "string": ("include/uapi/linux/netfilter/xt_string.h", + ["xt_string_info"]), + "length": ("include/uapi/linux/netfilter/xt_length.h", + ["xt_length_info"]), + "mac": ("include/uapi/linux/netfilter/xt_mac.h", + ["xt_mac_info"]), + "physdev": ("include/uapi/linux/netfilter/xt_physdev.h", + ["xt_physdev_info"]), + "pkttype": ("include/uapi/linux/netfilter/xt_pkttype.h", + ["xt_pkttype_info"]), + "statistic": ("include/uapi/linux/netfilter/xt_statistic.h", + ["xt_statistic_info"]), + "time": ("include/uapi/linux/netfilter/xt_time.h", + ["xt_time_info"]), + "tos": ("include/uapi/linux/netfilter/xt_tos.h", + ["xt_tos_match_info", "xt_tos_info"]), + "dscp": ("include/uapi/linux/netfilter/xt_dscp.h", + ["xt_dscp_info"]), + "ttl": ("include/uapi/linux/netfilter/xt_ttl.h", + ["xt_ttl_info"]), + "sctp": ("include/uapi/linux/netfilter/xt_sctp.h", + ["xt_sctp_info"]), + "dccp": ("include/uapi/linux/netfilter/xt_dccp.h", + ["xt_dccp_info"]), + "tcpmss": ("include/uapi/linux/netfilter/xt_tcpmss.h", + ["xt_tcpmss_match_info"]), + "helper": ("include/uapi/linux/netfilter/xt_helper.h", + ["xt_helper_info"]), + "cgroup": ("include/uapi/linux/netfilter/xt_cgroup.h", + ["xt_cgroup_info_v{rev}", "xt_cgroup_info"]), + "connbytes": ("include/uapi/linux/netfilter/xt_connbytes.h", + ["xt_connbytes_info"]), + "connlimit": 
("include/uapi/linux/netfilter/xt_connlimit.h", + ["xt_connlimit_info"]), + "rateest": ("include/uapi/linux/netfilter/xt_rateest.h", + ["xt_rateest_match_info"]), + "realm": ("include/uapi/linux/netfilter/xt_realm.h", + ["xt_realm_info"]), + "ecn": ("include/uapi/linux/netfilter/xt_ecn.h", + ["xt_ecn_info"]), + "quota": ("include/uapi/linux/netfilter/xt_quota.h", + ["xt_quota_info"]), + "bpf": ("include/uapi/linux/netfilter/xt_bpf.h", + ["xt_bpf_info_v{rev}", "xt_bpf_info"]), + # ---- targets ---- + "NFQUEUE": ("include/uapi/linux/netfilter/xt_NFQUEUE.h", + ["xt_NFQ_info_v{rev}", "xt_NFQ_info"]), + "TCPMSS": ("include/uapi/linux/netfilter/xt_TCPMSS.h", + ["xt_tcpmss_info"]), + "TPROXY": ("include/uapi/linux/netfilter/xt_TPROXY.h", + ["xt_tproxy_target_info_v{rev}", "xt_tproxy_target_info"]), + "SET": ("include/uapi/linux/netfilter/xt_set.h", + ["xt_set_info_target_v{rev}", "xt_set_info_target"]), + "TOS": ("include/uapi/linux/netfilter/xt_TOS.h", + ["xt_tos_target_info"]), + "DSCP": ("include/uapi/linux/netfilter/xt_DSCP.h", + ["xt_dscp_target_info_v{rev}", "xt_dscp_target_info"]), + "TTL": ("include/uapi/linux/netfilter/xt_HL.h", + ["xt_TTL_info"]), + "HL": ("include/uapi/linux/netfilter/xt_HL.h", + ["xt_HL_info"]), + "SYNPROXY": ("include/uapi/linux/netfilter/xt_SYNPROXY.h", + ["xt_synproxy_info"]), + "CT": ("include/uapi/linux/netfilter/xt_CT.h", + ["xt_ct_target_info_v{rev}", "xt_ct_target_info"]), + "CLASSIFY": ("include/uapi/linux/netfilter/xt_CLASSIFY.h", + ["xt_classify_target_info"]), + "CHECKSUM": ("include/uapi/linux/netfilter/xt_CHECKSUM.h", + ["xt_CHECKSUM_info"]), + "AUDIT": ("include/uapi/linux/netfilter/xt_AUDIT.h", + ["xt_audit_info"]), + "SECMARK": ("include/uapi/linux/netfilter/xt_SECMARK.h", + ["xt_secmark_target_info_v{rev}", "xt_secmark_target_info"]), + "HMARK": ("include/uapi/linux/netfilter/xt_HMARK.h", + ["xt_hmark_info"]), + "TEE": ("include/uapi/linux/netfilter/xt_TEE.h", + ["xt_tee_tginfo"]), + "RATEEST": 
("include/uapi/linux/netfilter/xt_RATEEST.h", + ["xt_rateest_target_info"]), + "IDLETIMER": ("include/uapi/linux/netfilter/xt_IDLETIMER.h", + ["idletimer_tg_info"]), + "NETMAP": ("include/uapi/linux/netfilter/nf_nat.h", + ["nf_nat_ipv4_multi_range_compat"]), +} + +# Cache: (header_path, rev, major, minor) → {field_name: (offset, size)} or None +_ext_struct_cache: Dict[Tuple, Optional[Dict[str, Tuple[int, int]]]] = {} + + +def _fetch_extension_struct( + major: int, + minor: int, + header_path: str, + struct_names: List[str], + rev: int = 0, +) -> Optional[Dict[str, Tuple[int, int]]]: + """Fetch an xtables extension header from the kernel tag on GitHub and parse + the struct fields into {field_name: (offset, size)}. + + Tries each template in struct_names in order, substituting {rev}. + Results are cached per (header_path, rev, major, minor). + Returns None if the header cannot be fetched or no struct is found. + """ + cache_key = (header_path, rev, major, minor) + if cache_key in _ext_struct_cache: + return _ext_struct_cache[cache_key] + + tag = f"v{major}.{minor}" + url = f"{_GITHUB_RAW}/{tag}/{header_path}" + vollog.debug("Fetching extension struct from %s", url) + + try: + req = urllib.request.Request(url, headers={"User-Agent": "volatility3-iptables-plugin"}) + with urllib.request.urlopen(req, timeout=10) as resp: + source = resp.read().decode("utf-8", errors="replace") + except Exception as exc: + vollog.debug("Cannot fetch %s (%s): %s", header_path, tag, exc) + _ext_struct_cache[cache_key] = None + return None + + # Collect #define NAME value macros for array-size substitution + macros: Dict[str, int] = {} + for mm in re.finditer(r'#define\s+(\w+)\s+(\d+)', source): + try: + macros[mm.group(1)] = int(mm.group(2)) + except ValueError: + pass + + for tmpl in struct_names: + sname = tmpl.format(rev=rev) + m = re.search( + r'struct\s+' + re.escape(sname) + r'\s*\{([^}]+)\}', + source, re.DOTALL, + ) + if m: + fields = _parse_struct_fields(m.group(1), macros) + if 
fields: + vollog.debug("Parsed %s from %s (kernel %s): %d fields", + sname, header_path, tag, len(fields)) + _ext_struct_cache[cache_key] = fields + return fields + + vollog.debug("No matching struct in %s for kernel %s (tried: %s)", + header_path, tag, [t.format(rev=rev) for t in struct_names]) + _ext_struct_cache[cache_key] = None + return None + + +def _generic_decode_fields(data: bytes, fields: Dict[str, Tuple[int, int]]) -> str: + """Format struct fields as 'key=value' pairs using heuristic type detection. + + Per-field heuristics based on the field name (case-insensitive): + - *port* → decimal (ports may be big-endian in xt structs, try both) + - *ip*, *addr*, *src*, *dst* (4 bytes) → inet_ntoa + - *ip6*, *saddr*, *daddr* (16 bytes) → inet_ntop AF_INET6 + - *name*, *iface*, *dev*, *helper*, *prefix*, *comment* → C string + - flags / masks → 0xhex + - small integers → decimal + """ + parts: List[str] = [] + for fname, (off, sz) in fields.items(): + if off + sz > len(data): + break + raw = data[off: off + sz] + fl = fname.lower() + + # Skip empty / padding fields by name convention + if fl in ('pad', '_pad', '__pad', 'padding', 'reserved', '__res', '__unused'): + continue + + try: + if sz == 1: + v = raw[0] + parts.append(f"{fname}={v}") + + elif sz == 2: + v = struct.unpack_from("H", raw)[0] + parts.append(f"{fname}={vbe}" if 1 <= vbe <= 65535 else f"{fname}={v}") + else: + parts.append(f"{fname}=0x{v:x}" if v > 255 else f"{fname}={v}") + + elif sz == 4: + v = struct.unpack_from("H", raw)[0] + parts.append(f"{fname}={vbe}" if 1 <= vbe <= 65535 else f"{fname}={v}") + elif "mask" in fl or "flag" in fl or "mode" in fl: + parts.append(f"{fname}=0x{v:x}") + elif v > 0xFFFF: + parts.append(f"{fname}=0x{v:x}") + else: + parts.append(f"{fname}={v}") + + elif sz == 8: + v = struct.unpack_from(" 0xFFFF else f"{fname}={v}") + + elif sz == 16: + if any(k in fl for k in ("ip6", "saddr", "daddr", "laddr", + "in6_addr", "addr6")): + try: + 
parts.append(f"{fname}={socket.inet_ntop(socket.AF_INET6, raw)}") + except Exception: + parts.append(f"{fname}={raw.hex()}") + else: + s = _cstr(raw) + parts.append(f"{fname}={s!r}" if s else f"{fname}={raw.hex()}") + + elif sz <= 256: + # Larger field — try as C string if name suggests text + if any(k in fl for k in ("name", "iface", "dev", "helper", + "prefix", "comment", "label")): + s = _cstr(raw) + if s: + parts.append(f"{fname}={s!r}") + elif sz <= 32: + parts.append(f"{fname}={raw.hex()}") + # skip very large arrays (e.g. sctp chunk-types bitfields) + except Exception: + pass + + return " ".join(parts) + + +def _fetch_xt_table_layout_from_source(major: int, minor: int) -> Optional[XtTableLayout]: + """Fetch include/linux/netfilter/x_tables.h from the kernel tag on GitHub, + parse struct xt_table using _parse_struct_offsets(), and return an + XtTableLayout with the COMPUTED field offsets. + + Logs at WARNING level so the result is always visible. + Returns None on fetch / parse failure. 
+ """ + tag = f"v{major}.{minor}" + url = f"{_GITHUB_RAW}/{tag}/{_XT_TABLE_HEADER}" + vollog.warning("Fetching struct xt_table definition from: %s", url) + try: + req = urllib.request.Request(url, headers={"User-Agent": "volatility3-iptables-plugin"}) + with urllib.request.urlopen(req, timeout=15) as resp: + source = resp.read().decode("utf-8", errors="replace") + except urllib.error.HTTPError as exc: + vollog.warning("HTTP error fetching kernel source (%s): %s — using fallback offsets", tag, exc) + return None + except Exception as exc: + vollog.warning("Cannot fetch kernel source (%s): %s — using fallback offsets", tag, exc) + return None + + # Extract XT_TABLE_MAXNAMELEN (used for the name[] array size) + m_macro = re.search(r'#define\s+XT_TABLE_MAXNAMELEN\s+(\d+)', source) + maxnamelen = int(m_macro.group(1)) if m_macro else 32 + + # Extract struct xt_table body + m_struct = re.search(r'struct\s+xt_table\s*\{([^}]+)\}', source, re.DOTALL) + if not m_struct: + vollog.warning("struct xt_table not found in %s — using fallback offsets", url) + return None + + # Replace the macro in the body so the parser sees a literal number + body = m_struct.group(1).replace("XT_TABLE_MAXNAMELEN", str(maxnamelen)) + offsets = _parse_struct_offsets(body) + + required = ("valid_hooks", "private", "me", "af", "name") + missing = [f for f in required if f not in offsets] + if missing: + vollog.warning( + "struct xt_table parse incomplete for %s (missing: %s) — using fallback", + tag, ", ".join(missing), + ) + return None + + layout = XtTableLayout( + name_off = offsets["name"], + valid_hooks_off = offsets["valid_hooks"], + private_off = offsets["private"], + me_off = offsets["me"], + af_off = offsets["af"], + read_size = offsets["name"] + maxnamelen, + ) + vollog.warning( + "Fetched struct xt_table offsets for kernel %s: " + "valid_hooks=%d private=%d me=%d af=%d name=%d (read_size=%d)", + tag, + layout.valid_hooks_off, layout.private_off, + layout.me_off, layout.af_off, 
layout.name_off, + layout.read_size, + ) + + # Also parse xt_table_info from the same source + m_info = re.search(r'struct\s+xt_table_info\s*\{([^}]+)\}', source, re.DOTALL) + if m_info: + m_nhooks = re.search(r'#define\s+NF_INET_NUMHOOKS\s+(\d+)', source) + nhooks = int(m_nhooks.group(1)) if m_nhooks else 5 + info_body = m_info.group(1).replace("NF_INET_NUMHOOKS", str(nhooks)) + info_offs = _parse_struct_offsets(info_body) + required_info = ("size", "hook_entry", "underflow", "jumpstack") + if all(f in info_offs for f in required_info): + # entries[] starts right after jumpstack (void***, 8 bytes on x86-64) + entries_off = info_offs["jumpstack"] + 8 + tbl_info_layout = XtTableInfoLayout( + size_off = info_offs["size"], + hook_entry_off = info_offs["hook_entry"], + underflow_off = info_offs["underflow"], + entries_off = entries_off, + ) + _table_info_layout_cache[(major, minor)] = tbl_info_layout + vollog.warning( + "Fetched xt_table_info offsets for kernel %s: " + "size=%d hook_entry=%d underflow=%d entries=%d", + tag, + tbl_info_layout.size_off, tbl_info_layout.hook_entry_off, + tbl_info_layout.underflow_off, tbl_info_layout.entries_off, + ) + + return layout + + +def _get_xt_table_layout(major: int, minor: int) -> XtTableLayout: + """Return the XtTableLayout for the given kernel version. + + Resolution order: + 1. In-process cache (instant, avoids duplicate fetches). + 2. Dynamic fetch from kernel source on GitHub. + 3. Hard-coded fallback table. + 4. Default to the >= 4.15 layout if nothing else matches. 
+ """ + key = (major, minor) + if key in _layout_cache: + return _layout_cache[key] + + layout = _fetch_xt_table_layout_from_source(major, minor) + + if layout is None: + for (maj, mn_min, mn_max), fallback in _XT_TABLE_LAYOUT_FALLBACKS: + if major == maj and mn_min <= minor <= mn_max: + layout = fallback + vollog.warning( + "Fetch failed; using fallback layout for kernel %d.%d: " + "valid_hooks=%d private=%d me=%d af=%d name=%d", + major, minor, + layout.valid_hooks_off, layout.private_off, + layout.me_off, layout.af_off, layout.name_off, + ) + break + + if layout is None: + layout = XtTableLayout(name_off=56, valid_hooks_off=16, private_off=24, me_off=32, af_off=40, read_size=92) + vollog.warning( + "No known layout for kernel %d.%d; defaulting to >= 4.15 layout (name_off=56).", + major, minor, + ) + + _layout_cache[key] = layout + return layout + + +def _get_xt_table_info_layout(major: int, minor: int) -> XtTableInfoLayout: + """Return XtTableInfoLayout for the given kernel version. + Populated as a side-effect of _get_xt_table_layout (same source fetch). + Falls back to hard-coded 4.x/5.x/6.x layout on failure. + """ + key = (major, minor) + if key not in _table_info_layout_cache: + # Trigger the combined fetch (which populates _table_info_layout_cache) + _get_xt_table_layout(major, minor) + return _table_info_layout_cache.get(key, _XT_TABLE_INFO_FALLBACK) + + +# xt_table_info offsets — replaced by dynamic XtTableInfoLayout / _XT_TABLE_INFO_FALLBACK. +# Kept as documentation only; no longer used in code. 
+# _XT_INFO_SIZE_OFF = 0 # unsigned int size (entries blob length) +# _XT_INFO_HOOK_ENTRY_OFF = 12 # unsigned int hook_entry[5] +# _XT_INFO_UNDERFLOW_OFF = 32 # unsigned int underflow[5] +# _XT_INFO_STRUCT_SIZE = 64 # sizeof(xt_table_info); entries[] starts here + +# ipt_entry +_IPT_ENTRY_TARGET_OFF_OFF = 88 # __u16 target_offset +_IPT_ENTRY_NEXT_OFF_OFF = 90 # __u16 next_offset +_IPT_ENTRY_PCNT_OFF = 96 # __u64 pcnt +_IPT_ENTRY_BCNT_OFF = 104 # __u64 bcnt +_IPT_ENTRY_SIZE = 112 # sizeof(ipt_entry) — elems[] starts here +_IPT_IP_SIZE = 84 # sizeof(ipt_ip) + +# ip6t_entry +_IP6T_ENTRY_TARGET_OFF_OFF = 140 +_IP6T_ENTRY_NEXT_OFF_OFF = 142 +_IP6T_ENTRY_PCNT_OFF = 152 +_IP6T_ENTRY_BCNT_OFF = 160 +_IP6T_ENTRY_SIZE = 168 # sizeof(ip6t_entry) +_IP6T_IP6_SIZE = 136 # sizeof(ip6t_ip6) + +# nf_hook_entry layout (IS in ISF, but handy as constants) +_NF_HOOK_ENTRY_SIZE = 16 # confirmed from ISF +_NF_HOOK_ENTRY_PRIV_OFF = 8 # void *priv + +# --------------------------------------------------------------------------- +# ipt_ip invflags bits +# --------------------------------------------------------------------------- +IPT_INV_VIA_IN = 0x01 +IPT_INV_VIA_OUT = 0x02 +IPT_INV_SRCIP = 0x08 +IPT_INV_DSTIP = 0x10 +IPT_INV_PROTO = 0x40 + +# --------------------------------------------------------------------------- +# Protocol / misc tables +# --------------------------------------------------------------------------- + +TCP_FLAGS: Dict[int, str] = { + 0x01: "FIN", 0x02: "SYN", 0x04: "RST", + 0x08: "PSH", 0x10: "ACK", 0x20: "URG", +} + +REJECT_WITH: Dict[int, str] = { + 0: "icmp-net-unreachable", 1: "icmp-host-unreachable", + 2: "icmp-proto-unreachable", 3: "icmp-port-unreachable", + 4: "icmp-echo-reply", 5: "icmp-net-prohibited", + 6: "icmp-host-prohibited", 7: "tcp-reset", + 8: "icmp-admin-prohibited", +} + +PROTO_NAMES: Dict[int, str] = { + 0: "all", 1: "icmp", 2: "igmp", 4: "ipencap", + 6: "tcp", 17: "udp", 33: "dccp", 41: "ipv6", + 47: "gre", 50: "esp", 51: "ah", 58: "ipv6-icmp", + 
89: "ospf", 94: "ipip", 103: "pim", 132: "sctp", 136: "udplite", +} + + +# --------------------------------------------------------------------------- +# Generic helpers +# --------------------------------------------------------------------------- + +def _flags_str(mask: int, table: Dict[int, str]) -> str: + return ",".join(name for bit, name in sorted(table.items()) if mask & bit) or "NONE" + + +def _cstr(raw: bytes) -> str: + """Return a C string from a bytes object, stopping at the first NUL byte.""" + end = raw.find(b"\x00") + return raw[:end].decode("ascii", errors="replace") if end >= 0 else raw.decode("ascii", errors="replace") + + +def _proto_name(proto: int) -> str: + return PROTO_NAMES.get(proto, str(proto)) if proto else "all" + + +def _ipv4_cidr(addr4: bytes, mask4: bytes) -> str: + try: + addr = socket.inet_ntoa(addr4) + prefix = bin(struct.unpack(">I", mask4)[0]).count("1") + return "0.0.0.0/0" if prefix == 0 else (addr if prefix == 32 else f"{addr}/{prefix}") + except Exception: + return addr4.hex() + + +def _ipv6_cidr(addr16: bytes, mask16: bytes) -> str: + try: + addr = socket.inet_ntop(socket.AF_INET6, addr16) + prefix = sum(bin(b).count("1") for b in mask16) + return addr if prefix == 128 else f"{addr}/{prefix}" + except Exception: + return addr16.hex() + + +# --------------------------------------------------------------------------- +# Raw memory readers (used for module structs not in ISF) +# --------------------------------------------------------------------------- + +def _read_u64(layer, addr: int) -> int: + return struct.unpack_from(" int: + return struct.unpack_from(" bool: + """Heuristic: kernel virtual addresses on x86-64 start at 0xffff…""" + return addr > 0xFFFF_0000_0000_0000 + + +def _get_kernel_version(vmlinux) -> Tuple[str, int, int]: + """Return (banner_str, major, minor) for the running kernel. + + Tries two sources in order: + 1. The ISF's constant_data for the ``linux_banner`` symbol (zero-cost). + 2. 
Reading the symbol's address directly from the translated layer. + + Returns ("unknown", 0, 0) if neither source yields a parseable version. + """ + banner_str = "" + + # Source 1: ISF constant_data (pre-decoded by dwarf2json) + try: + sym = vmlinux.get_symbol("linux_banner") + cd = getattr(sym, "constant_data", None) + if cd: + banner_str = base64.b64decode(cd).rstrip(b"\x00").decode("ascii", errors="replace") + except Exception: + pass + + # Source 2: read from kernel virtual address space + if not banner_str: + try: + sym_off = vmlinux.get_symbol("linux_banner").address + layer = vmlinux.context.layers[vmlinux.layer_name] + raw = layer.read(vmlinux.offset + sym_off, 512) + end = raw.find(b"\x00") + if end > 0: + banner_str = raw[:end].decode("ascii", errors="replace") + except Exception: + pass + + if not banner_str: + return ("unknown", 0, 0) + + m = re.search(r"Linux version (\d+)\.(\d+)", banner_str) + if m: + return (banner_str, int(m.group(1)), int(m.group(2))) + return (banner_str, 0, 0) + + +def _validate_raw_xt_table( + raw: bytes, layout: XtTableLayout +) -> Optional[Tuple[str, int, int, int]]: + """Validate raw bytes as an xt_table struct. + + Works directly on bytes read from physical memory — no virtual address + translation required. The pointer fields (list.next, list.prev, *private, + *me) are kernel virtual addresses stored inside the struct; we validate + their range without dereferencing them. + + Returns (name, private_ptr, af, valid_hooks) or None. + """ + if len(raw) < layout.read_size: + return None + + # list_head.next and list_head.prev live at offsets 0 and 8. + # They must both be kernel virtual addresses. 
+ list_next = struct.unpack_from(" 0xFF: + vollog.debug( + " xt_table candidate '%s': valid_hooks=0x%x out of range — skip", + name, valid_hooks, + ) + return None + # af must be a known NFPROTO_* value (NFPROTO_NUMPROTO = 13) + if af > 13: + vollog.debug( + " xt_table candidate '%s': af=%d > 13 — skip", name, af, + ) + return None + + return name, private_ptr, af, valid_hooks + + +def _try_parse_xt_table( + layer, addr: int, layout: XtTableLayout +) -> Optional[Tuple[str, int, int, int]]: + """Try to interpret a kernel virtual address as an xt_table *. + + Used by the hook-based path which has a proper kernel virtual address. + """ + if not addr or not _is_kernel_ptr(addr): + return None + try: + raw = layer.read(addr, layout.read_size) + except Exception: + return None + return _validate_raw_xt_table(raw, layout) + + +class _PhysLayerProxy: + """Thin wrapper around a physical layer for reading kernel virtual addresses. + + When only the physical (e.g. LimeLayer) is available and no Intel64 + virtual layer was created, kernel VAs in the direct-map range can still + be converted to physical addresses via ``pa = va - page_offset_base``. + VAs in vmalloc space (modules, large vmalloc allocations) cannot be + translated this way; reads to those addresses raise InvalidAddressException. + """ + + def __init__(self, phys_layer, page_offset_base: int): + self._layer = phys_layer + self._pob = page_offset_base + + def read(self, va: int, size: int) -> bytes: + pa = va - self._pob + if pa < 0 or pa > self._layer.maximum_address: + raise exceptions.InvalidAddressException( + "LimeLayer", + va, + f"VA 0x{va:x} not in direct-map range (page_offset_base=0x{self._pob:x})", + ) + return self._layer.read(pa, size) + + +def _read_xt_table_info( + layer, private_ptr: int, info_layout: "XtTableInfoLayout" +) -> Optional[Tuple[int, List[int], List[int]]]: + """Read xt_table_info at private_ptr. + + Returns (blob_size, hook_entry[5], underflow[5]) or None on failure. 
+ underflow[i] is the blob byte-offset of chain i's default-policy entry. + """ + try: + raw = layer.read(private_ptr, info_layout.entries_off) + blob_size = struct.unpack_from(" Optional[bytes]: + if blob_size == 0 or blob_size > 0x10_0000: + return None + blob_addr = private_ptr + info_layout.entries_off + try: + return layer.read(blob_addr, blob_size) + except Exception as exc: + vollog.debug("Cannot read entries blob at 0x%x: %s", blob_addr, exc) + return None + + +# --------------------------------------------------------------------------- +# Match / target extension decoders +# --------------------------------------------------------------------------- + +def _dec_tcp(data: bytes, _rev: int) -> str: + if len(data) < 12: + return f"(short:{data.hex()})" + sp0, sp1, dp0, dp1, opt, fmask, fcmp, inv = struct.unpack_from(" str: + if len(data) < 9: + return f"(short:{data.hex()})" + sp0, sp1, dp0, dp1, inv = struct.unpack_from(" str: + if len(data) < 4: + return f"(short:{data.hex()})" + typ, c0, c1, inv = struct.unpack_from(" str: + if len(data) < 9: + return f"(short:{data.hex()})" + mark, mask, inv = struct.unpack_from(" str: + XT_MULTI_PORTS = 15 + if len(data) < 2: + return f"(short:{data.hex()})" + flags = data[0] + count = min(int(data[1]), XT_MULTI_PORTS) + dirmap = {1: "sport", 2: "dport", 3: "port"} + direction = dirmap.get(flags, "port") + if len(data) < 2 + count * 2: + return f"{direction}:(truncated)" + ports = struct.unpack_from(f"<{count}H", data, 2) + pfl_base = 2 + XT_MULTI_PORTS * 2 + inv_off = pfl_base + XT_MULTI_PORTS + inv = bool(data[inv_off]) if len(data) > inv_off else False + pflags = (data[pfl_base : pfl_base + count] + if len(data) >= pfl_base + count else b"\x00" * count) + parts: List[str] = [] + i = 0 + while i < count: + if i + 1 < count and i < len(pflags) and pflags[i]: + parts.append(f"{ports[i]}:{ports[i+1]}") + i += 2 + else: + parts.append(str(ports[i])) + i += 1 + return f"{'!' 
if inv else ''}{direction} {','.join(parts)}" + + +def _dec_conntrack(data: bytes, rev: int) -> str: + # struct xt_conntrack_mtinfo1 / mtinfo2 / mtinfo3 layout (x86-64): + # + # rev=1 (mtinfo1): 4 × union nf_inet_addr (addr only, 16 B each) = 64 B prefix + # [ 0: 64] origsrc_addr, origdst_addr, replsrc_addr, repldst_addr + # [ 64: 68] u32 expires_min + # [ 68: 72] u32 expires_max + # [ 72: 74] u16 l4proto + # [ 74: 80] 4 × be16 ports (origsrc/origdst/replsrc/repldst) + # [ 80: 82] be16 repldst_port + # [ 82: 84] u16 match_flags + # [ 84: 86] u16 invert_flags + # [ 86] u8 state_mask + # [ 87] u8 status_mask + # + # rev=2/3 (mtinfo2/3): 8 × union nf_inet_addr (addr+mask pairs, 16 B each) = 128 B prefix + # [ 0:128] 4 pairs: origsrc addr/mask, origdst addr/mask, replsrc addr/mask, repldst addr/mask + # [128:132] u32 expires_min + # [132:136] u32 expires_max + # [136:138] u16 l4proto + # [138:146] 4 × be16 ports + # [146:148] u16 match_flags + # [148:150] u16 invert_flags + # [150] u8 state_mask + # [151] u8 status_mask + # v3 adds: u8 origsrc_inv, origdst_inv, replsrc_inv, repldst_inv (152-155) + CT_STATES = { + 0x01: "INVALID", 0x02: "ESTABLISHED", 0x04: "RELATED", + 0x08: "NEW", 0x40: "UNTRACKED", + } + # Use data length to determine layout, not rev — rev is often misread as 0 + # because nft_compat may hold a rev=0 xt_match pointer alongside the actual + # rev=2/3 data. 
+ # xt_conntrack_mtinfo1 (rev=1): 4 × addr (no mask) = 64 B prefix → total ≈ 88 B + # xt_conntrack_mtinfo2/3 (rev≥2): 8 × addr+mask = 128 B prefix → total ≥ 152 B + if len(data) >= 151: + _MATCH_FLAGS_OFF = 146 + _INVERT_FLAGS_OFF = 148 + _STATE_MASK_OFF = 150 + elif rev >= 2: + _MATCH_FLAGS_OFF = 146 + _INVERT_FLAGS_OFF = 148 + _STATE_MASK_OFF = 150 + else: + _MATCH_FLAGS_OFF = 82 + _INVERT_FLAGS_OFF = 84 + _STATE_MASK_OFF = 86 + try: + if len(data) < _STATE_MASK_OFF + 1: + return f"ct(rev{rev},short)" + match_flags = struct.unpack_from(" str: + XT_LIMIT_SCALE = 10_000 + if len(data) < 8: + return f"(short:{data.hex()})" + avg, burst = struct.unpack_from("= 1.0: + return f"limit:{rps:.0f}/sec burst:{burst}" + if rps * 60 >= 1.0: + return f"limit:{rps*60:.0f}/min burst:{burst}" + if rps * 3600 >= 1.0: + return f"limit:{rps*3600:.0f}/hour burst:{burst}" + return f"limit:{rps*86400:.0f}/day burst:{burst}" + + +def _dec_state(data: bytes, _rev: int) -> str: + """Decode xt_state_info (the old '-m state' match). + + struct xt_state_info { unsigned int statemask; }; — just 4 bytes at offset 0. + Uses the same bit assignments as _dec_conntrack's CT_STATES. 
+ """ + if len(data) < 4: + return f"(short:{data.hex()})" + (sm,) = struct.unpack_from(" str: + return repr(_cstr(data[:256])) if data else "(empty)" + + +_ICMPV6_TYPES: Dict[int, str] = { + 1: "dest-unreachable", 2: "packet-too-big", + 3: "time-exceeded", 4: "parameter-problem", + 100: "private-exp", 101: "private-exp", + 128: "echo-request", 129: "echo-reply", + 130: "mld-query", 131: "mld-report", + 132: "mld-done", 133: "router-solicitation", + 134: "router-advertisement", 135: "neighbour-solicitation", + 136: "neighbour-advertisement", 137: "redirect", + 143: "mld2-report", +} + + +def _dec_icmpv6(data: bytes, _rev: int) -> str: + if len(data) < 4: + return f"(short:{data.hex()})" + typ, c0, c1, inv = struct.unpack_from(" str: + if len(data) < 17: + return f"(short:{data.hex()})" + uid_min, uid_max, gid_min, gid_max, flags = struct.unpack_from(" str: + if len(data) < 4: + return f"(short:{data.hex()})" + src, dst = struct.unpack_from(" str: + """Decode xt_set_info_match: ip_set index (u32) then match_set name (up to 32 bytes).""" + if len(data) < 6: + return f"(short:{data.hex()})" + set_name = _cstr(data[4:4 + 32]) if len(data) >= 36 else "" + flags = data[5] + direction = {1: "src", 2: "dst", 3: "src,dst"}.get(flags & 3, "") + if set_name: + return f"--match-set {set_name!r} {direction}" if direction else f"--match-set {set_name!r}" + return f"set(data={data[:8].hex()})" + + +def _dec_connmark(data: bytes, _rev: int) -> str: + """xt_connmark_mtinfo1: mark(u32) mask(u32) invert(u8)""" + if len(data) < 8: + return f"(short:{data.hex()})" + mark, mask = struct.unpack_from(" 8 else 0 + inv_s = "!" 
if inv else "" + return (f"{inv_s}0x{mark:x}" if mask == 0xFFFF_FFFF + else f"{inv_s}0x{mark:x}/0x{mask:x}") + + +def _dec_iprange(data: bytes, _rev: int) -> str: + """xt_iprange_mtinfo: src_min(16) src_max(16) dst_min(16) dst_max(16) flags(u8) + The first 4 bytes of each 16-byte union nf_inet_addr is the IPv4 address.""" + if len(data) < 65: + return f"(short:{data.hex()})" + src_min = socket.inet_ntoa(data[0:4]) + src_max = socket.inet_ntoa(data[16:20]) + dst_min = socket.inet_ntoa(data[32:36]) + dst_max = socket.inet_ntoa(data[48:52]) + flags = data[64] + inv_src = "!" if flags & 0x04 else "" + inv_dst = "!" if flags & 0x08 else "" + parts: List[str] = [] + if flags & 0x01: + rng = src_min if src_min == src_max else f"{src_min}-{src_max}" + parts.append(f"--src-range {inv_src}{rng}") + if flags & 0x02: + rng = dst_min if dst_min == dst_max else f"{dst_min}-{dst_max}" + parts.append(f"--dst-range {inv_dst}{rng}") + return " ".join(parts) or f"iprange(flags=0x{flags:x})" + + +def _dec_hashlimit(data: bytes, _rev: int) -> str: + """xt_hashlimit_mtinfo1/2: name[16], cfg(mode u32, avg u32, burst u32, ...)""" + if len(data) < 28: + return f"(short:{data.hex()})" + name = _cstr(data[0:16]) + mode, avg, burst = struct.unpack_from("= 1.0: + rate_s = f"{rps:.0f}/sec" + elif rps * 60 >= 1.0: + rate_s = f"{rps*60:.0f}/min" + elif rps * 3600 >= 1.0: + rate_s = f"{rps*3600:.0f}/hour" + else: + rate_s = f"{rps*86400:.0f}/day" + s = f"--hashlimit {rate_s} --hashlimit-burst {burst} --hashlimit-mode {mode_s}" + if name: + s += f" --hashlimit-name {name!r}" + return s + + +def _dec_recent(data: bytes, _rev: int) -> str: + """xt_recent_mtinfo: seconds(u32) hit_count(u32) check_set(u8) invert(u8) name[200] side(u8)""" + if len(data) < 12: + return f"(short:{data.hex()})" + seconds, hit_count = struct.unpack_from("= 210 else "" + inv_s = "!" 
if invert else "" + ops = {0x01: "--set", 0x02: "--rcheck", 0x04: "--update", 0x08: "--remove"} + op = next((v for k, v in ops.items() if check_set & k), "--rcheck") + parts = [f"{inv_s}{op}"] + if name: + parts.append(f"--name {name!r}") + if seconds: + parts.append(f"--seconds {seconds}") + if hit_count: + parts.append(f"--hitcount {hit_count}") + return " ".join(parts) + + +def _dec_string(data: bytes, _rev: int) -> str: + """xt_string_info: from(u16) to(u16) algo[16] pattern[128] patlen(u8) flags(u8)""" + if len(data) < 22: + return f"(short:{data.hex()})" + from_off, to_off = struct.unpack_from(" 148 else min(len(data) - 20, 128) + pattern = _cstr(data[20: 20 + min(patlen, 128)]) + flags = data[149] if len(data) > 149 else 0 + inv_s = "!" if flags & 0x01 else "" + s = f"{inv_s}--string {pattern!r}" + if algo: + s += f" --algo {algo}" + return s + + +def _dec_length(data: bytes, _rev: int) -> str: + """xt_length_info: min(u16) max(u16) invert(u8)""" + if len(data) < 4: + return f"(short:{data.hex()})" + lo, hi = struct.unpack_from(" 4 else 0 + inv_s = "!" if inv else "" + return (f"--length {inv_s}{lo}" if lo == hi else f"--length {inv_s}{lo}:{hi}") + + +def _dec_mac(data: bytes, _rev: int) -> str: + """xt_mac_info: srcaddr[6] + (padding 2) + invert(int 4)""" + if len(data) < 6: + return f"(short:{data.hex()})" + mac = ":".join(f"{b:02x}" for b in data[0:6]) + inv = struct.unpack_from("= 12 else 0 + return f"{'!' if inv else ''}--mac-source {mac}" + + +def _dec_physdev(data: bytes, _rev: int) -> str: + """xt_physdev_info: physindev[16] in_mask[16] physoutdev[16] out_mask[16] invert(u8) bitmask(u8)""" + if len(data) < 66: + return f"(short:{data.hex()})" + indev = _cstr(data[0:16]) + outdev = _cstr(data[32:48]) + invert = data[64] + bmask = data[65] + parts: List[str] = [] + if bmask & 0x01 and indev: + parts.append(f"{'!' if invert & 0x04 else ''}--physdev-in {indev}") + if bmask & 0x02 and outdev: + parts.append(f"{'!' 
if invert & 0x08 else ''}--physdev-out {outdev}") + if bmask & 0x04: + parts.append("--physdev-is-in") + if bmask & 0x08: + parts.append("--physdev-is-out") + if bmask & 0x10: + parts.append("--physdev-is-bridged") + return " ".join(parts) or f"physdev(bmask=0x{bmask:x})" + + +_PKTTYPE_NAMES = {0: "unicast", 1: "broadcast", 2: "multicast", + 3: "otherhost", 4: "outgoing"} + + +def _dec_pkttype(data: bytes, _rev: int) -> str: + """xt_pkttype_info: pkttype(int) invert(int)""" + if len(data) < 4: + return f"(short:{data.hex()})" + pkttype = struct.unpack_from("= 8 else 0 + name = _PKTTYPE_NAMES.get(pkttype, str(pkttype)) + return f"{'!' if inv else ''}--pkt-type {name}" + + +def _dec_statistic(data: bytes, _rev: int) -> str: + """xt_statistic_info: mode(u16) pad(u16) union{nth{every,packet,count}, random{probability}}""" + if len(data) < 6: + return f"(short:{data.hex()})" + mode = struct.unpack_from("= 12 else (0, 0) + return f"--mode nth --every {every} --packet {packet}" + else: # random + prob = struct.unpack_from("= 8 else 0 + # probability is scaled: 0x80000000 = 50%, 0xFFFFFFFF = 100% + pct = prob / 0xFFFF_FFFF * 100 + return f"--mode random --probability {pct:.4f}" + + +_MATCH_DECODERS = { + "tcp": _dec_tcp, + "udp": _dec_udp, + "icmp": _dec_icmp, + "icmpv6": _dec_icmpv6, + "mark": _dec_mark, + "multiport": _dec_multiport, + "conntrack": _dec_conntrack, + "state": _dec_state, + "limit": _dec_limit, + "comment": _dec_comment, + "owner": _dec_owner, + "addrtype": _dec_addrtype, + "set": _dec_set, + "connmark": _dec_connmark, + "iprange": _dec_iprange, + "hashlimit": _dec_hashlimit, + "recent": _dec_recent, + "string": _dec_string, + "length": _dec_length, + "mac": _dec_mac, + "physdev": _dec_physdev, + "pkttype": _dec_pkttype, + "statistic": _dec_statistic, +} + + +def _decode_match(name: str, rev: int, data: bytes, + kver: Tuple[int, int] = (0, 0)) -> str: + # Tier 1 — hardcoded decoder (best human-readable output) + decoder = _MATCH_DECODERS.get(name) + if 
decoder: + try: + return decoder(data, rev) + except Exception as exc: + vollog.debug("match %s hardcoded decode error: %s", name, exc) + + # Tier 3 — dynamic struct fetch from kernel source on GitHub + if kver != (0, 0) and name in _EXT_HEADERS: + header_path, struct_names = _EXT_HEADERS[name] + fields = _fetch_extension_struct(kver[0], kver[1], header_path, struct_names, rev) + if fields: + decoded = _generic_decode_fields(data, fields) + if decoded: + return decoded + + # Tier 2 — lossless raw hex fallback (never silently drop extension data) + return f"(raw:{data[:48].hex()})" if data else "" + + +# --------------------------------------------------------------------------- +# Target decoders +# --------------------------------------------------------------------------- + +def _tdec_reject(data: bytes) -> str: + if len(data) < 4: + return "REJECT" + (w,) = struct.unpack_from(" str: + if len(data) < 32: + return "LOG" + level = data[0] + prefix = _cstr(data[2:32]) + return f"LOG level:{level}" + (f" prefix:{prefix!r}" if prefix else "") + + +def _tdec_nflog(data: bytes) -> str: + if len(data) < 76: + return "NFLOG" + _, group = struct.unpack_from(" str: + if len(data) < 24: + return "SNAT" + _rsz, _flags, min_ip, max_ip = struct.unpack_from("H", data, 16)[0] + max_port = struct.unpack_from(">H", data, 18)[0] + ip_str = min_str if min_ip == max_ip else f"{min_str}-{max_str}" + port_str = (f":{min_port}" if min_port == max_port and min_port + else f":{min_port}-{max_port}" if min_port else "") + return f"SNAT to:{ip_str}{port_str}" + + +def _tdec_dnat(data: bytes) -> str: + return _tdec_snat(data).replace("SNAT", "DNAT", 1) + + +def _tdec_masq(data: bytes) -> str: + if len(data) < 20: + return "MASQUERADE" + min_port = struct.unpack_from(">H", data, 16)[0] if len(data) >= 18 else 0 + max_port = struct.unpack_from(">H", data, 18)[0] if len(data) >= 20 else 0 + port_str = (f":{min_port}" if min_port == max_port and min_port + else f":{min_port}-{max_port}" if 
min_port else "") + return f"MASQUERADE{port_str}" + + +def _tdec_redirect(data: bytes) -> str: + # struct nf_nat_ipv4_multi_range_compat: rangesize(u32) + nf_nat_ipv4_range[1] + # nf_nat_ipv4_range: flags(u32) + min_ip(u32) + max_ip(u32) + min_port(be16) + max_port(be16) + if len(data) < 20: + return "REDIRECT" + min_port, max_port = struct.unpack_from(">HH", data, 16) + if min_port == 0 and max_port == 0: + return "REDIRECT" + port_str = str(min_port) if min_port == max_port else f"{min_port}:{max_port}" + return f"REDIRECT --to-ports {port_str}" + + +def _tdec_mark_tgt(data: bytes) -> str: + """Decode XT_MARK target (struct xt_mark_tginfo2: mark + mask, each u32).""" + if len(data) < 8: + return "MARK" + mark, mask = struct.unpack_from(" str: + """Decode CONNMARK target (struct xt_connmark_tginfo1: ctmark,ctmask,nfmask u32, mode u8).""" + if len(data) < 13: + return "CONNMARK" + ctmark, ctmask, nfmask, mode = struct.unpack_from(" str: + """xt_tcpmss_info: mss(u16). 0xFFFF means clamp-to-PMTU.""" + if len(data) < 2: + return "TCPMSS" + mss = struct.unpack_from(" str: + """xt_NFQ_info (rev 0-3): queuenum(u16) [queues_total(u16)] [flags(u16)]""" + if len(data) < 2: + return "NFQUEUE" + qnum = struct.unpack_from(" 1: + s += f" --queue-balance {qnum}:{qnum + total - 1}" + if len(data) >= 6: + flags = struct.unpack_from(" str: + """xt_tproxy_target_info (rev 0): mark_mask(u32) mark_value(u32) laddr(be32) lport(be16)""" + if len(data) < 14: + return "TPROXY" + mark_mask, mark_value = struct.unpack_from("H", data, 12)[0] + s = "TPROXY" + if laddr != "0.0.0.0": + s += f" --on-ip {laddr}" + if lport: + s += f" --on-port {lport}" + if mark_value: + mask_s = f"/0x{mark_mask:x}" if mark_mask != 0xFFFF_FFFF else "" + s += f" --tproxy-mark 0x{mark_value:x}{mask_s}" + return s + + +def _tdec_set_tgt(data: bytes) -> str: + """xt_set_info_target_v*: two xt_set_info (add_set, del_set), each index(u16)+dim(u8)+flags(u8).""" + if len(data) < 4: + return "SET" + add_idx, add_dim, 
add_flags = struct.unpack_from("= 8: + del_idx, del_dim, del_flags = struct.unpack_from(" str: + """xt_tos_target_info: tos_value(u8) tos_mask(u8)""" + if len(data) < 1: + return "TOS" + tos = data[0] + mask = data[1] if len(data) > 1 else 0xFF + return (f"TOS --set-tos 0x{tos:02x}" if mask == 0xFF + else f"TOS --set-tos 0x{tos:02x}/0x{mask:02x}") + + +def _tdec_dscp(data: bytes) -> str: + """xt_dscp_target_info: dscp(u8)""" + if len(data) < 1: + return "DSCP" + dscp = data[0] & 0x3F + return f"DSCP --set-dscp 0x{dscp:02x}" + + +def _tdec_synproxy(data: bytes) -> str: + """xt_synproxy_info: options(u32) wscale(u8) mss_idx(u8) mss(u16)""" + if len(data) < 8: + return "SYNPROXY" + options, wscale, _, mss = struct.unpack_from(" str: + """NETMAP uses same struct as SNAT/DNAT.""" + return _tdec_snat(data).replace("SNAT", "NETMAP", 1) + + +_TARGET_DECODERS = { + "REJECT": _tdec_reject, + "LOG": _tdec_log, + "NFLOG": _tdec_nflog, + "SNAT": _tdec_snat, + "DNAT": _tdec_dnat, + "MASQUERADE": _tdec_masq, + "REDIRECT": _tdec_redirect, + "MARK": _tdec_mark_tgt, + "CONNMARK": _tdec_connmark, + "TCPMSS": _tdec_tcpmss, + "NFQUEUE": _tdec_nfqueue, + "TPROXY": _tdec_tproxy, + "SET": _tdec_set_tgt, + "TOS": _tdec_tos, + "DSCP": _tdec_dscp, + "SYNPROXY": _tdec_synproxy, + "NETMAP": _tdec_netmap, +} + + +def _decode_target(name: str, data: bytes, + kver: Tuple[int, int] = (0, 0)) -> str: + # Tier 1 — hardcoded decoder + decoder = _TARGET_DECODERS.get(name) + if decoder: + try: + return decoder(data) + except Exception as exc: + vollog.debug("target %s hardcoded decode error: %s", name, exc) + + # Tier 3 — dynamic struct fetch from kernel source on GitHub + if kver != (0, 0) and name in _EXT_HEADERS: + header_path, struct_names = _EXT_HEADERS[name] + fields = _fetch_extension_struct(kver[0], kver[1], header_path, struct_names, 0) + if fields: + decoded = _generic_decode_fields(data, fields) + if decoded: + return f"{name} {decoded}" + + # Tier 2 — lossless raw hex fallback + return 
f"{name}(raw:{data[:48].hex()})" if data else name + + +# --------------------------------------------------------------------------- +# Entry blob parsing +# --------------------------------------------------------------------------- + +# Offset of the kernel.match / kernel.target pointer within the 32-byte +# xt_entry_match / xt_entry_target header. In kernel memory the union member +# kernel.match (8-byte pointer, needs 8-byte alignment) is placed at union +# offset 8 (after the 2-byte match_size + 6 bytes of alignment padding), +# overwriting user.name[6..13]. Bytes 2-7 (user.name[0..5]) are preserved. +_KT_POINTER_OFF = 8 # kernel.match / kernel.target ptr within header +# xt_match / xt_target: list_head (16 bytes) then char name[30]. +_XT_MATCH_NAME_OFF = 16 + + +def _read_xt_name(header_bytes: bytes, layer) -> str: + """Return the match/target name from kernel memory. + + Strategy: + 1. Read the kernel.match pointer at _KT_POINTER_OFF within the header. + 2. Dereference pointer + _XT_MATCH_NAME_OFF via *layer* to get the name + from the live xt_match / xt_target struct (authoritative, works for + any name length). + 3. Fall back to reading bytes[2:] of the header with first-NUL stop. + This is correct for names whose NUL byte falls before _KT_POINTER_OFF + (i.e., names ≤ 5 chars: "udp", "tcp", "state", "ERROR", etc.) and + for the standard verdict target (name = ""). 
+ """ + # --- strategy 1: dereference the kernel.match pointer --- + if len(header_bytes) >= _KT_POINTER_OFF + 8: + ptr = struct.unpack_from(" List[str]: + results: List[str] = [] + pos = 0 + while pos + MATCH_HEADER_SIZE <= len(elems_slice): + raw_size = struct.unpack_from(" len(elems_slice): + break + header = elems_slice[pos : pos + MATCH_HEADER_SIZE] + name = _read_xt_name(header, layer) + rev = header[2 + XT_FUNCTION_MAXNAMELEN - 1] if len(header) > 2 + XT_FUNCTION_MAXNAMELEN - 1 else 0 + data = elems_slice[pos + MATCH_HEADER_SIZE : pos + raw_size] + decoded = _decode_match(name, rev, data, kver) + results.append(f"-m {name}" + (f" {decoded}" if decoded else "")) + pos += raw_size + return results + + +def _parse_target(elems_slice: bytes, tgt_pos: int, layer, + kver: Tuple[int, int] = (0, 0)) -> Tuple[str, str]: + """Returns (raw_name, human_string). + raw_name == "" → standard verdict (ACCEPT/DROP/RETURN/JUMP) + raw_name == "ERROR" → user-chain header or end sentinel + """ + if tgt_pos + MATCH_HEADER_SIZE > len(elems_slice): + return ("?", "?") + raw_size = struct.unpack_from("= 4: + verdict = struct.unpack_from("= 0 else f"UNKNOWN({verdict})", + ) + return ("", label) + return ("", "?") + return (name, _decode_target(name, data, kver)) + + +def _parse_ipv4_entry(blob: bytes, offset: int, layer, + percpu_offsets: Optional[List[int]] = None, + kver: Tuple[int, int] = (0, 0)) -> Optional[dict]: + if offset + _IPT_ENTRY_SIZE > len(blob): + return None + target_off, next_off = struct.unpack_from(" len(blob): + return None + pcnt_raw, bcnt_raw = struct.unpack_from(" Optional[dict]: + if offset + _IP6T_ENTRY_SIZE > len(blob): + return None + target_off, next_off = struct.unpack_from(" len(blob): + return None + pcnt_raw, bcnt_raw = struct.unpack_from(" 132 else 0 + + if invflags & IPT_INV_SRCIP: src = "!" + src + if invflags & IPT_INV_DSTIP: dst = "!" + dst + proto_s = ("!" if invflags & IPT_INV_PROTO else "") + _proto_name(proto_num) + ini_s = ("!" 
if invflags & IPT_INV_VIA_IN else "") + ini if ini else "*" + outi_s = ("!" if invflags & IPT_INV_VIA_OUT else "") + outi if outi else "*" + + elems = blob[offset + _IP6T_ENTRY_SIZE : offset + next_off] + match_end = target_off - _IP6T_ENTRY_SIZE + matches = _parse_matches(elems[:match_end], layer, kver) + t_name, t_str = _parse_target(elems, match_end, layer, kver) + + return dict(next_off=next_off, t_name=t_name, t_str=t_str, + target_off=target_off, + src=src, dst=dst, proto=proto_s, ini=ini_s, outi=outi_s, + matches="; ".join(matches), pkts=pcnt, bytes=bcnt) + + +def _extract_error_chain_name(blob: bytes, offset: int, + entry_hdr_size: int, target_off_field_off: int, + layer) -> str: + """Return the chain name from an ERROR target entry. + + The ERROR target stores the chain name in its *data* section (right after + the 32-byte xt_entry_target header), NOT in the name field. It is a plain + C string written by the kernel at rule-load time and is NOT overwritten by + the kernel.target pointer, so _cstr(find-NUL) is sufficient here. + """ + try: + target_off = struct.unpack_from(" Dict[int, str]: + result: Dict[int, str] = {} + for i, name in enumerate(NF_INET_HOOKS): + if valid_hooks & (1 << i): + result[hook_entry[i]] = name + return result + + +def _build_underflow_set(valid_hooks: int, underflow: List[int]) -> set: + """Return the set of blob offsets that are default-policy entries. + + underflow[i] == 0xFFFFFFFF means hook i is not active in this table. + """ + result = set() + for i in range(len(underflow)): + if (valid_hooks & (1 << i)) and underflow[i] != 0xFFFF_FFFF: + result.add(underflow[i]) + return result + + +def _build_user_chain_map( + blob: bytes, + entry_hdr_size: int, + target_off_field_off: int, + layer, +) -> Dict[int, str]: + """Pre-scan the entries blob and return {blob_offset: chain_name} for user chains. + + User-defined chains are headed by an ERROR-target entry whose target.data[] + contains the chain name. 
A JUMP rule's verdict is a positive integer equal + to the blob offset of that ERROR header. This map lets _walk_entries resolve + JUMP@+offset → human chain name. + """ + result: Dict[int, str] = {} + off = 0 + while off + entry_hdr_size <= len(blob): + t_off_pos = off + target_off_field_off + if t_off_pos + 4 > len(blob): + break + next_off = struct.unpack_from(" len(blob): + break + off += next_off + return result + + +# --------------------------------------------------------------------------- +# Generic entry walker +# --------------------------------------------------------------------------- + +def _walk_entries( + blob: bytes, + cmap: Dict[int, str], + underflow_set: set, + tbl_name: str, + netns_id, + af: str, + entry_hdr_size: int, + target_off_field_off: int, + parse_fn, + layer, + percpu_offsets: Optional[List[int]] = None, + kver: Tuple[int, int] = (0, 0), +) -> Iterator[Tuple]: + # Pre-scan: build a map of blob_offset → user-chain name for JUMP resolution. + user_chains = _build_user_chain_map(blob, entry_hdr_size, target_off_field_off, layer) + + chain = "UNKNOWN" + rule_num = 0 + offset = 0 + + while offset < len(blob): + if offset in cmap: + chain = cmap[offset] + rule_num = 0 + + entry = parse_fn(blob, offset, layer, percpu_offsets, kver) + if entry is None: + vollog.debug("[%s/%s] unparseable entry at blob+%d", af, tbl_name, offset) + break + + next_off = entry["next_off"] + t_name = entry["t_name"] + + if t_name == "ERROR": + chain_name = _extract_error_chain_name( + blob, offset, entry_hdr_size, target_off_field_off, layer + ) + if not chain_name or chain_name == "ERROR": + break # end-of-table sentinel + chain = chain_name + rule_num = 0 + offset += next_off + continue + + # Policy (default) entries sit at underflow[] offsets — mark them so + # the analyst can distinguish them from explicit rules. 
+ is_policy = offset in underflow_set + display_chain = f"{chain} (default policy)" if is_policy else chain + + # Resolve JUMP@+ to a user-chain name when possible. + t_str = entry["t_str"] + if t_str.startswith("JUMP@+"): + try: + jump_off = int(t_str[6:]) + t_str = user_chains.get(jump_off, t_str) + except ValueError: + pass + + yield (0, ( + netns_id, af, tbl_name, display_chain, rule_num, + entry["src"], entry["dst"], entry["proto"], + entry["ini"], entry["outi"], + entry["matches"], t_str, + entry["pkts"], entry["bytes"], + )) + rule_num += 1 + offset += next_off + + +# --------------------------------------------------------------------------- +# Module-list check +# --------------------------------------------------------------------------- + +_IPTABLES_MODULE_NAMES = frozenset({ + "ip_tables", "iptable_filter", "iptable_nat", "iptable_mangle", + "iptable_raw", "iptable_security", + "ip6_tables", "ip6table_filter", "ip6table_nat", "ip6table_mangle", +}) + + +def _loaded_iptables_modules(vmlinux) -> List[str]: + """Walk the kernel module list and return any ip(6)tables-related modules. + + Uses the kernel's doubly-linked ``modules`` list (symbol ``modules``, + type ``module``). Returns [] if the symbol/type is absent or the walk + fails (e.g. no debug symbols for ``struct module``). 
+ """ + found: List[str] = [] + try: + if not vmlinux.has_symbol("modules") or not vmlinux.has_type("module"): + vollog.debug("modules symbol/type absent — cannot check loaded modules") + return found + + mod_type = vmlinux.symbol_table_name + constants.BANG + "module" + head = vmlinux.object_from_symbol("modules") + layer = vmlinux.context.layers[vmlinux.layer_name] + + for mod in head.to_list(mod_type, "list"): + try: + # char name[MODULE_NAME_LEN] — read raw bytes from the layer + raw = layer.read(mod.name.vol.offset, 64) + name = raw.split(b"\x00")[0].decode("ascii", errors="replace") + if name in _IPTABLES_MODULE_NAMES: + found.append(name) + except Exception: + continue + except Exception as exc: + vollog.debug("Cannot walk module list: %s", exc) + return found + + +# --------------------------------------------------------------------------- +# Memory scan fallback +# --------------------------------------------------------------------------- + + +def _get_page_offset_base(vmlinux) -> int: + """Read the ``page_offset_base`` kernel variable (virtual base of direct map). + + Falls back to the canonical default for 5.x x86-64 if the symbol is not + readable. + """ + try: + layer = vmlinux.context.layers[vmlinux.layer_name] + sym = vmlinux.get_symbol("page_offset_base") + pob_vaddr = vmlinux.offset + sym.address + pob = struct.unpack_from(" Optional[List[int]]: + """Return per-CPU base offsets for counter resolution. + + On SMP kernels (nr_cpu_ids > 1), xt_percpu_counter_alloc() stores a + percpu *offset* in ipt_entry.counters.pcnt instead of the packet count. + The actual count for CPU n lives at pcnt_offset + __per_cpu_offset[n]. + + Returns a list of __per_cpu_offset values (one per possible CPU) if SMP, + or None if the kernel is single-CPU (counters are inline u64 values). 
+ """ + try: + nr_sym = vmlinux.get_symbol("nr_cpu_ids") + layer = vmlinux.context.layers[vmlinux.layer_name] + nr_cpus = struct.unpack_from(" Tuple[int, int]: + """Return (packets, bytes) for a rule entry. + + Single-CPU kernels (percpu_offsets is None): pcnt_raw and bcnt_raw are + directly the packet/byte totals stored inline in ipt_entry.counters. + + SMP kernels (percpu_offsets provided): pcnt_raw is the percpu allocation + offset (NOT a packet count). The real xt_counters live at + pcnt_raw + __per_cpu_offset[cpu] for each CPU. Sum across all CPUs. + """ + if percpu_offsets is None: + return pcnt_raw, bcnt_raw + + if pcnt_raw == 0: + return 0, 0 + + pkts = 0 + byts = 0 + for cpu_off in percpu_offsets: + try: + addr = (pcnt_raw + cpu_off) & 0xFFFF_FFFF_FFFF_FFFF + p, b = struct.unpack_from(" Tuple[str, int, int]: + """Scan physical memory for the Linux kernel version banner (no ISF needed). + + Used when the plugin runs without a symbol table (--scan-only or when no + matching ISF is available). Returns (banner_str, major, minor), or + ("unknown", 0, 0) if nothing is found. 
+ """ + try: + virt_layer = context.layers[layer_name] + phys_name = virt_layer.config.get("memory_layer") + phys_layer = context.layers[phys_name] if phys_name else virt_layer + prefix = b"Linux version " + for offset, _ in phys_layer.scan( + context=context, + scanner=scanners.MultiStringScanner([prefix]), + ): + try: + raw = phys_layer.read(offset, 256) + end = raw.find(b"\x00") + text = raw[: end if end > 0 else 256].decode("ascii", errors="replace") + m = re.search(r"Linux version (\d+)\.(\d+)", text) + if m: + vollog.debug("Banner found at phys 0x%x: %s", offset, text[:80]) + return text, int(m.group(1)), int(m.group(2)) + except Exception: + continue + except Exception as exc: + vollog.debug("Banner scan failed: %s", exc) + return "unknown", 0, 0 + + +def _scan_for_xt_tables( + context, + layer_name: str, + layout: XtTableLayout, + page_offset_base: int = 0xFFFF_8880_0000_0000, + progress_callback=None, +) -> Dict[int, Tuple[str, int, int, int]]: + """Scan **physical** memory for xt_table structs. + + WHY PHYSICAL: + The Intel64 virtual layer's maximum_address is (1<<48)-1. All canonical + kernel addresses (0xffff…) exceed that, so sections in the kernel range + get clipped to nothing by the scanner. Scanning physical memory avoids + that limitation and is faster (no per-chunk page-table translation). + + WHY NOT page_offset_base FOR VALIDATION: + xt_table structs are static variables inside kernel modules (e.g. + iptable_filter.ko). Module data lives in vmalloc space (~0xffffc9…), + NOT in the direct physical mapping (~0xffff88…). Translating physical + hit addresses through page_offset_base gives a wrong virtual address for + those pages, causing the virtual-layer read to fail or return garbage. + + FIX — read and validate from the physical layer directly: + 1. Scan physical layer for known table-name byte patterns (Aho-Corasick). + 2. For each hit at physical address P, compute struct base: P - name_off. + 3. 
Read layout.read_size bytes from the **physical layer** at that offset. + 4. Validate using _validate_raw_xt_table() which checks pointer ranges on + the raw bytes (no virtual-layer translation needed). + 5. Store the extracted private_ptr (a real kernel VA) for later use with + the virtual layer to read xt_table_info and the entries blob. + + Namespace detection still needs the xt_table's virtual address. We + derive it from list.next: the next struct's VA is stored in the struct + itself, so we can walk the list using virtual addresses via the virtual + layer once we know one concrete VA (obtained from private_ptr or list + pointers in the raw bytes). + + Returns {private_ptr: (name, private_ptr, af, valid_hooks)}. The key is + private_ptr (a kernel VA) rather than a translated virtual address, since + we cannot reliably compute the struct's VA from physical for vmalloc pages. + """ + virt_layer = context.layers[layer_name] + phys_layer_name = virt_layer.config.get("memory_layer") + phys_layer = context.layers[phys_layer_name] if phys_layer_name else virt_layer + + phys_size = int(phys_layer.maximum_address) + 1 + vollog.info( + "Scanning physical memory 0x0 – 0x%x (%d MiB) for xt_table name strings " + "(layout: name_off=%d, valid_hooks_off=%d, private_off=%d, af_off=%d)", + phys_layer.maximum_address, phys_size >> 20, + layout.name_off, layout.valid_hooks_off, layout.private_off, layout.af_off, + ) + + # Scan for each known table name followed immediately by a NUL byte. + # The name field is char name[32], so "filter\0" occupies bytes 0-6 of + # a 32-byte field — the pattern matches reliably. + # Search for each table name immediately followed by \x00. + # The name field is char name[XT_TABLE_MAXNAMELEN] = char name[32], so + # "filter\x00" reliably matches the start of that field. 
+ patterns = [name.encode() + b"\x00" for name in sorted(KNOWN_TABLE_NAMES)] + hits_total = 0 + hits_validated = 0 + + # ----------------------------------------------------------------------- + # Name-offset candidates to try for each hit. + # Priority order: fetched layout value first, then the two known variants. + # Using dict.fromkeys preserves order while deduplicating. + # ----------------------------------------------------------------------- + _name_offs = list(dict.fromkeys([layout.name_off, 56, 48])) + + # Read enough bytes to cover the largest possible layout. + # layout.read_size covers name_off + 32; add 8 bytes margin. + _READ_SIZE = max(layout.read_size + 8, 96) + + vollog.warning( + "Scan using layout: list@0 valid_hooks@%d private@%d me@%d af@%d " + "name candidates: %s read_size=%d", + layout.valid_hooks_off, layout.private_off, layout.me_off, layout.af_off, + _name_offs, _READ_SIZE, + ) + + # Key: private_ptr (kernel VA of xt_table_info). + found: Dict[int, Tuple] = {} + + try: + for hit_phys, _matched in phys_layer.scan( + context=context, + scanner=scanners.MultiStringScanner(patterns), + progress_callback=progress_callback, + ): + hits_total += 1 + + for name_off_try in _name_offs: + struct_phys = hit_phys - name_off_try + if struct_phys < 0: + continue + + try: + raw = phys_layer.read(struct_phys, _READ_SIZE) + except Exception: + continue + + if len(raw) < _READ_SIZE: + continue + + # ---- validate using layout-derived field offsets ----------- + # list.next and list.prev are always at 0 and 8 (list_head first). 
+ list_next = struct.unpack_from(" len(raw): + continue + valid_hooks = struct.unpack_from(" 0xFF: + continue + + if layout.private_off + 8 > len(raw): + continue + private_ptr = struct.unpack_from(" len(raw): + continue + me_ptr = struct.unpack_from("= len(raw): + continue + af = raw[layout.af_off] + if af > 13: + continue + + # Confirm name at this candidate offset + tbl_name = _cstr(raw[name_off_try : name_off_try + 32]) + if tbl_name not in KNOWN_TABLE_NAMES: + continue + + # ---- accepted ----------------------------------------------- + hits_validated += 1 + parsed = (tbl_name, private_ptr, af, valid_hooks) + + if private_ptr not in found: + found[private_ptr] = parsed + vollog.debug( + " xt_table '%s' phys=0x%x (name_off=%d) private=0x%x " + "af=%d valid_hooks=0x%x list.next=0x%x", + tbl_name, struct_phys, name_off_try, private_ptr, + af, valid_hooks, list_next, + ) + break # don't re-validate the same hit with the other name_off + + except Exception as exc: + vollog.warning("xt_table physical scan error: %s", exc) + + # Always show hit counts at WARNING so they're visible without -v flags. + if hits_total == 0: + vollog.warning( + "Physical scan found 0 occurrences of table name strings " + "('filter', 'nat', 'mangle', 'raw', 'security'). " + "The ip_tables / iptable_filter kernel module is likely NOT loaded. " + "On iptables-nft systems the rule lives in nftables structures — " + "use the linux.netfilter plugin instead, or verify with " + "'lsmod | grep ip_tables' on the source system." + ) + elif hits_validated == 0: + vollog.warning( + "Physical scan: %d name-string hit(s) for table names, but 0 passed " + "struct validation (name_off tried: %s; valid_hooks@%d private@%d " + "me@%d af@%d). " + "Most likely cause: the individual iptables table modules are not loaded. 
" + "Having ip_tables.ko loaded is NOT sufficient — xt_table structs are " + "only created when the per-table modules load: iptable_filter.ko, " + "iptable_nat.ko, iptable_mangle.ko, iptable_raw.ko. " + "On nft_compat / iptables-nft systems, ip_tables.ko may be present as " + "a dependency while all rules live in nftables structures. " + "Verify on the source system: lsmod | grep -E 'iptable_|ip6table_'", + hits_total, _name_offs, + layout.valid_hooks_off, layout.private_off, layout.me_off, layout.af_off, + ) + else: + vollog.warning( + "Physical scan: %d name-string hit(s), %d passed struct validation " + "(name_off tried: %s; af@%d me@%d from fetched layout).", + hits_total, hits_validated, + _name_offs, layout.af_off, layout.me_off, + ) + return found + + + +# --------------------------------------------------------------------------- +# Plugin +# --------------------------------------------------------------------------- + +class IPTables(plugins.PluginInterface): + """Extracts iptables / ip6tables rules from a Linux memory image. + + Works with kernels where ip_tables / ip6_tables are compiled as modules + (the common case on Ubuntu, Debian and similar distros). Reaches the + xt_table structs via nf_hook_entries.hooks[i].priv, which always holds a + pointer to the xt_table when the hook was registered by ip_tables.ko. 
+ """ + + _required_framework_version = (2, 0, 0) + _version = (2, 1, 0) + + @classmethod + def get_requirements(cls) -> List[interfaces.configuration.RequirementInterface]: + return [ + requirements.TranslationLayerRequirement( + name="primary", + description="Memory layer to scan (physical or virtual)", + ), + requirements.ModuleRequirement( + name="kernel", + description="Linux kernel (optional — required only for hook-walk mode)", + architectures=["Intel32", "Intel64"], + optional=True, + ), + requirements.VersionRequirement( + name="Net", + component=network.NetSymbols, + version=(1, 0, 0), + ), + requirements.BooleanRequirement( + name="scan_only", + description=( + "Skip hook-walk and ISF; go straight to physical memory scan. " + "Use this when no matching ISF is available for the image." + ), + default=False, + optional=True, + ), + ] + + # ------------------------------------------------------------------ + # ISF checks (only for types that ARE in the kernel ISF) + # ------------------------------------------------------------------ + + @classmethod + def _check_isf(cls, vmlinux) -> None: + if not vmlinux.has_symbol("net_namespace_list"): + raise exceptions.PluginRequirementException( + "Symbol 'net_namespace_list' not found." + ) + for req in ("netns_nf", "nf_hook_entries", "nf_hook_entry"): + if not vmlinux.has_type(req): + raise exceptions.PluginRequirementException( + f"Type '{req}' not found in ISF." + ) + nf = vmlinux.get_type("netns_nf") + if not nf.has_member("hooks_ipv4"): + vollog.warning( + "netns_nf has no 'hooks_ipv4' member (kernel < 4.16?). " + "Hook walk will be skipped; falling back to memory scan." 
+ ) + + # ------------------------------------------------------------------ + # Namespace iterator + # ------------------------------------------------------------------ + + def _iter_namespaces(self, vmlinux): + net_sym = vmlinux.symbol_table_name + constants.BANG + "net" + nethead = vmlinux.object_from_symbol("net_namespace_list") + for net in nethead.to_list(net_sym, "list"): + try: + inum = str(int(net.ns.inum)) + except Exception: + inum = "-" + yield inum, net + + # ------------------------------------------------------------------ + # Hook-entry priv iterator → deduplicated xt_table addresses + # ------------------------------------------------------------------ + + def _collect_xt_tables(self, vmlinux, net, layout: XtTableLayout) -> Dict[int, Tuple]: + """Walk nf_hook_entries for IPv4 and IPv6, collect unique xt_table addresses. + + Returns {xt_table_addr: (tbl_name, private_ptr, af, valid_hooks)}. + """ + layer = self.context.layers[vmlinux.layer_name] + tables: Dict[int, Tuple] = {} + nf_hook_entry_sym = vmlinux.symbol_table_name + constants.BANG + "nf_hook_entry" + + # IPv4 hooks: net.nf.hooks_ipv4[0..4] + # IPv6 hooks: net.nf.hooks_ipv6[0..4] + hook_arrays = [] + try: + hook_arrays.append(("IPv4", net.nf.hooks_ipv4)) + except Exception: + pass + try: + hook_arrays.append(("IPv6", net.nf.hooks_ipv6)) + except Exception: + pass + + for af_label, hook_arr in hook_arrays: + for hook_idx in range(5): + try: + entries_ptr = hook_arr[hook_idx] + entries_addr = int(entries_ptr) + if not entries_addr or not _is_kernel_ptr(entries_addr): + continue + entries_obj = vmlinux.object( + type_name=vmlinux.symbol_table_name + constants.BANG + "nf_hook_entries", + offset=entries_addr, + native_layer_name=vmlinux.layer_name, + ) + num_hooks = int(entries_obj.num_hook_entries) + if num_hooks == 0 or num_hooks > 64: + continue + # hooks[] starts at offset 8 within nf_hook_entries + hooks_base = entries_addr + 8 + for j in range(num_hooks): + hook_entry_addr = hooks_base + 
j * _NF_HOOK_ENTRY_SIZE + try: + hook_entry = vmlinux.object( + type_name=nf_hook_entry_sym, + offset=hook_entry_addr, + native_layer_name=vmlinux.layer_name, + ) + priv_addr = int(hook_entry.priv) + except Exception: + priv_addr = _read_u64(layer, hook_entry_addr + _NF_HOOK_ENTRY_PRIV_OFF) + + if priv_addr in tables: + continue + + parsed = _try_parse_xt_table(layer, priv_addr, layout) + if parsed: + tables[priv_addr] = parsed + except exceptions.InvalidAddressException: + continue + except Exception as exc: + vollog.debug("hook walk error [%s hook %d]: %s", af_label, hook_idx, exc) + + return tables + + # ------------------------------------------------------------------ + # Generator + # ------------------------------------------------------------------ + + def _generator(self) -> Iterator[Tuple]: + scan_only = self.config.get("scan_only", False) + kernel_key = self.config.get("kernel") + has_kernel = ( + not scan_only + and kernel_key is not None + and kernel_key in self.context.modules + ) + + if not has_kernel: + # --------------------------------------------------------------- + # ISF-free path: physical memory scan only (no symbol table). + # Triggered by --scan-only or when no matching ISF was found. + # --------------------------------------------------------------- + if scan_only: + vollog.info( + "--scan-only: skipping ISF and hook walk; " + "going straight to physical memory scan." + ) + else: + vollog.warning( + "No Linux ISF (symbol table) could be matched for this image. " + "Falling back to ISF-free physical memory scan. " + "For full results, generate an ISF with dwarf2json, or run " + "'vol -f IMAGE linux.banners' to identify the exact kernel " + "version and then provide the matching ISF via -s/--symbols." + ) + layer_name = self.config["primary"] + banner, major, minor = _detect_banner_from_layer(self.context, layer_name) + if major == 0: + vollog.warning( + "Could not detect kernel version from banner scan. 
" + "Defaulting to >= 4.15 struct layout. " + "Packet/byte counters will be reported as inline (non-SMP)." + ) + layout = XtTableLayout( + name_off=56, valid_hooks_off=16, private_off=24, + me_off=32, af_off=40, read_size=92, + ) + info_layout = _XT_TABLE_INFO_FALLBACK + else: + vollog.info( + "Detected kernel %d.%d from banner scan: %s", + major, minor, banner.split(" #")[0], + ) + layout = _get_xt_table_layout(major, minor) + info_layout = _get_xt_table_info_layout(major, minor) + + layer_obj = self.context.layers[layer_name] + phys_sub = layer_obj.config.get("memory_layer") if hasattr(layer_obj, "config") else None + + # Build a read-layer for blob access via kernel virtual addresses. + # If we have an Intel64 virtual layer, use it directly (full VA translation). + # If we only have a physical layer (LimeLayer), use _PhysLayerProxy which + # converts direct-map VAs (va - page_offset_base); vmalloc'd blobs will + # be skipped gracefully when the translation goes out of range. + _POB = 0xFFFF_8880_0000_0000 # canonical x86-64 direct-map base + if phys_sub: + read_layer = layer_obj # Intel64 — has full VA translation + else: + read_layer = _PhysLayerProxy(layer_obj, _POB) + vollog.info( + "No Intel64 virtual layer available; using direct-map VA translation " + "(page_offset_base=0x%x). Tables backed by vmalloc may not be readable.", + _POB, + ) + + # Without ISF we cannot read __per_cpu_offset; assume inline counters. + percpu_offsets = None + + found_tables = _scan_for_xt_tables( + self.context, + layer_name, + layout=layout, + page_offset_base=_POB, + progress_callback=self._progress_callback, + ) + if not found_tables: + vollog.warning( + "Memory scan found no xt_table structs. " + "This is expected on systems using iptables-nft (CentOS 9, " + "modern Arch/Fedora/etc.) where iptables rules are stored in " + "nftables kernel structures rather than xt_table structs. 
" + "Verify on the source system with: lsmod | grep ip_tables" + ) + return + + vollog.info("Memory scan found %d xt_table struct(s).", len(found_tables)) + + for private_ptr, (tbl_name, _pp, af_byte, valid_hooks) in found_tables.items(): + af = "IPv6" if af_byte == NFPROTO_IPV6 else "IPv4" + netns_id = "-" + + info = _read_xt_table_info(read_layer, private_ptr, info_layout) + if info is None: + vollog.debug( + "Cannot read xt_table_info for '%s' at 0x%x " + "(may be vmalloc'd — requires ISF for page-table walk)", + tbl_name, private_ptr, + ) + continue + blob_size, hook_entry, underflow = info + + blob = _read_entries_blob(read_layer, private_ptr, blob_size, info_layout) + if blob is None: + vollog.debug("Cannot read entries blob for '%s'", tbl_name) + continue + + cmap = _build_chain_map(valid_hooks, hook_entry) + underflow_set = _build_underflow_set(valid_hooks, underflow) + + if af == "IPv4": + yield from _walk_entries( + blob, cmap, underflow_set, tbl_name, netns_id, af, + _IPT_ENTRY_SIZE, _IPT_ENTRY_TARGET_OFF_OFF, + _parse_ipv4_entry, read_layer, percpu_offsets, + kver=(major, minor), + ) + else: + yield from _walk_entries( + blob, cmap, underflow_set, tbl_name, netns_id, af, + _IP6T_ENTRY_SIZE, _IP6T_ENTRY_TARGET_OFF_OFF, + _parse_ipv6_entry, read_layer, percpu_offsets, + kver=(major, minor), + ) + return + + # --------------------------------------------------------------- + # ISF-backed path: hook walk + scan fallback (original behavior). + # --------------------------------------------------------------- + vmlinux = self.context.modules[kernel_key] + network.NetSymbols.apply( + self.context.symbol_space[vmlinux.symbol_table_name] + ) + self._check_isf(vmlinux) + + # Detect kernel version, fetch matching struct layout from source. + banner, major, minor = _get_kernel_version(vmlinux) + if major == 0: + vollog.warning( + "Could not determine kernel version from linux_banner. " + "Defaulting to >= 4.15 struct layout (name_off=56). 
" + "Run the banners plugin to identify the exact kernel version." + ) + layout = XtTableLayout(name_off=56, valid_hooks_off=16, private_off=24, me_off=32, af_off=40, read_size=92) + elif major < 4: + vollog.warning( + "Kernel %d.%d detected (%s). " + "xt_table struct layout changed in ~4.0; " + "results for kernels < 4.0 will be unreliable.", + major, minor, banner.split(" #")[0], + ) + layout = _get_xt_table_layout(major, minor) + else: + vollog.info( + "Kernel %d.%d detected. Banner: %s", + major, minor, banner.split(" #")[0], + ) + layout = _get_xt_table_layout(major, minor) + + vollog.info( + "xt_table layout: name_off=%d valid_hooks_off=%d private_off=%d af_off=%d", + layout.name_off, layout.valid_hooks_off, layout.private_off, layout.af_off, + ) + + if major == 0: + info_layout = _XT_TABLE_INFO_FALLBACK + else: + info_layout = _get_xt_table_info_layout(major, minor) + + layer = self.context.layers[vmlinux.layer_name] + percpu_offsets = _get_percpu_offsets(vmlinux) + + # --------------------------------------------------------------- + # Strategy 1: navigate via nf_hook_entries.hooks[i].priv + # Works when ip_tables.ko registers hooks directly (iptables-legacy). + # Does NOT work with iptables-nft / nft_compat because hooks are + # owned by nftables in that case. 
+ # --------------------------------------------------------------- + hook_found_any = False + for netns_id, net in self._iter_namespaces(vmlinux): + tables = self._collect_xt_tables(vmlinux, net, layout) + if not tables: + vollog.debug("netns %s: no xt_tables found via hook entries", netns_id) + continue + + hook_found_any = True + for tbl_addr, (tbl_name, private_ptr, af_byte, valid_hooks) in tables.items(): + af = "IPv6" if af_byte == NFPROTO_IPV6 else "IPv4" + + info = _read_xt_table_info(layer, private_ptr, info_layout) + if info is None: + continue + blob_size, hook_entry, underflow = info + + blob = _read_entries_blob(layer, private_ptr, blob_size, info_layout) + if blob is None: + continue + + cmap = _build_chain_map(valid_hooks, hook_entry) + underflow_set = _build_underflow_set(valid_hooks, underflow) + + if af == "IPv4": + yield from _walk_entries( + blob, cmap, underflow_set, tbl_name, netns_id, af, + _IPT_ENTRY_SIZE, _IPT_ENTRY_TARGET_OFF_OFF, + _parse_ipv4_entry, layer, percpu_offsets, + kver=(major, minor), + ) + else: + yield from _walk_entries( + blob, cmap, underflow_set, tbl_name, netns_id, af, + _IP6T_ENTRY_SIZE, _IP6T_ENTRY_TARGET_OFF_OFF, + _parse_ipv6_entry, layer, percpu_offsets, + kver=(major, minor), + ) + + if hook_found_any: + return + + # --------------------------------------------------------------- + # Strategy 2: memory scan for xt_table name strings + # Handles iptables-nft (nft_compat) where hooks belong to nftables + # and priv never points to xt_table. The xt_table structs are still + # allocated in kernel heap — we just have to find them by scanning. 
+ # --------------------------------------------------------------- + _TABLE_MODULES = frozenset({ + "iptable_filter", "iptable_nat", "iptable_mangle", + "iptable_raw", "iptable_security", + "ip6table_filter", "ip6table_nat", "ip6table_mangle", + }) + loaded_mods = _loaded_iptables_modules(vmlinux) + table_mods = [m for m in loaded_mods if m in _TABLE_MODULES] + if table_mods: + vollog.warning( + "Hook-based lookup found no xt_tables, but table modules ARE " + "loaded: %s — falling back to memory scan. " + "This can happen with nft_compat where hooks are owned by " + "nftables rather than ip_tables.", + ", ".join(table_mods), + ) + elif loaded_mods: + vollog.warning( + "Hook-based lookup found no xt_tables. Framework module(s) " + "loaded (%s) but NO per-table modules (iptable_filter, " + "iptable_nat, etc.). xt_table structs are only created when " + "table modules load. This system likely uses iptables-nft: " + "rules live in nftables structures.", + ", ".join(loaded_mods), + ) + else: + vollog.warning( + "Hook-based lookup found no xt_tables AND no ip(6)tables " + "modules are loaded (ip_tables.ko / iptable_filter.ko not in " + "module list). This system likely uses iptables-nft: rules " + "are stored in nftables structures, not xt_table structs. " + "Falling back to memory scan (expect 0 results)." + ) + found_tables = _scan_for_xt_tables( + self.context, + vmlinux.layer_name, + layout=layout, + page_offset_base=_get_page_offset_base(vmlinux), + progress_callback=self._progress_callback, + ) + if not found_tables: + vollog.warning("Memory scan found no xt_table structs either.") + return + + vollog.info("Memory scan found %d xt_table struct(s).", len(found_tables)) + + # Emit rows ----------------------------------------------------------- + # found_tables is keyed by private_ptr (kernel VA of xt_table_info). 
+ # Namespace detection is not attempted in the scan path: the scan + # gives us no reliable virtual address for the xt_table struct itself, + # so we cannot walk the list_head chain. NetNS is reported as N/A. + for private_ptr, (tbl_name, _pp, af_byte, valid_hooks) in found_tables.items(): + af = "IPv6" if af_byte == NFPROTO_IPV6 else "IPv4" + netns_id = "-" + + info = _read_xt_table_info(layer, private_ptr, info_layout) + if info is None: + vollog.debug("Cannot read xt_table_info for '%s' at 0x%x", tbl_name, private_ptr) + continue + blob_size, hook_entry, underflow = info + vollog.debug( + " '%s' private=0x%x blob_size=%d hook_entry=%s", + tbl_name, private_ptr, blob_size, hook_entry, + ) + + blob = _read_entries_blob(layer, private_ptr, blob_size, info_layout) + if blob is None: + vollog.debug("Cannot read entries blob for '%s'", tbl_name) + continue + + cmap = _build_chain_map(valid_hooks, hook_entry) + underflow_set = _build_underflow_set(valid_hooks, underflow) + + if af == "IPv4": + yield from _walk_entries( + blob, cmap, underflow_set, tbl_name, netns_id, af, + _IPT_ENTRY_SIZE, _IPT_ENTRY_TARGET_OFF_OFF, + _parse_ipv4_entry, layer, percpu_offsets, + kver=(major, minor), + ) + else: + yield from _walk_entries( + blob, cmap, underflow_set, tbl_name, netns_id, af, + _IP6T_ENTRY_SIZE, _IP6T_ENTRY_TARGET_OFF_OFF, + _parse_ipv6_entry, layer, percpu_offsets, + kver=(major, minor), + ) + + # ------------------------------------------------------------------ + # Plugin entry point + # ------------------------------------------------------------------ + + def run(self): + columns = [ + ("NetNS", str), + ("AF", str), + ("Table", str), + ("Chain", str), + ("Num", int), + ("Source", str), + ("Dest", str), + ("Proto", str), + ("InIface", str), + ("OutIface", str), + ("Matches", str), + ("Target", str), + ("Pkts", int), + ("Bytes", int), + ] + return renderers.TreeGrid(columns, self._generator()) diff --git a/volatility3/framework/plugins/linux/iptables_nft.py 
b/volatility3/framework/plugins/linux/iptables_nft.py new file mode 100644 index 0000000000..b43e60beef --- /dev/null +++ b/volatility3/framework/plugins/linux/iptables_nft.py @@ -0,0 +1,2122 @@ +# This file is Copyright 2025 Volatility Foundation and licensed under the +# Volatility Software License 1.0 which is available at +# https://www.volatilityfoundation.org/license/vsl-v1.0 +"""Linux iptables-nft rule extraction plugin. + +When ``iptables-nft`` is the active iptables backend (Debian 11+, Ubuntu 22.04+, +CentOS 9+, Arch Linux, Fedora 33+), iptables rules are stored in the nftables +kernel subsystem as ``nft_compat`` match/target expressions rather than in the +legacy ``xt_table`` blob format read by ``linux.iptables``. + +This plugin walks the in-memory nftables structures and focuses on iptables-nft +tables (ip / ip6 family, names: filter / nat / mangle / raw / security), decoding +``nft_compat`` match/target data with the same decoders as ``linux.iptables``. + +TWO WALK PATHS +============== +ISF path (CONFIG_NF_TABLES=y — built-in) + nft_table, nft_chain, nft_rule, nft_expr_ops, nft_expr_type are all present + in the kernel ISF. The plugin uses standard volatility3 object accessors. + +Raw-walk path (CONFIG_NF_TABLES=m — most distros) + nf_tables types are absent from the ISF (only in the module's debug info). 
+ The plugin walks the kernel linked lists via raw memory reads, using + hardcoded struct offsets verified against Linux 5.10 x86-64: + + netns_nft tables list_head +0 + nft_table chains list_head +152 (after list@0 + rhltable@136) + family u16 +236 + name char* +248 + nft_chain rules list_head +16 (after blob_gen_0,1 @ +0,+8) + list list_head +32 (link node in nft_table.chains) + flags u8 +84 (bit 0 = NFT_BASE_CHAIN) + name char* +88 + nft_base_chain policy u8 +49 (NF_DROP=0, NF_ACCEPT=1) + chain nft_chain +56 + nft_rule list list_head +0 + u64 packed +16 (handle:42,genmask:2,dlen:12,ulen:8) + data[] +24 expression blob + nft_expr_ops size u32 +8 (immediately after eval fn-ptr) + + Verified struct sizes from ISF: rhltable=136, nf_hook_ops=40, + mutex=32, work_struct=32, rhlist_head=16. + +HOW iptables-nft STORES RULES +============================== +Each iptables rule becomes an nft_rule with: + -m extension → nft_compat "match" expression (priv.info → xt_entry_match) + -j EXTENSION → nft_compat "target" expression (priv.info → xt_entry_target) + ACCEPT/DROP… → "immediate" verdict expression (priv: nft_data verdict) + -i / -o / -p → native meta/payload/cmp expressions + +xt_entry_match / xt_entry_target header (32 bytes): + +0: u16 size (32 + data length) + +8: xt_match * / xt_target * (kernel pointer; use to recover full name) + +32: match / target data bytes + +LIMITATION +========== +The raw-walk path uses offsets computed for Linux 5.x x86-64 with a typical +distro configuration. Kernels with significantly different struct layouts +(very old kernels, 32-bit, custom configs) may produce incorrect results. 
+""" + +import logging +import re +import struct +import urllib.error +import urllib.request +from typing import Dict, Iterator, List, NamedTuple, Optional, Tuple + +from volatility3.framework import constants, exceptions, interfaces, renderers +from volatility3.framework.configuration import requirements +from volatility3.framework.interfaces import plugins +from volatility3.framework.symbols.linux import network + +# Reuse match/target decoders and xt_entry helpers from linux.iptables. +from volatility3.plugins.linux import iptables as _ipt + +vollog = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Protocol family constants +# --------------------------------------------------------------------------- + +_IPT_FAMILIES: Dict[int, str] = {2: "ip", 10: "ip6"} +_IPT_TABLES = frozenset({"filter", "nat", "mangle", "raw", "security"}) + +# --------------------------------------------------------------------------- +# NFT verdict codes → iptables target names +# --------------------------------------------------------------------------- + +_NF_DROP = 0 +_NF_ACCEPT = 1 +_NFT_VERDICTS: Dict[int, str] = { + 0: "DROP", + 1: "ACCEPT", + -1: "CONTINUE", + -2: "BREAK", + -3: "JUMP", + -4: "GOTO", + -5: "RETURN", +} + +# --------------------------------------------------------------------------- +# Compact decoders for native nft expressions (meta / payload / cmp) +# --------------------------------------------------------------------------- + +_META_KEYS: Dict[int, str] = { + 0: "len", 1: "protocol", 2: "priority", 3: "mark", + 4: "iif", 5: "oif", 6: "iifname", 7: "oifname", + 8: "iiftype", 9: "oiftype", 10: "skuid", 11: "skgid", + 12: "nftrace", 13: "rtclassid",14: "secmark", 15: "nfproto", + 16: "l4proto", 17: "bri_iifname",18: "bri_oifname", + 19: "pkttype", 20: "cpu", 21: "iifgroup", 22: "oifgroup", + 23: "cgroup", 24: "prandom", 25: "symhash", 26: "ifindex", + 36: "sdif", 37: "sdifname", +} +_L4PROTO_NAMES: 
class NftLayout(NamedTuple):
    """Per-version struct field offsets for the nftables raw-walk path.

    Every field is a byte offset inside the structure named by its group:

    netns_nft / nftables_pernet (embedded in struct net at net.nft)
        tables_off       tables list_head

    nft_table
        tbl_chains_off   chains list_head
        tbl_family_off   u16 family bitfield
        tbl_name_off     char* name

    nft_chain
        chain_rules_off  rules list_head
        chain_list_off   list link (the node threaded on table.chains)
        chain_flags_off  u8 flags bitfield
        chain_name_off   char* name
        chain_table_off  table* back-pointer

    nft_base_chain
        bc_policy_off    u8 policy
        bc_chain_off     embedded nft_chain
    """

    tables_off: int

    tbl_chains_off: int
    tbl_family_off: int
    tbl_name_off: int

    chain_rules_off: int
    chain_list_off: int
    chain_flags_off: int
    chain_name_off: int
    chain_table_off: int

    bc_policy_off: int
    bc_chain_off: int
+_NFT_RULE_LIST_OFF = 0 +_NFT_RULE_PACKED_OFF = 16 +_NFT_RULE_DATA_OFF = 24 + +# nft_expr_ops (Linux 5.1+) +# +0: void (*eval)(...) 8 bytes +# +8: int (*clone)(...) 8 bytes ← added ~5.1; NULL for most expressions +# +16: unsigned int size ← total expression size including ops pointer +_NFT_EXPR_OPS_SIZE_OFF = 16 + +# Maximum number of entries to walk per list before giving up (corruption guard) +_MAX_LIST_WALK = 2048 + + +# Known native nft expression type names (from nft_expr_type.name strings). +_NFT_KNOWN_EXPR_NAMES = frozenset(( + "meta", "payload", "cmp", "bitwise", "immediate", + "match", "target", "counter", "nat", "lookup", + "range", "dynset", "log", "limit", "reject", + "conntrack", "quota", "tproxy", "fwd", "dup", + "ct", "notrack", "hash", "socket", "osf", +)) + +# Cache: ops_ptr → type name (or None). Populated by _read_native_expr_type_name. +_ops_type_name_cache: Dict[int, Optional[str]] = {} + +# --------------------------------------------------------------------------- +# Per-kernel-version NftLayout fallback table +# Resolution: cache → GitHub fetch → this table → default (last entry) +# +# Key offsets that change between kernel versions: +# tables_off — 0 for all 5.x+ (tables is first field in netns_nft) +# tbl_name_off — 240 before 5.11 (no nlpid field), 248 from 5.11+ +# bc_policy_off / bc_chain_off — 64 / 80 for all 5.x+ (hook_list added ~5.8) +# --------------------------------------------------------------------------- + +_NFT_LAYOUT_FALLBACKS: List[Tuple] = [ + # ((major, minor_min, minor_max), NftLayout(...)) + # 5.x (Linux 5.0–5.10) — vanilla kernel source layout. + # netns_nftables: tables list_head is the FIRST field at offset 0. + # nft_table (no nlpid field in 5.x): + # list(16) + chains_ht(136) → chains@+152 + # chains/sets/objects/flowtables(4×16) + hgen(8) + handle(8) + use(4) = +220 + # family u16@+236 (bitfield), name char*@+240 (after 2-byte pad, no nlpid). + # nft_base_chain: hook_list added ~5.8; policy@+64, chain embed@+80. 
+ ((5, 0, 99), NftLayout( + tables_off=0, + tbl_chains_off=152, tbl_family_off=236, tbl_name_off=240, + chain_rules_off=16, chain_list_off=32, chain_flags_off=84, chain_name_off=88, chain_table_off=64, + bc_policy_off=64, bc_chain_off=80, + )), + # 6.0–6.4 + ((6, 0, 4), NftLayout( + tables_off=0, + tbl_chains_off=152, tbl_family_off=236, tbl_name_off=248, + chain_rules_off=16, chain_list_off=32, chain_flags_off=84, chain_name_off=88, chain_table_off=64, + bc_policy_off=64, bc_chain_off=80, + )), + # 6.1.x — Debian 12 LTS + ((6, 1, 99), NftLayout( + tables_off=0, + tbl_chains_off=152, tbl_family_off=236, tbl_name_off=248, + chain_rules_off=16, chain_list_off=32, chain_flags_off=84, chain_name_off=88, chain_table_off=64, + bc_policy_off=64, bc_chain_off=80, + )), + # 6.5+ — Arch Linux, Fedora 38+, Ubuntu 23.10+ (default / last-resort) + ((6, 5, 99), NftLayout( + tables_off=0, + tbl_chains_off=152, tbl_family_off=236, tbl_name_off=248, + chain_rules_off=16, chain_list_off=32, chain_flags_off=84, chain_name_off=88, chain_table_off=64, + bc_policy_off=64, bc_chain_off=80, + )), +] + +_nft_layout_cache: Dict[Tuple[int, int], NftLayout] = {} + +_NFT_GITHUB_HEADER = "include/net/netfilter/nf_tables.h" +_NFT_NETNS_HEADER = "include/net/netns/nftables.h" + +# Compound type sizes needed to parse nf_tables.h via _ipt._parse_struct_offsets. +# These are temporarily injected into _ipt._C_TYPE_SIZES during GitHub fetch. +_NFT_COMPOUND_SIZES: Dict[str, Tuple[int, int]] = { + "struct rhltable": (136, 8), + "struct rhlist_head": ( 16, 8), + "struct nf_hook_ops": ( 40, 8), + "struct nft_stats": ( 8, 8), # __percpu ptr → treat as pointer + "struct flow_block": ( 24, 8), # list_head(16)+spinlock(4)+pad(4) + "struct mutex": ( 32, 8), + "struct nft_rule_blob": ( 8, 8), # opaque, treat as pointer +} + + +def _parse_tables_off_from_header(url: str, tag: str) -> Optional[int]: + """Fetch a kernel header and return the offset of the 'tables' field. 
+ + Used for both include/net/netns/nftables.h (v5.x netns_nftables struct) + and include/net/netfilter/nf_tables.h (v5.15+ nftables_pernet struct). + Returns None on fetch failure or if 'tables' field not found. + """ + try: + req = urllib.request.Request( + url, headers={"User-Agent": "volatility3-iptables-nft-plugin"} + ) + with urllib.request.urlopen(req, timeout=15) as resp: + source = resp.read().decode("utf-8", errors="replace") + except Exception as exc: + vollog.debug("Cannot fetch %s (%s): %s", url, tag, exc) + return None + + for struct_name in ("netns_nftables", "nftables_pernet"): + m = re.search( + r"struct\s+" + re.escape(struct_name) + r"\s*\{([^}]+)\}", + source, re.DOTALL, + ) + if m: + try: + offsets = _ipt._parse_struct_offsets(m.group(1)) + if "tables" in offsets: + return offsets["tables"] + except Exception as exc: + vollog.debug( + "Parsing %s from %s failed: %s", struct_name, url, exc + ) + return None + + +def _parse_nft_structs_from_source( + source: str, tag: str, tables_off: int +) -> Optional[NftLayout]: + """Parse nft_table / nft_chain / nft_base_chain from nf_tables.h source text.""" + offsets: Dict[str, Dict[str, int]] = {} + for struct_name in ("nft_table", "nft_chain", "nft_base_chain"): + m = re.search( + r"struct\s+" + re.escape(struct_name) + r"\s*\{([^}]+)\}", + source, re.DOTALL, + ) + if not m: + vollog.warning( + "struct %s not found in nf_tables.h for %s", struct_name, tag + ) + return None + try: + offsets[struct_name] = _ipt._parse_struct_offsets(m.group(1)) + except Exception as exc: + vollog.warning( + "Parsing struct %s from %s failed: %s", struct_name, tag, exc + ) + return None + + tbl = offsets["nft_table"] + chn = offsets["nft_chain"] + bc = offsets["nft_base_chain"] + + required = [ + ("nft_table", "chains", tbl), ("nft_table", "family", tbl), + ("nft_table", "name", tbl), + ("nft_chain", "rules", chn), ("nft_chain", "list", chn), + ("nft_chain", "flags", chn), ("nft_chain", "name", chn), + ("nft_chain", "table", 
chn), + ("nft_base_chain", "policy", bc), ("nft_base_chain", "chain", bc), + ] + for struct_name, field, d in required: + if field not in d: + vollog.warning( + "Field '%s' not found in struct %s for %s", field, struct_name, tag + ) + return None + + layout = NftLayout( + tables_off=tables_off, + tbl_chains_off=tbl["chains"], tbl_family_off=tbl["family"], + tbl_name_off=tbl["name"], + chain_rules_off=chn["rules"], chain_list_off=chn["list"], + chain_flags_off=chn["flags"], chain_name_off=chn["name"], + chain_table_off=chn["table"], + bc_policy_off=bc["policy"], bc_chain_off=bc["chain"], + ) + vollog.warning( + "NftLayout from GitHub (%s): tables_off=%d tbl_chains=%d tbl_family=%d " + "tbl_name=%d chain_rules=%d chain_list=%d chain_flags=%d chain_name=%d " + "chain_table=%d bc_policy=%d bc_chain=%d", + tag, layout.tables_off, layout.tbl_chains_off, layout.tbl_family_off, + layout.tbl_name_off, layout.chain_rules_off, layout.chain_list_off, + layout.chain_flags_off, layout.chain_name_off, layout.chain_table_off, + layout.bc_policy_off, layout.bc_chain_off, + ) + return layout + + +def _fetch_nft_layout_from_source(major: int, minor: int) -> Optional[NftLayout]: + """Fetch nf_tables.h from GitHub and compute NftLayout for the given kernel version. + + Temporarily injects compound type sizes into _ipt._C_TYPE_SIZES so that + _parse_struct_offsets can handle nft_table / nft_chain / nft_base_chain bodies. + Returns None on any fetch or parse failure; caller falls back to table. 
+ """ + tag = f"v{major}.{minor}" + base = _ipt._GITHUB_RAW + f"/{tag}" + + tables_off = _parse_tables_off_from_header( + f"{base}/{_NFT_NETNS_HEADER}", tag + ) + if tables_off is None: + tables_off = _parse_tables_off_from_header( + f"{base}/{_NFT_GITHUB_HEADER}", tag + ) + if tables_off is None: + vollog.warning( + "Cannot determine tables_off from kernel source for %s", tag + ) + return None + + url = f"{base}/{_NFT_GITHUB_HEADER}" + try: + req = urllib.request.Request( + url, headers={"User-Agent": "volatility3-iptables-nft-plugin"} + ) + with urllib.request.urlopen(req, timeout=15) as resp: + source = resp.read().decode("utf-8", errors="replace") + except Exception as exc: + vollog.warning("Cannot fetch %s (%s): %s", url, tag, exc) + return None + + # Temporarily inject compound sizes so _parse_struct_offsets handles nft types + saved: Dict[str, Optional[Tuple[int, int]]] = {} + for k, v in _NFT_COMPOUND_SIZES.items(): + saved[k] = _ipt._C_TYPE_SIZES.get(k) + _ipt._C_TYPE_SIZES[k] = v + + try: + result = _parse_nft_structs_from_source(source, tag, tables_off) + finally: + for k, orig in saved.items(): + if orig is None: + _ipt._C_TYPE_SIZES.pop(k, None) + else: + _ipt._C_TYPE_SIZES[k] = orig + + return result + + +def _get_nft_layout(major: int, minor: int) -> NftLayout: + """Return NftLayout for the given kernel version. + + Resolution order: + 1. In-process cache. + 2. Dynamic fetch from GitHub (nf_tables.h + netns/nftables.h). + 3. Hard-coded fallback table (_NFT_LAYOUT_FALLBACKS). + 4. Default to the last fallback entry if nothing matches. 
+ """ + key = (major, minor) + if key in _nft_layout_cache: + return _nft_layout_cache[key] + + layout: Optional[NftLayout] = _fetch_nft_layout_from_source(major, minor) + + if layout is None: + for (maj, mn_min, mn_max), candidate in _NFT_LAYOUT_FALLBACKS: + if major == maj and mn_min <= minor <= mn_max: + layout = candidate + vollog.warning( + "NftLayout for kernel %d.%d (fallback table): " + "tables_off=%d tbl_name_off=%d bc_policy_off=%d bc_chain_off=%d", + major, minor, + layout.tables_off, layout.tbl_name_off, + layout.bc_policy_off, layout.bc_chain_off, + ) + break + + if layout is None: + layout = _NFT_LAYOUT_FALLBACKS[-1][1] + vollog.warning( + "No known NftLayout for kernel %d.%d — defaulting to 6.5 layout.", + major, minor, + ) + + _nft_layout_cache[key] = layout + return layout + + +# --------------------------------------------------------------------------- +# Low-level helpers +# --------------------------------------------------------------------------- + +def _canon(addr: int) -> int: + """Canonicalize an x86-64 virtual address by sign-extending bit 47. + + Volatility stores object offsets as 48-bit ints (bits 63:48 stripped), + but the kernel writes full 64-bit canonical addresses in memory. This + function normalises both forms so they compare equal. 
+ """ + if addr & (1 << 47): + return addr | (0xFFFF << 48) + return addr & 0x0000_FFFF_FFFF_FFFF + + +def _is_kernel_ptr(addr: int) -> bool: + """Heuristic: kernel virtual addresses have bit 47 set (canonical form ≥ 0xffff800…).""" + return _canon(addr) > 0xFFFF_0000_0000_0000 + + +def _read_cstr(layer, addr: int, maxlen: int = 256) -> str: + try: + if not addr or not _is_kernel_ptr(addr): + return "" + raw = layer.read(addr, maxlen) + end = raw.find(b"\x00") + return raw[: end if end >= 0 else maxlen].decode("ascii", errors="replace") + except Exception: + return "" + + +def _read_u32(layer, addr: int) -> int: + return struct.unpack_from(" int: + return struct.unpack_from(" Iterator[int]: + """Walk a kernel circular doubly-linked list. + + ``head_addr`` address of the sentinel ``list_head`` node. + ``entry_list_off`` byte offset of the ``list_head`` member within + each entry struct. + + Yields the base address of each entry (= node_ptr - entry_list_off). + Stops at the sentinel, on non-kernel pointers, or after _MAX_LIST_WALK + entries (corruption guard). + """ + # Canonicalize head_addr so the termination comparison works regardless of + # whether Volatility stored a 48-bit or full 64-bit address. 
+ head_canon = _canon(head_addr) + seen: set = set() + count = 0 + try: + cur = _canon(_read_u64(layer, head_addr)) # head.next + except Exception: + return + while _is_kernel_ptr(cur) and cur != head_canon and count < _MAX_LIST_WALK: + if cur in seen: + break + seen.add(cur) + yield cur - entry_list_off + count += 1 + try: + cur = _canon(_read_u64(layer, cur)) # list_node.next + except Exception: + break + + +# --------------------------------------------------------------------------- +# nft_compat match / target extraction +# --------------------------------------------------------------------------- + +_XT_NAME_OFFSET = 16 # offsetof(xt_match/xt_target, name) — after list_head (16B) +_XT_REV_OFFSET = 46 # offsetof(xt_match/xt_target, revision) — after list_head+name[30] + + +def _xt_compat_name_data( + layer, ops_ptr: int, priv_addr: int, expr_size: int +) -> Optional[Tuple[str, int, bytes]]: + """Extract ``(name, revision, data)`` from a heap-allocated nft_compat expression. + + Modern nft_compat (Linux 5.x+) stores match/target private data **inline** + in the expression's priv area. The ``xt_match``/``xt_target`` pointer is + somewhere within the dynamically-allocated ``nft_compat_match`` struct that + contains the embedded ``nft_expr_ops``. + + We locate it generically: scan 8-byte words in the ops struct (up to 256 B) + for kernel TEXT pointers (high 32 bits == 0xffffffff); for each, try reading + 30 bytes at ``+16`` (``xt_match.name`` after ``list_head``). The first + entry that yields a valid ASCII alphanumeric name of 2–29 chars is taken as + the ``xt_match``/``xt_target`` pointer. + + This is layout-independent and works across kernel versions because: + * ``XT_FUNCTION_MAXNAMELEN = 30`` has not changed since netfilter inception. + * The name field is always the first named field after the 16-byte + ``struct list_head``. + * Function pointers in the same struct contain machine code that rarely + produces valid short alphanumeric ASCII at offset +16. 
+ """ + priv_size = max(0, expr_size - 8) + try: + wide = layer.read(ops_ptr, 256) + except Exception: + return None + + seen: set = set() + for ci in range(0, len(wide) - 7, 8): + val = struct.unpack_from("> 32) != 0xffffffff: + continue # skip NULL / heap pointers — xt_match is in module memory + if val in seen: + continue + seen.add(val) + try: + name_raw = layer.read(val + _XT_NAME_OFFSET, 30) + except Exception: + continue + name_bytes = name_raw.split(b"\x00")[0] + if len(name_bytes) < 2: + continue + try: + name = name_bytes.decode("ascii") + except Exception: + continue # non-ASCII bytes → this is machine code, not a name + if not name.replace("-", "").replace("_", "").isalnum(): + continue # contains unusual chars → machine code + # Found a valid xt_match/xt_target name. + try: + rev = layer.read(val + _XT_REV_OFFSET, 1)[0] + except Exception: + rev = 0 + try: + data = layer.read(priv_addr, priv_size) if priv_size > 0 else b"" + except Exception: + data = b"" + return name, rev, data + + return None + + +def _xt_ext_full(layer, priv_addr: int) -> Optional[Tuple[str, int, bytes]]: + """Extract ``(name, revision, data)`` from an nft_compat priv area. + + Older nft_compat layout (pre-5.x) where ``nft_compat_match_priv`` / + ``nft_compat_target_priv`` begin with a ``void *info`` pointer to a + full ``xt_entry_match`` / ``xt_entry_target`` (header + data). + + Superseded by ``_xt_compat_name_data`` for modern kernels, kept as + fallback. 
+ """ + try: + info_ptr = _read_u64(layer, priv_addr) + if not _is_kernel_ptr(info_ptr): + return None + header = layer.read(info_ptr, _ipt.MATCH_HEADER_SIZE) + size = struct.unpack_from(" 4096: + return None + name = _ipt._read_xt_name(header, layer) + rev = header[2 + _ipt.XT_FUNCTION_MAXNAMELEN - 1] + data_size = size - _ipt.MATCH_HEADER_SIZE + data = (layer.read(info_ptr + _ipt.MATCH_HEADER_SIZE, data_size) + if data_size > 0 else b"") + return name, rev, data + except Exception: + return None + + +# --------------------------------------------------------------------------- +# Native nft expression type detection (raw walk path) +# --------------------------------------------------------------------------- + +def _scan_ops_for_type_name(layer, ops_ptr: int) -> Optional[str]: + """Scan the nft_expr_ops struct to find the expression type name. + + Native nft expression ops are static const structs in kernel text/rodata + (high 32 bits == 0xffffffff). The ops struct contains a pointer to + ``nft_expr_type``, which in turn has a ``const char *name`` field. + + We scan the ops struct (up to 200 bytes) for kernel-text pointers, treat + each as a candidate ``nft_expr_type *``, and within the first 96 bytes of + that struct look for another kernel-text pointer that resolves to a known + nft expression type name string. 
+ """ + try: + wide = layer.read(ops_ptr, 200) + except Exception: + return None + + for ci in range(0, min(len(wide) - 7, 192), 8): + type_ptr = struct.unpack_from("> 32) != 0xffffffff: + continue + try: + type_raw = layer.read(type_ptr, 96) + except Exception: + continue + for name_off in range(0, 96, 8): + name_ptr = struct.unpack_from("> 32) != 0xffffffff: + continue + try: + name_bytes = layer.read(name_ptr, 20) + end = name_bytes.find(b"\x00") + if end < 2 or end > 16: + continue + try: + name = name_bytes[:end].decode("ascii") + except Exception: + continue + if name in _NFT_KNOWN_EXPR_NAMES: + return name + except Exception: + continue + return None + + +def _read_native_expr_type_name(layer, ops_ptr: int) -> Optional[str]: + """Return the type name for a native nft expression ops pointer (cached).""" + if ops_ptr in _ops_type_name_cache: + return _ops_type_name_cache[ops_ptr] + name = _scan_ops_for_type_name(layer, ops_ptr) + _ops_type_name_cache[ops_ptr] = name + return name + + +# --------------------------------------------------------------------------- +# Payload + cmp expression pair decoder (native nft source/dest IP, port, etc.) +# --------------------------------------------------------------------------- + +# NFT_PAYLOAD_* base constants +_NFT_PAYLOAD_LL = 0 # link-layer header +_NFT_PAYLOAD_NETWORK = 1 # network header (IPv4/IPv6) +_NFT_PAYLOAD_TRANSPORT = 2 # transport header (TCP/UDP) + +# NFT_CMP_* op constants +_NFT_CMP_EQ = 0 +_NFT_CMP_NEQ = 1 +_NFT_CMP_LT = 2 +_NFT_CMP_LTE = 3 +_NFT_CMP_GT = 4 +_NFT_CMP_GTE = 5 + +_NFT_CMP_OP_STR: Dict[int, str] = { + _NFT_CMP_EQ: "", _NFT_CMP_NEQ: "!=", _NFT_CMP_LT: "<", + _NFT_CMP_LTE: "<=", _NFT_CMP_GT: ">", _NFT_CMP_GTE: ">=", +} + +# nft_payload priv struct (Linux 5.x x86-64): +# +# struct nft_payload { +# enum nft_payload_bases base:8, // byte 0 +# offset:8, // byte 1 +# len:8, // byte 2 +# dreg:8; // byte 3 +# }; +# +# All four fields are packed into a single u32 bitfield. 
Read each as a +# single byte at the corresponding byte offset within the priv area. +_NFT_PAYLOAD_BASE_OFF = 0 # byte 0 +_NFT_PAYLOAD_OFFSET_OFF = 1 # byte 1 +_NFT_PAYLOAD_LEN_OFF = 2 # byte 2 +_NFT_PAYLOAD_DREG_OFF = 3 # byte 3 + +# nft_cmp_expr priv struct (Linux 5.x x86-64): +# +0 struct nft_data data (16B — comparison value in first bytes) +# +16 u8 sreg (1B) — OR op:8 in some layouts +# +17 u8 len (1B) +# +18 u8 op:8 (1B) — OR at +20 as full int +# We try reading op from +16 first (fits in range 0–5), then +18, then +20. +_NFT_CMP_DATA_OFF = 0 # nft_data starts here +_NFT_CMP_SREG_OFF = 16 +_NFT_CMP_LEN_OFF = 17 +_NFT_CMP_OP_OFF_A = 16 # some layouts: op byte at +16 +_NFT_CMP_OP_OFF_B = 18 # other layouts: op byte at +18 +_NFT_CMP_OP_OFF_C = 20 # full-int layout + + +def _read_cmp_op(layer, priv_addr: int) -> int: + """Read nft_cmp op from priv, trying multiple known offsets.""" + for off in (_NFT_CMP_OP_OFF_A, _NFT_CMP_OP_OFF_B, _NFT_CMP_OP_OFF_C): + try: + v = _read_u32(layer, priv_addr + off) & 0xFF + if v <= _NFT_CMP_GTE: + return v + except Exception: + pass + return _NFT_CMP_EQ # default to eq + + +def _decode_payload_cmp( + p_base: int, p_off: int, p_len: int, op: int, val_bytes: bytes +) -> Optional[str]: + """Decode a payload+cmp expression pair to an iptables-style string.""" + op_str = _NFT_CMP_OP_STR.get(op, f"op{op}") + + if p_base == _NFT_PAYLOAD_NETWORK: + if p_off == 12 and p_len == 4: # IPv4 src addr + try: + import socket + ip = socket.inet_ntoa(val_bytes[:4]) + return f"-s {op_str}{ip}" if op_str else f"-s {ip}" + except Exception: + return f"-s ??" + if p_off == 16 and p_len == 4: # IPv4 dst addr + try: + import socket + ip = socket.inet_ntoa(val_bytes[:4]) + return f"-d {op_str}{ip}" if op_str else f"-d {ip}" + except Exception: + return f"-d ??" 
+ if p_off == 9 and p_len == 1: # IPv4 protocol + proto = val_bytes[0] if val_bytes else 0 + pname = {6: "tcp", 17: "udp", 1: "icmp", 58: "icmpv6"}.get(proto, str(proto)) + return f"-p {pname}" + if p_off == 6 and p_len == 1: # IPv4 ToS / DSCP byte + return f"tos {op_str}0x{val_bytes[0]:02x}" if val_bytes else None + if p_off == 8 and p_len == 16: # IPv6 src addr + try: + import socket + ip = socket.inet_ntop(socket.AF_INET6, bytes(val_bytes[:16])) + return f"-s {op_str}{ip}" if op_str else f"-s {ip}" + except Exception: + return "-s ??" + if p_off == 24 and p_len == 16: # IPv6 dst addr + try: + import socket + ip = socket.inet_ntop(socket.AF_INET6, bytes(val_bytes[:16])) + return f"-d {op_str}{ip}" if op_str else f"-d {ip}" + except Exception: + return "-d ??" + elif p_base == _NFT_PAYLOAD_TRANSPORT: + if p_off == 0 and p_len == 2: # src port + port = struct.unpack(">H", val_bytes[:2])[0] + return f"--sport {op_str}{port}" if op_str else f"--sport {port}" + if p_off == 2 and p_len == 2: # dst port + port = struct.unpack(">H", val_bytes[:2])[0] + return f"--dport {op_str}{port}" if op_str else f"--dport {port}" + return None + + +# --------------------------------------------------------------------------- +# Verdict decoder +# --------------------------------------------------------------------------- + +def _decode_verdict_isf(layer, vmlinux, priv_addr: int) -> str: + """Decode an ``nft_immediate`` priv area using ISF nft_chain type.""" + try: + code = struct.unpack_from(" str: + """Decode an ``nft_immediate`` priv area using raw memory reads.""" + try: + code = struct.unpack_from(" Optional[str]: + """Return a compact summary of a native nft expression, or None to suppress. + + Used by the ISF path (_parse_rule_isf) and as a last-resort fallback in the + raw path when the payload+cmp stateful decoder doesn't produce output. 
+ """ + try: + if type_name == "meta": + key = layer.read(priv_addr, 1)[0] # nft_meta.key is u8 bitfield + return f"meta({_META_KEYS.get(key, str(key))})" + if type_name == "payload": + base = layer.read(priv_addr + _NFT_PAYLOAD_BASE_OFF, 1)[0] + offset = layer.read(priv_addr + _NFT_PAYLOAD_OFFSET_OFF, 1)[0] + ln = layer.read(priv_addr + _NFT_PAYLOAD_LEN_OFF, 1)[0] + return f"payload({_PAYLOAD_BASES.get(base, str(base))}+{offset}[{ln}])" + if type_name == "cmp": + op = _read_cmp_op(layer, priv_addr) + val = _read_u32(layer, priv_addr + _NFT_CMP_DATA_OFF) + return f"cmp({_CMP_OPS.get(op, str(op))} 0x{val:x})" + if type_name == "bitwise": + mask = _read_u32(layer, priv_addr + 4) + xor = _read_u32(layer, priv_addr + 8) + return f"bitwise(mask=0x{mask:x} xor=0x{xor:x})" + if type_name == "counter": + return None # suppress counters + if type_name == "lookup": + return "lookup(set)" + if type_name == "nat": + # nft_nat priv: sreg_addr_min(1)+sreg_addr_max(1)+sreg_proto_min(1)+ + # sreg_proto_max(1)+type(4)+family(1)+pad(1)+flags(2) + # nft_nat_type: NFT_NAT_SNAT=0, NFT_NAT_DNAT=1 + nat_type = _read_u32(layer, priv_addr + 4) & 0xFF + return "SNAT" if nat_type == 0 else "DNAT" + if type_name == "reject": + # nft_reject priv: type(4)+icmp_code(1) + # nft_reject_types: ICMP_UNREACH=0, TCP_RST=1, ICMPX_UNREACH=2 + _REJECT_LABELS = { + 0: "REJECT --reject-with icmp-port-unreachable", + 1: "REJECT --reject-with tcp-reset", + 2: "REJECT --reject-with icmpx-admin-prohibited", + } + rtype = _read_u32(layer, priv_addr) + return _REJECT_LABELS.get(rtype, f"REJECT(type={rtype})") + if type_name == "log": + # nft_log priv: prefix*(8) + level(1) + _pad(1) + logflags(2) + # level: 0=emerg 1=alert 2=crit 3=err 4=warn 5=notice 6=info 7=debug + prefix_ptr = _read_u64(layer, priv_addr) + level = layer.read(priv_addr + 8, 1)[0] + _LOG_LEVELS = {0: "emerg", 1: "alert", 2: "crit", 3: "err", + 4: "warn", 5: "notice", 6: "info", 7: "debug"} + level_s = _LOG_LEVELS.get(level, str(level)) + 
prefix_s = "" + if prefix_ptr and prefix_ptr > 0xFFFF: + try: + raw = layer.read(prefix_ptr, 64) + nul = raw.find(b"\x00") + if nul >= 0: + prefix_s = f" prefix={raw[:nul].decode('ascii', errors='replace')!r}" + except Exception: + pass + return f"LOG level={level_s}{prefix_s}" + if type_name == "limit": + # nft_limit priv: tokens(8) + rate(8) + burst(8) + unit(8) + flags(4) + type(4) + # unit: 0=second 1=minute 2=hour 3=day 4=week + # type: 0=NFT_LIMIT_PKTS 1=NFT_LIMIT_PKT_BYTES + rate = _read_u64(layer, priv_addr + 8) + burst = _read_u64(layer, priv_addr + 16) + unit = _read_u64(layer, priv_addr + 24) + ltype = _read_u32(layer, priv_addr + 36) + _UNIT_NAMES = {0: "second", 1: "minute", 2: "hour", 3: "day", 4: "week"} + unit_s = _UNIT_NAMES.get(int(unit), f"unit{unit}") + type_s = "bytes" if ltype == 1 else "pkts" + return f"limit: {rate}/{unit_s} burst {burst} {type_s}" + if type_name == "range": + # nft_range_expr: from_data(16) + to_data(16) + sreg(1) + op(1) + len(1) + # op: NFT_RANGE_EQ=0, NFT_RANGE_NEQ=1 + length = layer.read(priv_addr + 34, 1)[0] + op = layer.read(priv_addr + 33, 1)[0] + if length == 2: + from_raw = layer.read(priv_addr, 2) + to_raw = layer.read(priv_addr + 16, 2) + from_val = struct.unpack(">H", bytes(from_raw))[0] + to_val = struct.unpack(">H", bytes(to_raw))[0] + inv = "!" 
if op == 1 else "" + return f"{inv}{from_val}:{to_val}" + return f"range(len={length})" + if type_name: + return type_name + except Exception: + pass + return None + + +# --------------------------------------------------------------------------- +# ISF-backed expression parsing (used when nft_expr_ops / nft_expr_type in ISF) +# --------------------------------------------------------------------------- + +def _parse_rule_isf(context, vmlinux, rule) -> Tuple[List[str], str]: + """Parse an nft_rule using ISF types for expression ops/type lookup.""" + layer = context.layers[vmlinux.layer_name] + + try: + dlen = int(rule.dlen) + except Exception: + try: + packed = _read_u64(layer, rule.vol.offset + _NFT_RULE_PACKED_OFF) + dlen = (packed >> 44) & 0xFFF + except Exception: + return [], "?" + + if dlen == 0: + return [], "-" + if dlen > 4096: + return [], f"(dlen={dlen})" + + data_addr = rule.vol.offset + _NFT_RULE_DATA_OFF + has_ops = vmlinux.has_type("nft_expr_ops") + has_type = vmlinux.has_type("nft_expr_type") + + matches: List[str] = [] + target = "" + offset = 0 + + while offset + 8 <= dlen: + try: + ops_ptr = _read_u64(layer, data_addr + offset) + if not ops_ptr or not _is_kernel_ptr(ops_ptr): + break + + expr_size = 0 + type_name = "" + + if has_ops: + ops_obj = vmlinux.object( + vmlinux.symbol_table_name + constants.BANG + "nft_expr_ops", + offset=ops_ptr, + native_layer_name=vmlinux.layer_name, + ) + expr_size = int(ops_obj.size) + if has_type: + type_ptr = int(ops_obj.type) + if type_ptr and _is_kernel_ptr(type_ptr): + type_obj = vmlinux.object( + vmlinux.symbol_table_name + constants.BANG + "nft_expr_type", + offset=type_ptr, + native_layer_name=vmlinux.layer_name, + ) + type_name = _read_cstr(layer, int(type_obj.name), 32) + + if expr_size == 0 or expr_size > dlen - offset: + break + + priv_addr = data_addr + offset + 8 + + if type_name == "match": + ext = _xt_ext_full(layer, priv_addr) + if ext: + name, rev, data = ext + decoded = _ipt._decode_match(name, 
rev, data) + matches.append(f"-m {name}" + (f" {decoded}" if decoded else "")) + else: + matches.append("match:?") + + elif type_name == "target": + ext = _xt_ext_full(layer, priv_addr) + if ext: + name, _rev, data = ext + target = _ipt._decode_target(name, data) + else: + target = "target:?" + + elif type_name == "immediate": + verdict = _decode_verdict_isf(layer, vmlinux, priv_addr) + if not target: + target = verdict + + else: + decoded = _decode_native(type_name, layer, priv_addr) + if decoded is not None: + matches.append(decoded) + + offset += expr_size + + except exceptions.InvalidAddressException: + break + except Exception as exc: + vollog.debug("isf expr parse error at offset %d: %s", offset, exc) + break + + return matches, target or "-" + + +# --------------------------------------------------------------------------- +# Raw expression parsing (used in the raw-walk path, no ISF types) +# --------------------------------------------------------------------------- + +def _parse_rule_raw(layer, rule_addr: int, layout: NftLayout) -> Tuple[List[str], str]: + """Parse an nft_rule expression blob without ISF expression types. + + Identifies expressions by their priv data shape: + - nft_compat match/target: priv[0..7] is a kernel pointer to xt_entry_match + - immediate verdict: priv[0..3] is a small signed verdict code + - other: expression is skipped (native nft, counters, etc.) + + Expression size is always read from nft_expr_ops.size at ops_ptr+8. + """ + try: + packed = _read_u64(layer, rule_addr + _NFT_RULE_PACKED_OFF) + dlen = (packed >> 44) & 0xFFF + except Exception: + return [], "?" + + if dlen == 0: + return [], "-" + if dlen > 4096: + return [], f"(dlen={dlen})" + + data_addr = rule_addr + _NFT_RULE_DATA_OFF + matches: List[str] = [] + target = "" + offset = 0 + # Stateful tracker for payload+cmp pairs (native nft IP/port matching). + # Set when a "payload" expr is decoded; cleared when consumed by "cmp" or + # by any non-cmp expression that follows. 
+ pending_payload: Optional[Tuple[int, int, int]] = None # (base, offset, len) + # Stateful tracker for meta+cmp pairs (interface name matching: -i/-o). + # Stores the meta key (6=iifname, 7=oifname) when awaiting a cmp. + pending_meta: Optional[int] = None + + while offset + 8 <= dlen: + try: + ops_ptr = _read_u64(layer, data_addr + offset) + if not ops_ptr or not _is_kernel_ptr(ops_ptr): + break + + # Get expression total size from nft_expr_ops.size (always at +16) + expr_size = _read_u32(layer, ops_ptr + _NFT_EXPR_OPS_SIZE_OFF) + if expr_size == 0 or expr_size > dlen - offset: + vollog.debug( + "raw expr: bad size %d at offset %d (dlen=%d)", expr_size, offset, dlen + ) + break + + priv_addr = data_addr + offset + 8 + # heap ops (0xffff8... / 0xffffa... style) = nft_compat match/target + # text ops (0xffffffff... style) = native nft expression + is_heap_ops = (ops_ptr >> 32) != 0xffffffff + + # --- nft_compat match / target (heap-allocated ops struct) --- + if is_heap_ops: + ext = _xt_compat_name_data(layer, ops_ptr, priv_addr, expr_size) + if not ext: + ext = _xt_ext_full(layer, priv_addr) + if ext: + name, rev, data = ext + if name and name.isprintable(): + pending_payload = None + pending_meta = None + if name[0].isupper(): + if not target: + target = _ipt._decode_target(name, data) + else: + decoded = _ipt._decode_match(name, rev, data) + matches.append(f"-m {name}" + (f" {decoded}" if decoded else "")) + offset += expr_size + continue + + # --- Native nft expression (kernel-text ops struct) --- + # Identify the expression type by scanning the ops struct for the + # nft_expr_type pointer chain. 
+ type_name = _read_native_expr_type_name(layer, ops_ptr) + + if type_name == "payload": + pending_meta = None + try: + base = layer.read(priv_addr + _NFT_PAYLOAD_BASE_OFF, 1)[0] + p_off = layer.read(priv_addr + _NFT_PAYLOAD_OFFSET_OFF, 1)[0] + p_len = layer.read(priv_addr + _NFT_PAYLOAD_LEN_OFF, 1)[0] + pending_payload = (base, p_off, p_len) + except Exception: + pending_payload = None + offset += expr_size + continue + + if type_name == "cmp": + if pending_meta is not None: + meta_key = pending_meta + pending_meta = None + pending_payload = None + if meta_key == 16: + # NFT_META_L4PROTO: value is a single protocol byte + try: + proto = layer.read(priv_addr + _NFT_CMP_DATA_OFF, 1)[0] + matches.append(f"-p {_L4PROTO_NAMES.get(proto, str(proto))}") + except Exception: + pass + else: + # Interface name is a NUL-terminated string in cmp data. + # op=NEQ means negated match (! -i / ! -o). + # + # Linux 5.13+ nft_cmp_expr includes a 16-byte mask field: + # data(16) + mask(16) + [sreg:8,len:8,op:8] = 40B → esize=48 + # op is at priv+34 in that layout. + # Older layouts (no mask field): op at priv+18 via _read_cmp_op. + try: + try: + cmp_ops_ptr = _read_u64(layer, priv_addr - 8) + priv_size = _read_u32(layer, cmp_ops_ptr + _NFT_EXPR_OPS_SIZE_OFF) - 8 + except Exception: + priv_size = 0 + if priv_size >= 35: + op_byte = layer.read(priv_addr + 34, 1)[0] + if op_byte > _NFT_CMP_GTE: + op_byte = _NFT_CMP_EQ + else: + op_byte = _read_cmp_op(layer, priv_addr) + raw = layer.read(priv_addr + _NFT_CMP_DATA_OFF, 16) + ifname = raw.split(b"\x00")[0].decode("ascii", errors="replace") + if ifname: + flag = "-i" if meta_key == 6 else "-o" + inv = "! 
" if op_byte == _NFT_CMP_NEQ else "" + matches.append(f"{inv}{flag} {ifname}") + except Exception: + pass + elif pending_payload is not None: + p_base, p_off, p_len = pending_payload + pending_payload = None + try: + op = _read_cmp_op(layer, priv_addr) + val_data = layer.read(priv_addr + _NFT_CMP_DATA_OFF, min(p_len, 16)) + desc = _decode_payload_cmp(p_base, p_off, p_len, op, val_data) + if desc: + matches.append(desc) + except Exception: + pass + else: + pending_payload = None + pending_meta = None + offset += expr_size + continue + + if type_name == "immediate": + # Fall through to the verdict check below + pass + elif type_name == "meta": + # Read meta key; track iifname(6)/oifname(7) for -i/-o decoding. + # nft_meta.key is a u8 bitfield (key:8) in all 5.x/6.x kernels, + # so read a single byte — not u32. + pending_payload = None + try: + meta_key = layer.read(priv_addr, 1)[0] + if meta_key in (6, 7, 16): # NFT_META_IIFNAME, NFT_META_OIFNAME, NFT_META_L4PROTO + pending_meta = meta_key + else: + pending_meta = None + except Exception: + pending_meta = None + offset += expr_size + continue + elif type_name in ("counter", "bitwise", None): + # counter: suppress. bitwise: context only, not user-visible. + # None: unrecognised — skip silently. 
+ pending_payload = None + pending_meta = None + offset += expr_size + continue + else: + # Other known native type (log, limit, reject, nat, …) + decoded = _decode_native(type_name, layer, priv_addr) + if decoded is not None: + matches.append(decoded) + pending_payload = None + pending_meta = None + offset += expr_size + continue + + # --- Immediate verdict: priv[0..3] is a signed verdict code --- + pending_payload = None + pending_meta = None + try: + code = struct.unpack_from(" str: + """Return ACCEPT / DROP / - for a chain using raw memory reads.""" + try: + flags = layer.read(chain_addr + layout.chain_flags_off, 1)[0] + if not (flags & _NFT_CHAIN_BASE_FLAG): + return "-" # not a base chain + bc_addr = chain_addr - layout.bc_chain_off + policy_byte = layer.read(bc_addr + layout.bc_policy_off, 1)[0] + return "DROP" if policy_byte == _NF_DROP else "ACCEPT" + except Exception: + return "-" + + +# --------------------------------------------------------------------------- +# Offset of netns_nft within struct net (read from ISF) +# --------------------------------------------------------------------------- + +def _scan_table_layout( + layer, tbl_addr: int, base_layout: "NftLayout" +) -> "NftLayout": + """Scan an nft_table struct to auto-detect family_off and name_off. + + Used when the distribution kernel has a different nft_table layout from + what the fallback table assumes (e.g. Debian adds fields between rhltable + and the family bitfield). Falls back to base_layout values on failure. + """ + try: + raw = layer.read(tbl_addr, 400) + except Exception: + return base_layout + + # --- detect family_off --- + # Find first 2-byte aligned slot whose low 6 bits are a known NFPROTO family + # and whose high 10 bits are small (it's a packed bitfield, not a big int). + family_off: Optional[int] = None + # Start at 220 to skip the rhltable region (offsets 0-219) which contains + # values that can falsely match NFPROTO family numbers. 
+ for off in range(220, 260, 2): + v = struct.unpack_from("> 6) < 0x200: + family_off = off + break + + # --- detect name_off --- + # Scan for a kernel heap pointer that resolves to a short ASCII string + # matching known iptables table names. Skip self-referential pointers and + # kernel-image pointers (bits 63:32 all 1 = 0xffffffff prefix). + # If string verification fails (page not in dump), take the first plausible + # heap pointer as a fallback (it will be retried at walk time via _read_cstr). + name_off: Optional[int] = None + first_heap_ptr_off: Optional[int] = None + # Align to 8-byte boundary so the step-8 scan below hits the pointer slot + # exactly (e.g. family_off=236 → search_start=232, hits 232,240,248,...). + search_start = (family_off & ~7) if family_off is not None else 128 + for off in range(max(search_start, 128), 320, 8): + ptr_raw = struct.unpack_from("> 32) == 0xFFFF_FFFF: + continue + if first_heap_ptr_off is None: + first_heap_ptr_off = off + try: + name_bytes = layer.read(ptr, 16) + end = name_bytes.find(b"\x00") + name = name_bytes[: end if end >= 0 else 16].decode("ascii", errors="replace") + if name in _IPT_TABLES or (name.isalpha() and 3 <= len(name) <= 12): + name_off = off + break + except Exception: + continue + # If we found a plausible heap pointer but couldn't verify the string + # (e.g. page not captured in the dump), still record the offset so the + # live read in _generator_raw gets another chance. 
def _find_tables_off(layer, netns_nft_addr: int) -> Optional[int]:
    """Probe a netns_nftables structure for its ``tables`` list_head.

    The 8-byte-aligned offsets 0, 8, ... 56 are tried in order. At each
    offset the two consecutive 64-bit words are interpreted as
    ``list_head.next`` and ``list_head.prev`` (so the probe at offset 56
    reads up to byte 72). The first offset that qualifies is returned:

    * both words equal the (canonicalised) address of the slot itself,
      i.e. an empty but initialised list, or
    * both words pass ``_is_kernel_ptr`` (a non-empty list).

    This is a heuristic fallback for kernels whose layout puts other
    fields before ``tables``; it does not verify that the pointed-to
    objects really are list nodes.

    Args:
        layer: memory layer passed through to ``_read_u64``.
        netns_nft_addr: address of the per-namespace nftables state.

    Returns:
        The matching byte offset, or ``None`` when no slot qualifies.
    """
    for off in range(0, 64, 8):
        head_addr = netns_nft_addr + off
        try:
            next_ptr = _canon(_read_u64(layer, head_addr))
            prev_ptr = _canon(_read_u64(layer, head_addr + 8))
        except Exception:
            # Best effort: an unreadable slot just means "try the next one".
            continue

        # A list_head never holds NULL once initialised.
        if next_ptr == 0 or prev_ptr == 0:
            continue

        head_canon = _canon(head_addr)
        # Empty but initialised list: both pointers refer back to the head.
        if next_ptr == head_canon and prev_ptr == head_canon:
            return off
        # Non-empty list: both next and prev look like kernel pointers.
        if _is_kernel_ptr(next_ptr) and _is_kernel_ptr(prev_ptr):
            return off
    return None


def _nft_net_offset(vmlinux) -> Optional[int]:
    """Return the byte offset of the nftables state within ``struct net``.

    ``net.nft`` is tried first; if ``struct net`` has no such member the
    nested ``net.nf.nft`` location is tried (offset of ``nf`` plus offset of
    ``nft`` inside ``netns_nf``).

    Args:
        vmlinux: kernel module object providing ``has_type`` / ``get_type``.

    Returns:
        The offset in bytes, or ``None`` when neither member is described by
        the symbol table (or the lookup fails for any other reason).
    """
    try:
        # Missing types are an expected condition, so test for them instead
        # of relying on an exception from get_type().
        if not vmlinux.has_type("net"):
            return None
        net_t = vmlinux.get_type("net")
        if net_t.has_member("nft"):
            return net_t.members["nft"][0]
        if net_t.has_member("nf") and vmlinux.has_type("netns_nf"):
            nf_t = vmlinux.get_type("netns_nf")
            if nf_t.has_member("nft"):
                return net_t.members["nf"][0] + nf_t.members["nft"][0]
    except Exception as exc:
        # Still best effort, but leave a trace instead of failing silently.
        vollog.debug("Cannot resolve netns_nft offset from ISF: %s", exc)
    return None
+ """ + + _required_framework_version = (2, 0, 0) + _version = (1, 6, 0) + + @classmethod + def get_requirements(cls) -> List[interfaces.configuration.RequirementInterface]: + return [ + requirements.ModuleRequirement( + name="kernel", + description="Linux kernel", + architectures=["Intel32", "Intel64"], + ), + requirements.VersionRequirement( + name="Net", + component=network.NetSymbols, + version=(1, 0, 0), + ), + ] + + # ------------------------------------------------------------------ + # Namespace iterator (shared by both paths) + # ------------------------------------------------------------------ + + def _iter_namespaces(self, vmlinux): + net_sym = vmlinux.symbol_table_name + constants.BANG + "net" + nethead = vmlinux.object_from_symbol("net_namespace_list") + for net in nethead.to_list(net_sym, "list"): + try: + inum = str(int(net.ns.inum)) + except Exception: + inum = "-" + yield inum, net + + # ------------------------------------------------------------------ + # ISF path helpers + # ------------------------------------------------------------------ + + @classmethod + def _get_nft_isf(cls, vmlinux, net): + """Return netns_nft object via ISF (tries net.nft and net.nf.nft).""" + for attr_path in (["nft"], ["nf", "nft"]): + try: + obj = net + for attr in attr_path: + obj = getattr(obj, attr) + _ = obj.tables # sanity check + return obj + except Exception: + continue + raise exceptions.PluginRequirementException( + "Cannot locate netns_nft (tried net.nft and net.nf.nft)." 
+ ) + + def _chain_policy_isf(self, vmlinux, chain) -> str: + """Return ACCEPT / DROP / - for a chain using ISF types.""" + try: + if not vmlinux.has_type("nft_base_chain"): + return "-" + bc_type = vmlinux.get_type("nft_base_chain") + if not bc_type.has_member("chain"): + return "-" + chain_off = bc_type.members["chain"][0] + bc_addr = chain.vol.offset - chain_off + bc_obj = vmlinux.object( + vmlinux.symbol_table_name + constants.BANG + "nft_base_chain", + offset=bc_addr, + native_layer_name=vmlinux.layer_name, + ) + return "DROP" if int(bc_obj.policy) == _NF_DROP else "ACCEPT" + except Exception: + return "-" + + def _generator_isf(self, vmlinux) -> Iterator[Tuple]: + """Walk via ISF types (CONFIG_NF_TABLES=y kernels).""" + nft_table_sym = vmlinux.symbol_table_name + constants.BANG + "nft_table" + nft_chain_sym = vmlinux.symbol_table_name + constants.BANG + "nft_chain" + nft_rule_sym = vmlinux.symbol_table_name + constants.BANG + "nft_rule" + layer = self.context.layers[vmlinux.layer_name] + + for netns_id, net in self._iter_namespaces(vmlinux): + try: + nft = self._get_nft_isf(vmlinux, net) + except Exception as exc: + vollog.debug("NetNS %s: cannot get nft state: %s", netns_id, exc) + continue + + try: + tables = list(nft.tables.to_list(nft_table_sym, "list")) + except Exception as exc: + vollog.debug("NetNS %s: cannot iterate tables: %s", netns_id, exc) + continue + + for tbl in tables: + try: + family_int = int(tbl.family) + if family_int not in _IPT_FAMILIES: + continue + family = _IPT_FAMILIES[family_int] + tbl_name = _read_cstr(layer, int(tbl.name)) + if tbl_name not in _IPT_TABLES: + continue + except Exception: + continue + + try: + chains = list(tbl.chains.to_list(nft_chain_sym, "list")) + except Exception: + chains = [] + + if not chains: + yield 0, (netns_id, family, tbl_name, "(no chains)", "-", -1, "", "-") + continue + + for chain in chains: + try: + chain_name = _read_cstr(layer, int(chain.name)) + policy = self._chain_policy_isf(vmlinux, chain) + 
except Exception as exc: + vollog.debug("chain read error: %s", exc) + continue + + try: + rules = list(chain.rules.to_list(nft_rule_sym, "list")) + except Exception: + rules = [] + + if not rules: + yield 0, ( + netns_id, family, tbl_name, chain_name, policy, -1, "", "-" + ) + continue + + for rule_num, rule in enumerate(rules): + try: + matches, tgt = _parse_rule_isf( + self.context, vmlinux, rule + ) + except Exception as exc: + matches, tgt = [], f"(parse error: {exc})" + yield 0, ( + netns_id, family, tbl_name, chain_name, policy, + rule_num, " ".join(matches), tgt, + ) + + # ------------------------------------------------------------------ + # Raw walk path (CONFIG_NF_TABLES=m kernels) + # ------------------------------------------------------------------ + # Hook-walk path (primary for iptables-nft) + # ------------------------------------------------------------------ + + def _generator_hooks(self, vmlinux, layout: NftLayout) -> Iterator[Tuple]: + """Walk net.nf.hooks_ipv4/ipv6 → nf_hook_entries → priv (nft_base_chain) → rules. + + When iptables-nft is active, ip_tables.ko is absent and the hooks are + owned by nftables. nf_hook_ops.priv points to nft_base_chain, so we + recover the embedded nft_chain and walk its rules directly — bypassing + the nft_table chains linked list which can appear self-referential when + walking from the table side. 
+ """ + layer = self.context.layers[vmlinux.layer_name] + + seen_chains: set = set() + tbl_layout_cache: Dict[int, NftLayout] = {} + tbl_info_cache: Dict[int, Tuple[str, str]] = {} # tbl_ptr → (tbl_name, family) + + for netns_id, net in self._iter_namespaces(vmlinux): + seen_chains.clear() + tbl_layout_cache.clear() + tbl_info_cache.clear() + + hook_arrays = [] + try: + hook_arrays.append((2, "ip", net.nf.hooks_ipv4)) + except Exception: + pass + try: + hook_arrays.append((10, "ip6", net.nf.hooks_ipv6)) + except Exception: + pass + + if not hook_arrays: + vollog.debug("NetNS %s: no hooks_ipv4/ipv6 accessible", netns_id) + continue + + for family_int, family, hook_array in hook_arrays: + for i, hook_ptr_obj in enumerate(hook_array): + try: + entries_addr = int(hook_ptr_obj) + except Exception: + continue + if not entries_addr or not _is_kernel_ptr(entries_addr): + continue + try: + # Read num_hook_entries directly as u16 at offset 0. + # The ISF-generated struct offset for this field is unreliable + # (gives garbage values); raw layout is confirmed by kernel source: + # struct nf_hook_entries { u16 num_hook_entries; /* pad */ hooks[]; } + num_hooks = struct.unpack_from(" 64: + continue + + # hooks[] array starts at offset +8 within nf_hook_entries + hooks_base = entries_addr + 8 + for j in range(num_hooks): + hook_entry_addr = ( + hooks_base + j * _ipt._NF_HOOK_ENTRY_SIZE + ) + try: + # Read priv directly: nf_hook_entry layout is + # { hook_fn(8), priv(8) } so priv is always at +8. + # The ISF field offset for priv is unreliable on + # distribution kernels; raw read is authoritative. + bc_addr = _read_u64( + layer, + hook_entry_addr + _ipt._NF_HOOK_ENTRY_PRIV_OFF, + ) + except Exception: + continue + + if not bc_addr or not _is_kernel_ptr(bc_addr): + continue + + # priv = nft_chain* (the embedded chain inside nft_base_chain). + # nft_base_chain_init() sets ops[i].priv = &basechain->chain, + # so priv points directly at nft_chain, not nft_base_chain. 
+ chain_addr = bc_addr + + if chain_addr in seen_chains: + continue + seen_chains.add(chain_addr) + + # Validate via nft_table (nft_chain.table at layout.chain_table_off) + try: + tbl_ptr = _read_u64( + layer, chain_addr + layout.chain_table_off + ) + if not _is_kernel_ptr(tbl_ptr): + vollog.debug( + "NetNS %s hook[%d][%d]: chain@0x%x " + "tbl_ptr=0x%x not kernel ptr — skip", + netns_id, i, j, chain_addr, tbl_ptr, + ) + continue + + if tbl_ptr not in tbl_layout_cache: + tbl_layout_cache[tbl_ptr] = _scan_table_layout( + layer, tbl_ptr, layout + ) + tbl_layout = tbl_layout_cache[tbl_ptr] + + name_ptr = _read_u64(layer, tbl_ptr + tbl_layout.tbl_name_off) + tbl_name = _read_cstr(layer, name_ptr) + if tbl_name not in _IPT_TABLES: + if _is_kernel_ptr(name_ptr) and not tbl_name: + tbl_name = "(name unreadable)" + else: + vollog.debug( + "NetNS %s hook[%d][%d]: chain@0x%x " + "tbl_name=%r not iptables — skip", + netns_id, i, j, chain_addr, tbl_name, + ) + continue + except Exception as exc: + vollog.debug( + "NetNS %s hook[%d][%d]: chain@0x%x table read " + "failed: %s", + netns_id, i, j, chain_addr, exc, + ) + continue + + # Reject spurious hook matches: a real nft_chain is always + # linked into its table's chains list (list.next ≠ self). + # INIT_LIST_HEAD (next == self) means this is not a valid + # nft_chain — it's some other structure with a coincidental + # kernel pointer at chain_table_off. 
+ try: + list_head = chain_addr + tbl_layout.chain_list_off + list_next = _read_u64(layer, list_head) + if list_next == list_head: + vollog.debug( + "NetNS %s hook[%d][%d]: chain@0x%x " + "has INIT_LIST_HEAD — not a real nft_chain, skip", + netns_id, i, j, chain_addr, + ) + seen_chains.discard(chain_addr) + continue + except Exception: + pass # unreadable list — proceed cautiously + + # Cache validated table info for user-defined chain walk later + if tbl_ptr not in tbl_info_cache: + tbl_info_cache[tbl_ptr] = (tbl_name, family) + + try: + chain_name_ptr = _read_u64( + layer, chain_addr + tbl_layout.chain_name_off + ) + chain_name = _read_cstr(layer, chain_name_ptr) or "(unnamed)" + policy = _chain_policy_raw(layer, chain_addr, tbl_layout) + except Exception as exc: + vollog.debug( + "hook walk: chain@0x%x name/policy read failed: %s", + chain_addr, exc, + ) + continue + + vollog.debug( + "NetNS %s: hook chain '%s' table='%s' family=%s " + "policy=%s @ 0x%x", + netns_id, chain_name, tbl_name, family, policy, chain_addr, + ) + + rules_head = chain_addr + tbl_layout.chain_rules_off + rule_addrs = list( + _walk_list(layer, rules_head, _NFT_RULE_LIST_OFF) + ) + + if not rule_addrs: + yield 0, ( + netns_id, family, tbl_name, chain_name, policy, + -1, "", "-", + ) + continue + + for rule_num, rule_addr in enumerate(rule_addrs): + try: + matches, tgt = _parse_rule_raw( + layer, rule_addr, tbl_layout + ) + except Exception as exc: + matches, tgt = [], f"(parse error: {exc})" + yield 0, ( + netns_id, family, tbl_name, chain_name, policy, + rule_num, " ".join(matches), tgt, + ) + + # Discover sibling tables (nat, mangle, etc.) by walking the + # nft_table.list ring from already-known tables. The table list is + # a circular linked list; walking from a known table finds all others + # in the same namespace without needing the netns sentinel address. 
+ for seed_tbl_ptr in list(tbl_info_cache.keys()): + for other_tbl_ptr in _walk_list( + layer, seed_tbl_ptr, _NFT_TABLE_LIST_OFF + ): + if other_tbl_ptr in tbl_info_cache: + continue + try: + other_layout = _scan_table_layout( + layer, other_tbl_ptr, tbl_layout_cache[seed_tbl_ptr] + ) + name_ptr = _read_u64( + layer, other_tbl_ptr + other_layout.tbl_name_off + ) + other_name = _read_cstr(layer, name_ptr) + if other_name not in _IPT_TABLES: + if _is_kernel_ptr(name_ptr) and not other_name: + other_name = "(name unreadable)" + else: + continue + fam_raw = struct.unpack_from( + " Iterator[Tuple]: + """Walk via struct offsets resolved per kernel version (CONFIG_NF_TABLES=m).""" + layer = self.context.layers[vmlinux.layer_name] + + nft_off = _nft_net_offset(vmlinux) + if nft_off is None: + vollog.warning( + "Cannot find net.nft member in ISF — " + "raw nftables walk not possible for this kernel." + ) + return + + banner, major, minor = _ipt._get_kernel_version(vmlinux) + layout = _get_nft_layout(major, minor) + + vollog.info( + "iptables_nft: using raw walk path (nf_tables is a module). " + "Kernel: %s net.nft at offset %d. " + "NftLayout: tables_off=%d tbl_chains=%d tbl_family=%d tbl_name=%d " + "chain.rules=%d chain.list=%d chain.flags=%d chain.name=%d " + "bc.policy=%d bc.chain=%d", + banner.split("(")[0].strip() if banner != "unknown" else f"{major}.{minor}", + nft_off, + layout.tables_off, layout.tbl_chains_off, layout.tbl_family_off, + layout.tbl_name_off, layout.chain_rules_off, layout.chain_list_off, + layout.chain_flags_off, layout.chain_name_off, + layout.bc_policy_off, layout.bc_chain_off, + ) + + for netns_id, net in self._iter_namespaces(vmlinux): + net_addr = net.vol.offset + netns_nft_addr = net_addr + nft_off + tables_head = netns_nft_addr + layout.tables_off + + # If the tables list head appears null, the layout's tables_off may + # be wrong for this distribution kernel. Scan for the real offset. 
+ try: + tables_next = _canon(_read_u64(layer, tables_head)) + except Exception: + tables_next = 0 + if not _is_kernel_ptr(tables_next): + detected_off = _find_tables_off(layer, netns_nft_addr) + if detected_off is not None and detected_off != layout.tables_off: + vollog.warning( + "NetNS %s: tables_off=%d gives null pointer; " + "auto-detected tables_off=%d — distribution-specific layout " + "(kernel %d.%d). Please report for fallback table update.", + netns_id, layout.tables_off, detected_off, major, minor, + ) + tables_head = netns_nft_addr + detected_off + # Peek at the first table to refine family_off / name_off + try: + first_tbl_ptr = _canon(_read_u64(layer, tables_head)) + if _is_kernel_ptr(first_tbl_ptr): + refined = _scan_table_layout(layer, first_tbl_ptr, layout) + if refined is not layout: + vollog.warning( + "NetNS %s: auto-refined NftLayout: " + "tbl_family_off %d→%d tbl_name_off %d→%d", + netns_id, + layout.tbl_family_off, refined.tbl_family_off, + layout.tbl_name_off, refined.tbl_name_off, + ) + layout = refined + except Exception: + pass + + try: + raw = layer.read(netns_nft_addr, 128) + kptrs = [] + for off in range(0, 128, 8): + val = struct.unpack_from(" Iterator[Tuple]: + kernel_key = self.config["kernel"] + vmlinux = self.context.modules[kernel_key] + network.NetSymbols.apply( + self.context.symbol_space[vmlinux.symbol_table_name] + ) + + isf_has_nft = ( + vmlinux.has_type("nft_table") + and vmlinux.has_type("nft_chain") + and vmlinux.has_type("nft_rule") + ) + + if isf_has_nft: + vollog.info("iptables_nft: ISF path (CONFIG_NF_TABLES=y)") + yield from self._generator_isf(vmlinux) + else: + vollog.info( + "iptables_nft: nft_table/nft_chain/nft_rule absent from ISF " + "— using raw walk (CONFIG_NF_TABLES=m)" + ) + _, major, minor = _ipt._get_kernel_version(vmlinux) + layout = _get_nft_layout(major, minor) + + # Primary: walk via nf_hook_entries (finds chains even when the + # nft_table chains list appears self-referential). 
+ vollog.info("iptables_nft: trying hook walk path") + hook_rows = list(self._generator_hooks(vmlinux, layout)) + if hook_rows: + vollog.info( + "iptables_nft: hook walk yielded %d rows", len(hook_rows) + ) + yield from iter(hook_rows) + else: + # Fallback: walk net.nft.tables → chains → rules + vollog.info( + "iptables_nft: hook walk produced no rows — " + "falling back to table-list walk" + ) + yield from self._generator_raw(vmlinux) + + # ------------------------------------------------------------------ + # Entry point + # ------------------------------------------------------------------ + + def run(self): + columns = [ + ("NetNS", str), + ("Family", str), + ("Table", str), + ("Chain", str), + ("Policy", str), + ("RuleNum", int), + ("Matches", str), + ("Target", str), + ] + return renderers.TreeGrid(columns, self._generator())