Skip to content

Commit 7990829

Browse files
Frandoclaude
andcommitted
fix: make conntrack flush best-effort when conntrack binary is missing
CI environments may not have the conntrack binary installed. The flush is a performance optimization, not a correctness requirement. Log and continue instead of returning an error. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 155c113 commit 7990829

File tree

2 files changed

+45
-32
lines changed

2 files changed

+45
-32
lines changed

patchbay/src/balancer.rs

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -313,11 +313,14 @@ pub(crate) fn generate_lb_rules(balancers: &[ResolvedBalancer]) -> String {
313313
for balancer in balancers {
314314
writeln!(rules, " chain {} {{", balancer.name).unwrap();
315315

316-
// Affinity lookup (before distribution).
316+
// Affinity lookup (before distribution). If the client IP is in the
317+
// affinity map the lookup returns the pinned backend and DNAT fires.
318+
// If the client is not in the map the lookup fails silently and
319+
// execution falls through to the numgen distribution rule below.
317320
if balancer.has_affinity() {
318321
writeln!(
319322
rules,
320-
" ip saddr @{name}_affinity dnat to ip saddr map @{name}_affinity",
323+
" dnat to ip saddr map @{name}_affinity",
321324
name = balancer.name,
322325
)
323326
.unwrap();
@@ -391,23 +394,17 @@ pub(crate) fn generate_lb_rules(balancers: &[ResolvedBalancer]) -> String {
391394
// Apply / setup helpers
392395
// ─────────────────────────────────────────────
393396

394-
/// Applies load balancer nftables rules for a router during initial setup.
397+
/// Logs build-time balancer configs during router setup.
395398
///
396-
/// Called from `wiring::setup_router_async` when the router has balancers
397-
/// configured at build time.
398-
pub(crate) async fn setup_balancers(
399-
_netns: &crate::netns::NetnsManager,
400-
router: &crate::core::RouterData,
401-
) -> Result<()> {
402-
// At setup time, backends may not have IPs yet if they are built after
403-
// the router. Build-time balancers with no backends are stored but rules
404-
// are deferred until backends are added at runtime.
399+
/// Rules are not applied here because backend devices may not have IPs yet
400+
/// (they are typically built after the router). Rules are generated on the
401+
/// first runtime mutation (`add_balancer`, `add_lb_backend`, etc.).
402+
pub(crate) fn log_build_time_balancers(router: &crate::core::RouterData) {
405403
debug!(
406404
router = %router.name,
407405
count = router.balancers.len(),
408-
"balancer: setup (build-time configs stored, rules applied on first backend add)"
406+
"balancer: build-time configs stored, rules applied on first mutation"
409407
);
410-
Ok(())
411408
}
412409

413410
/// Resolves and applies all load balancer rules for a router.
@@ -438,18 +435,24 @@ async fn apply_all_lb_rules(
438435
let rules = generate_lb_rules(&resolved);
439436
debug!(ns = %router_ns, rules = %rules, "balancer: applying rules");
440437

441-
// Delete existing table (ignore error if it does not exist).
442-
run_nft_in(&lab.netns, router_ns, "delete table ip lb")
438+
// Atomically replace the table: prepend a delete so the entire
439+
// operation is a single nft -f invocation with no gap.
440+
let atomic_rules = format!("delete table ip lb\n{rules}");
441+
// First attempt: atomic replace (table exists).
442+
if run_nft_in(&lab.netns, router_ns, &atomic_rules)
443443
.await
444-
.ok();
445-
run_nft_in(&lab.netns, router_ns, &rules).await?;
444+
.is_err()
445+
{
446+
// Table did not exist yet; create without the delete prefix.
447+
run_nft_in(&lab.netns, router_ns, &rules).await?;
448+
}
446449

447450
// Ensure VIP addresses are on the bridge.
451+
let bridge: Arc<str> = bridge.into();
448452
for balancer in &resolved {
449453
let vip = balancer.vip;
450-
let bridge: Arc<str> = bridge.into();
454+
let bridge = bridge.clone();
451455
nl_run(&lab.netns, router_ns, {
452-
let bridge = bridge.clone();
453456
move |nl: crate::netlink::Netlink| async move {
454457
// add_addr4 is idempotent; if the address exists it returns Ok.
455458
nl.add_addr4(&bridge, vip, 32).await.ok();
@@ -666,16 +669,20 @@ impl crate::router::Router {
666669
&port.to_string(),
667670
])
668671
.status()
669-
.await
670-
.context("spawn conntrack -D")?;
671-
// conntrack -D returns non-zero if no entries matched, which is fine.
672-
debug!(
673-
vip = %vip,
674-
port = %port,
675-
proto = %proto,
676-
success = status.success(),
677-
"balancer: conntrack flush"
678-
);
672+
.await;
673+
// conntrack may not be installed or may find no matching entries.
674+
// Both are fine. Log and continue.
675+
match status {
676+
Ok(s) => debug!(
677+
vip = %vip, port = %port, proto = %proto,
678+
success = s.success(),
679+
"balancer: conntrack flush"
680+
),
681+
Err(e) => debug!(
682+
error = %e,
683+
"balancer: conntrack not available, skipping flush"
684+
),
685+
}
679686
}
680687
Ok(())
681688
};
@@ -766,6 +773,10 @@ mod tests {
766773
let rules = generate_lb_rules(&balancers);
767774
assert!(rules.contains("map sticky_affinity"));
768775
assert!(rules.contains("timeout 3600s"));
776+
assert!(
777+
rules.contains("dnat to ip saddr map @sticky_affinity"),
778+
"affinity lookup rule missing"
779+
);
769780
assert!(rules.contains("meta l4proto tcp dnat to numgen random mod 2"));
770781
assert!(rules.contains("update @sticky_affinity"));
771782
}

patchbay/src/wiring.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,9 +434,11 @@ pub(crate) async fn setup_router_async(
434434
};
435435
apply_firewall(netns, &router.ns, &router.cfg.firewall, fw_wan).await?;
436436

437-
// Apply load balancer rules if any balancers are configured at build time.
437+
// Log build-time balancer configs. Rules are deferred until the first
438+
// runtime mutation (add_balancer / add_lb_backend) because backend
439+
// devices may not have IPs assigned yet at router setup time.
438440
if !router.balancers.is_empty() {
439-
crate::balancer::setup_balancers(netns, router).await?;
441+
crate::balancer::log_build_time_balancers(router);
440442
}
441443

442444
// NAT64: create TUN device, routes, nft masquerade, and start translator.

0 commit comments

Comments
 (0)