From 17cf7d578dc8b54cc53e48559d091526768adf29 Mon Sep 17 00:00:00 2001 From: mfw78 Date: Mon, 1 Jun 2026 06:09:24 +0000 Subject: [PATCH] fix(mantaray): tolerate bee's `ref_size = 0` wire form on decode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bee occasionally emits manifest nodes with `ref_size = 0` for entry-less terminal nodes (mantaray-js documents this with an explicit FIXME: "in Bee, if one uploads a file on the bzz endpoint, the node under `/` gets 0 refsize"). Per the spec and every reference impl, `ref_size` is a uniform per-node width governing both the entry slot and every fork ref slot; the only legal values are `E::SIZE` (32 plain / 64 encrypted). Accept `ref_size = 0` only when the forks bitfield is also empty (the terminal-node case the bee artifact represents). Reject `ref_size = 0` with non-empty forks as malformed rather than silently dropping forks the way bee's v0.2 decoder does at marshal.go:285-287 — no impl can coherently round-trip fork refs at zero width. The encoder is unchanged: it continues to emit `ref_size = E::SIZE` with zero entry bytes for entry-less nodes, matching bee's own "valid manifest" test fixture and mantaray-js. Reproducing the bee bug on the write side would defeat the purpose. Adds four regression tests pinning the v0.1 and v0.2 decoder behaviour and the encoder's spec-correct output. 
--- crates/mantaray/src/codec.rs | 135 +++++++++++++++++++++++++++++++++++ crates/mantaray/src/lib.rs | 9 +++ 2 files changed, 144 insertions(+) diff --git a/crates/mantaray/src/codec.rs b/crates/mantaray/src/codec.rs index 4c7d6ea..66adcac 100644 --- a/crates/mantaray/src/codec.rs +++ b/crates/mantaray/src/codec.rs @@ -183,8 +183,59 @@ impl TryFrom<&[u8]> for Node { } } +// ┌─────────────────────────── HAZMAT ───────────────────────────┐ +// │ BEE-WORKAROUND(bee#5483): bee's mantaray writer occasionally │ +// │ emits a node with `ref_size = 0` (the byte at header offset │ +// │ 63) for entry-less terminal nodes. This is not spec-legal: │ +// │ the spec doc (bee/pkg/manifest/mantaray/docs/format/node.md) │ +// │ and every reference impl (bee, mantaray-js, nectar) treat │ +// │ `ref_size` as a single uniform width in {32, 64} governing │ +// │ both the entry slot and every fork ref slot. mantaray-js │ +// │ documents the bee artifact with an explicit FIXME: "in Bee, │ +// │ if one uploads a file on the bzz endpoint, the node under │ +// │ `/` gets 0 refsize." │ +// │ │ +// │ Remove `decode_empty_terminal_node` and the two call-sites │ +// │ guarded by `BEE-WORKAROUND(bee#5483)` once the upstream bee │ +// │ fix lands and downstream consumers have upgraded past the │ +// │ buggy releases. │ +// └──────────────────────────────────────────────────────────────┘ + +/// Decode a `ref_size = 0` node as the empty terminal node that bee intends +/// it to mean. +/// +/// Accepts this wire shape only when the forks bitfield is also empty. A +/// `ref_size = 0` node with non-empty forks is unrecoverable by any +/// implementation (fork refs would have zero width), so we reject it as +/// malformed rather than silently dropping forks the way bee's v0.2 decoder +/// does (`bee/pkg/manifest/mantaray/marshal.go:285-287`). +/// +/// See the HAZMAT block above for the full context. 
+fn decode_empty_terminal_node(data: &[u8]) -> Result> { + let bitfield_start = NodeHeader::SIZE; + let bitfield_end = bitfield_start + 32; + if data.len() < bitfield_end { + return Err(MantarayError::DataTooShort); + } + if data[bitfield_start..bitfield_end].iter().any(|&b| b != 0) { + return Err(MantarayError::EntrySizeMismatch { + expected: E::SIZE, + actual: 0, + }); + } + Ok(Node { + entry: None, + forks: BTreeMap::new(), + ..Default::default() + }) +} + fn decode_v01(data: &[u8]) -> Result> { let ref_bytes_size = data[NodeHeader::REF_SIZE_OFFSET] as usize; + // BEE-WORKAROUND(bee#5483): see HAZMAT block above `decode_empty_terminal_node`. + if ref_bytes_size == 0 { + return decode_empty_terminal_node::(data); + } if ref_bytes_size != E::SIZE { return Err(MantarayError::EntrySizeMismatch { expected: E::SIZE, @@ -231,6 +282,10 @@ fn decode_v01(data: &[u8]) -> Result> { fn decode_v02(data: &[u8]) -> Result> { let ref_bytes_size = data[NodeHeader::REF_SIZE_OFFSET] as usize; + // BEE-WORKAROUND(bee#5483): see HAZMAT block above `decode_empty_terminal_node`. + if ref_bytes_size == 0 { + return decode_empty_terminal_node::(data); + } if ref_bytes_size != E::SIZE { return Err(MantarayError::EntrySizeMismatch { expected: E::SIZE, @@ -632,6 +687,86 @@ mod tests { assert!(Node::::try_from(data.as_slice()).is_err()); } + /// BEE-WORKAROUND(bee#5483): bee occasionally emits nodes with + /// `ref_size = 0` for entry-less terminal nodes (mantaray-js FIXME: + /// "in Bee, if one uploads a file on the bzz endpoint, the node under + /// `/` gets 0 refsize"). Tolerate this wire shape only when the forks + /// bitfield is also empty. 
+ #[test] + fn decode_bee_legacy_ref_size_zero_empty_node() { + // v0.2 layout: 32 obfuscation key zeros || 31 version hash || ref_size=0 || 32 index zeros = 96 bytes + let mut data = vec![0u8; 96]; + data[ObfuscationKey::SIZE + ..ObfuscationKey::SIZE + VersionHash::SIZE] + .copy_from_slice(VersionHash::V02.as_bytes()); + // ref_size at offset 63 is left as 0; index (offset 64..96) is all zero. + + let n = Node::::try_from(data.as_slice()) + .expect("ref_size=0 with empty forks should decode as terminal node"); + assert!(n.entry().is_none()); + assert!(n.forks().is_empty()); + } + + /// BEE-WORKAROUND(bee#5483): a `ref_size = 0` node with a non-empty forks + /// bitfield is unrecoverable by any reference implementation (fork refs + /// would have zero width). Reject as malformed rather than silently + /// dropping forks the way bee's v0.2 decoder does. + #[test] + fn decode_bee_legacy_ref_size_zero_with_forks_is_rejected() { + let mut data = vec![0u8; 96]; + data[ObfuscationKey::SIZE + ..ObfuscationKey::SIZE + VersionHash::SIZE] + .copy_from_slice(VersionHash::V02.as_bytes()); + // ref_size = 0 (offset 63 already zero), but flip one bit in the index. + data[NodeHeader::SIZE] = 0x01; + + let result = Node::::try_from(data.as_slice()); + assert!(matches!( + result, + Err(MantarayError::EntrySizeMismatch { + expected: 32, + actual: 0 + }) + )); + } + + /// BEE-WORKAROUND(bee#5483): v0.1 counterpart of + /// `decode_bee_legacy_ref_size_zero_empty_node`; both decoders must apply the same rule. 
+ #[test] + fn decode_bee_legacy_ref_size_zero_v01_empty_node() { + let mut data = vec![0u8; 96]; + data[ObfuscationKey::SIZE + ..ObfuscationKey::SIZE + VersionHash::SIZE] + .copy_from_slice(VersionHash::V01.as_bytes()); + + let n = Node::::try_from(data.as_slice()) + .expect("v0.1 ref_size=0 with empty forks should decode as terminal node"); + assert!(n.entry().is_none()); + assert!(n.forks().is_empty()); + } + + /// Pin nectar's encoder behaviour: even for an entry-less node, it must + /// emit `ref_size = E::SIZE`, never `0`. Spec-correct, matches bee's + /// "valid manifest" test fixture, matches mantaray-js. Emitting 0 would + /// reproduce the bee bug rather than fix it. + #[test] + fn encoder_never_emits_ref_size_zero_for_entryless_node() { + let n = Node::::new_unencrypted(); + let encoded = Vec::::try_from(&n).unwrap(); + + // Decrypt (obfuscation key is all-zero for `new_unencrypted`, so XOR + // is a no-op, but go through the motions for clarity). + let mut decoded = encoded.clone(); + let key = decoded[..ObfuscationKey::SIZE].to_vec(); + xor_in_place(&mut decoded[ObfuscationKey::SIZE..], &key); + + assert_eq!( + decoded[NodeHeader::REF_SIZE_OFFSET] as usize, + ::SIZE, + "encoder must emit ref_size = E::SIZE, not 0; spec requires uniform reference width" + ); + } + /// Encode-decode round-trip preserves entries and metadata. #[test] fn encode_decode_round_trip() { diff --git a/crates/mantaray/src/lib.rs b/crates/mantaray/src/lib.rs index 3d08e20..bdbe46a 100644 --- a/crates/mantaray/src/lib.rs +++ b/crates/mantaray/src/lib.rs @@ -51,6 +51,15 @@ //! use nectar_mantaray::metadata; //! assert_eq!(metadata::CONTENT_TYPE, "Content-Type"); //! ``` +//! +//! # Upstream-bug workarounds +//! +//! Code that exists solely to tolerate a defect in an upstream reference +//! implementation is tagged with a grep-able `BEE-WORKAROUND(bee#NNNN)` +//! comment. When the upstream fix lands and downstream consumers have +//! 
upgraded past the buggy releases, every site tagged with that issue +//! number should be removed. Run `git grep -n BEE-WORKAROUND` to enumerate +//! them. use nectar_primitives::bmt::DEFAULT_BODY_SIZE; use nectar_primitives::chunk::ChunkAddress;