From 2993d5150222ed9e3300ea783ab8efdc53cce514 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 20:52:36 +0200 Subject: [PATCH 01/19] feat: add UserComparator::name() for stable identity persistence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a `name() -> &'static str` method to the `UserComparator` trait that returns a stable string identifier. The name is persisted in the tree manifest (sfa archive) on creation and checked on every subsequent open — a mismatch produces `Error::ComparatorMismatch`, preventing silent data corruption from reopening with an incompatible ordering. - Persist comparator name as `comparator_name` section in version file - Read comparator name on recovery (backward-compat: absent section defaults to "default", matching `DefaultUserComparator::name()`) - Store comparator name in `SuperVersions` so version upgrades (flush, compaction) write it without extra plumbing - Add `Error::ComparatorMismatch { stored, supplied }` variant - Add 4 integration tests: same-comparator reopen, mismatch detection, custom→default mismatch, default→default reopen Closes #74 --- src/comparator.rs | 24 +++- src/config/mod.rs | 11 +- src/error.rs | 12 ++ src/manifest.rs | 17 +++ src/table/util.rs | 4 + src/tree/inner.rs | 2 +- src/tree/mod.rs | 13 ++ src/version/mod.rs | 4 + src/version/persist.rs | 8 +- src/version/super_version.rs | 257 ++++++++++++++++++----------------- tests/custom_comparator.rs | 112 +++++++++++++++ 11 files changed, 325 insertions(+), 139 deletions(-) diff --git a/src/comparator.rs b/src/comparator.rs index b4b5d023d..62ae3b68d 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -29,8 +29,9 @@ use std::sync::Arc; /// # Important /// /// Once a tree is created with a comparator, it must always be opened with the -/// same comparator. Using a different comparator on an existing tree will produce -/// incorrect results. +/// same comparator. The comparator's [`name`](UserComparator::name) is +/// persisted and checked on every subsequent open — a mismatch causes the open +/// to fail with [`Error::ComparatorMismatch`](crate::Error::ComparatorMismatch). /// /// # Examples /// @@ -42,6 +43,10 @@ use std::sync::Arc; /// struct U64Comparator; /// /// impl UserComparator for U64Comparator { +/// fn name(&self) -> &'static str { +/// "u64-big-endian" +/// } +/// /// fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { /// if a.len() == 8 && b.len() == 8 { /// // Length checked, conversion cannot fail. @@ -57,6 +62,17 @@ use std::sync::Arc; /// } /// ``` pub trait UserComparator: Send + Sync + std::panic::RefUnwindSafe + 'static { + /// Returns a stable identifier for this comparator. + /// + /// The name is persisted when a tree is first created. On subsequent + /// opens the stored name is compared against the caller-supplied + /// comparator's name — a mismatch causes the open to fail, preventing + /// silent data corruption from using an incompatible ordering. + /// + /// Choose a name that uniquely identifies the ordering logic and will + /// not change across releases (e.g. `"u64-big-endian"`, `"reverse-lexicographic"`). + fn name(&self) -> &'static str; + /// Compares two user keys, returning their ordering. fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering; @@ -78,6 +94,10 @@ pub trait UserComparator: Send + Sync + std::panic::RefUnwindSafe + 'static { pub struct DefaultUserComparator; impl UserComparator for DefaultUserComparator { + fn name(&self) -> &'static str { + "default" + } + #[inline] fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering { a.cmp(b) diff --git a/src/config/mod.rs b/src/config/mod.rs index 58e91b2fc..842cb8758 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -605,13 +605,10 @@ impl Config { /// /// # Important /// - /// Once a tree is created with a custom comparator, it **must** be - /// re-opened with the same comparator. Using a different comparator - /// on an existing tree produces incorrect results. - /// - /// The comparator identity is **not** persisted to disk — the caller - /// is responsible for ensuring the same comparator is used across - /// open/close cycles (same approach as `RocksDB`). + /// The comparator's [`UserComparator::name`] is persisted when a tree is + /// first created. On subsequent opens the stored name is compared against + /// the supplied comparator's name — a mismatch causes the open to fail + /// with [`Error::ComparatorMismatch`](crate::Error::ComparatorMismatch). #[must_use] pub fn comparator(mut self, comparator: SharedComparator) -> Self { self.comparator = comparator; diff --git a/src/error.rs b/src/error.rs index f34c5036d..0b09f43ac 100644 --- a/src/error.rs +++ b/src/error.rs @@ -76,6 +76,18 @@ pub enum Error { /// Decryption failed Decrypt(&'static str), + /// Comparator mismatch on tree reopen. + /// + /// The tree was created with a comparator whose [`UserComparator::name`] + /// differs from the one supplied at reopen time. + ComparatorMismatch { + /// Comparator name persisted in the tree metadata. + stored: String, + + /// Comparator name supplied by the caller. + supplied: &'static str, + }, + /// Range tombstone block decode failure. RangeTombstoneDecode { /// Which field or validation failed (e.g. `start_len`, `start`, `seqno`, `interval`) diff --git a/src/manifest.rs b/src/manifest.rs index f5a9886b8..1c59210ca 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -14,6 +14,7 @@ pub struct Manifest { )] pub tree_type: TreeType, pub level_count: u8, + pub comparator_name: String, } impl Manifest { @@ -90,10 +91,26 @@ impl Manifest { ); } + // Optional section — absent in manifests written before comparator + // identity persistence was added. Trees created without this section + // were implicitly using `DefaultUserComparator` whose name is "default". + let comparator_name = match toc.section(b"comparator_name") { + Some(section) => { + let bytes: Vec = section + .buf_reader(path)? + .bytes() + .collect::, _>>()?; + + String::from_utf8(bytes).map_err(|e| crate::Error::Utf8(e.utf8_error()))? + } + None => "default".to_owned(), + }; + Ok(Self { version, tree_type, level_count, + comparator_name, }) } } diff --git a/src/table/util.rs b/src/table/util.rs index bfe21839c..3caf141e5 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -377,6 +377,10 @@ mod tests { /// Reverse comparator to exercise the Vec-allocation slow path. struct ReverseComparator; impl crate::comparator::UserComparator for ReverseComparator { + fn name(&self) -> &'static str { + "test-reverse" + } + fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering { b.cmp(a) } diff --git a/src/tree/inner.rs b/src/tree/inner.rs index 8bee97659..45b6a69f6 100644 --- a/src/tree/inner.rs +++ b/src/tree/inner.rs @@ -80,7 +80,7 @@ impl TreeInner { crate::TreeType::Standard }, ); - persist_version(&config.path, &version)?; + persist_version(&config.path, &version, config.comparator.name())?; let comparator = config.comparator.clone(); diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 579cfb52d..61b559964 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1365,6 +1365,19 @@ impl Tree { return Err(crate::Error::Unrecoverable); } + let supplied_name = config.comparator.name(); + if manifest.comparator_name != supplied_name { + log::error!( + "Comparator mismatch: tree was created with {:?} but opened with {:?}", + manifest.comparator_name, + supplied_name, + ); + return Err(crate::Error::ComparatorMismatch { + stored: manifest.comparator_name, + supplied: supplied_name, + }); + } + // IMPORTANT: Restore persisted config config.level_count = manifest.level_count; } diff --git a/src/version/mod.rs b/src/version/mod.rs index 4abb70db2..0bcc2442f 100644 --- a/src/version/mod.rs +++ b/src/version/mod.rs @@ -631,6 +631,7 @@ impl Version { pub(crate) fn encode_into( &self, writer: &mut sfa::Writer, + comparator_name: &str, ) -> Result<(), crate::Error> { use crate::FormatVersion; use byteorder::{LittleEndian, WriteBytesExt}; @@ -659,6 +660,9 @@ impl Version { writer.start("filter_hash_type")?; writer.write_u8(u8::from(ChecksumType::Xxh3))?; + writer.start("comparator_name")?; + writer.write_all(comparator_name.as_bytes())?; + // // Levels // diff --git a/src/version/persist.rs b/src/version/persist.rs index ad42fcf6c..7cf6ac45a 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -6,7 +6,11 @@ use crate::{ use byteorder::{LittleEndian, WriteBytesExt}; use std::{io::BufWriter, path::Path}; -pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { +pub fn persist_version( + folder: &Path, + version: &Version, + comparator_name: &str, +) -> crate::Result<()> { log::trace!( "Persisting version {} in {}", version.id(), @@ -21,7 +25,7 @@ pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { { let mut writer = sfa::Writer::from_writer(&mut writer); - version.encode_into(&mut writer)?; + version.encode_into(&mut writer, comparator_name)?; writer.finish().map_err(|e| match e { sfa::Error::Io(e) => crate::Error::from(e), diff --git a/src/version/super_version.rs b/src/version/super_version.rs index b0c11350c..5d945a87d 100644 --- a/src/version/super_version.rs +++ b/src/version/super_version.rs @@ -27,25 +27,33 @@ pub struct SuperVersion { pub(crate) seqno: SeqNo, } -pub struct SuperVersions(VecDeque); +pub struct SuperVersions { + versions: VecDeque, + + /// Stable comparator identity persisted in every version file. + comparator_name: Arc, +} impl SuperVersions { pub fn new(version: Version, comparator: SharedComparator) -> Self { - Self( - vec![SuperVersion { + let comparator_name: Arc = comparator.name().into(); + + Self { + versions: vec![SuperVersion { active_memtable: Arc::new(Memtable::new(0, comparator)), sealed_memtables: Arc::default(), version, seqno: 0, }] .into(), - ) + comparator_name, + } } pub fn memtable_size_sum(&self) -> u64 { let mut set = crate::HashMap::default(); - for super_version in &self.0 { + for super_version in &self.versions { set.entry(super_version.active_memtable.id) .and_modify(|bytes| *bytes += super_version.active_memtable.size()) .or_insert_with(|| super_version.active_memtable.size()); @@ -61,7 +69,7 @@ impl SuperVersions { } pub fn len(&self) -> usize { - self.0.len() + self.versions.len() } pub fn free_list_len(&self) -> usize { @@ -79,9 +87,9 @@ impl SuperVersions { log::trace!("Running manifest GC with watermark={gc_watermark}"); - if let Some(hi_idx) = self.0.iter().rposition(|x| x.seqno < gc_watermark) { + if let Some(hi_idx) = self.versions.iter().rposition(|x| x.seqno < gc_watermark) { for _ in 0..hi_idx { - let Some(head) = self.0.front() else { + let Some(head) = self.versions.front() else { break; }; @@ -96,11 +104,14 @@ impl SuperVersions { std::fs::remove_file(path)?; } - self.0.pop_front(); + self.versions.pop_front(); } } - log::trace!("Manifest GC done, version length now {}", self.0.len()); + log::trace!( + "Manifest GC done, version length now {}", + self.versions.len() + ); Ok(()) } @@ -140,7 +151,7 @@ impl SuperVersions { next_version.seqno = seqno; log::trace!("Next version seqno={}", next_version.seqno); - persist_version(tree_path, &next_version.version)?; + persist_version(tree_path, &next_version.version, &self.comparator_name)?; self.append_version(next_version); // Clamp to stay below the reserved MSB range. @@ -151,18 +162,18 @@ impl SuperVersions { } pub fn append_version(&mut self, version: SuperVersion) { - self.0.push_back(version); + self.versions.push_back(version); } pub fn replace_latest_version(&mut self, version: SuperVersion) { - if self.0.pop_back().is_some() { - self.0.push_back(version); + if self.versions.pop_back().is_some() { + self.versions.push_back(version); } } pub fn latest_version(&self) -> SuperVersion { #[expect(clippy::expect_used, reason = "SuperVersion is expected to exist")] - self.0 + self.versions .iter() .last() .cloned() @@ -173,14 +184,14 @@ impl SuperVersions { if seqno == 0 { #[expect(clippy::expect_used, reason = "SuperVersion is expected to exist")] return self - .0 + .versions .front() .cloned() .expect("should always find a SuperVersion"); } let version = self - .0 + .versions .iter() .rev() .find(|version| version.seqno < seqno) @@ -190,7 +201,7 @@ impl SuperVersions { log::error!("Failed to find a SuperVersion for snapshot with seqno={seqno}"); log::error!("SuperVersions:"); - for version in self.0.iter().rev() { + for version in self.versions.iter().rev() { log::error!("-> {}, seqno={}", version.version.id(), version.seqno); } } @@ -210,31 +221,35 @@ mod tests { Memtable::new(id, default_comparator()) } + fn test_super_versions(versions: Vec) -> SuperVersions { + SuperVersions { + versions: versions.into(), + comparator_name: "default".into(), + } + } + #[test] fn super_version_gc_above_watermark() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 1, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 1, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + ]); history.maintenance(Path::new("."), 0)?; @@ -245,29 +260,26 @@ mod tests { #[test] fn super_version_gc_below_watermark_simple() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 1, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 1, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + ]); history.maintenance(Path::new("."), 3)?; @@ -278,35 +290,32 @@ mod tests { #[test] fn super_version_gc_below_watermark_simple_2() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 1, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 8, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 1, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 8, + }, + ]); history.maintenance(Path::new("."), 3)?; @@ -317,23 +326,20 @@ mod tests { #[test] fn super_version_gc_below_watermark_keep() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 8, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 8, + }, + ]); history.maintenance(Path::new("."), 3)?; @@ -344,23 +350,20 @@ mod tests { #[test] fn super_version_gc_below_watermark_shadowed() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + ]); history.maintenance(Path::new("."), 3)?; diff --git a/tests/custom_comparator.rs b/tests/custom_comparator.rs index 71ce102ac..ef49af125 100644 --- a/tests/custom_comparator.rs +++ b/tests/custom_comparator.rs @@ -6,6 +6,10 @@ use std::sync::Arc; struct ReverseComparator; impl UserComparator for ReverseComparator { + fn name(&self) -> &'static str { + "reverse-lexicographic" + } + fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { b.cmp(a) // reversed } @@ -15,6 +19,10 @@ impl UserComparator for ReverseComparator { struct U64BigEndianComparator; impl UserComparator for U64BigEndianComparator { + fn name(&self) -> &'static str { + "u64-big-endian" + } + fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { if a.len() == 8 && b.len() == 8 { let a_u64 = u64::from_be_bytes(a.try_into().unwrap()); @@ -279,3 +287,107 @@ fn u64_comparator_bounded_range_scan() -> lsm_tree::Result<()> { Ok(()) } + +#[test] +fn reopen_with_same_comparator_succeeds() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with reverse comparator + { + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen with the same comparator — must succeed + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + + assert_eq!(tree.get("a", 2)?, Some("1".as_bytes().into())); + + Ok(()) +} + +#[test] +fn reopen_with_different_comparator_fails() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with reverse comparator + { + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen with u64 comparator — must fail + let cmp: SharedComparator = Arc::new(U64BigEndianComparator); + let result = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open(); + + match result { + Err(lsm_tree::Error::ComparatorMismatch { stored, supplied }) => { + assert_eq!(stored, "reverse-lexicographic"); + assert_eq!(supplied, "u64-big-endian"); + } + Ok(_) => panic!("expected ComparatorMismatch, got Ok"), + Err(e) => panic!("expected ComparatorMismatch, got {e:?}"), + } + + Ok(()) +} + +#[test] +fn reopen_custom_with_default_comparator_fails() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with reverse comparator + { + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen without specifying a comparator (uses default) — must fail + let result = Config::new(&folder, Default::default(), Default::default()).open(); + + match result { + Err(lsm_tree::Error::ComparatorMismatch { stored, supplied }) => { + assert_eq!(stored, "reverse-lexicographic"); + assert_eq!(supplied, "default"); + } + Ok(_) => panic!("expected ComparatorMismatch, got Ok"), + Err(e) => panic!("expected ComparatorMismatch, got {e:?}"), + } + + Ok(()) +} + +#[test] +fn reopen_default_tree_with_default_comparator_succeeds() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with default comparator + { + let tree = Config::new(&folder, Default::default(), Default::default()).open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen with default comparator — must succeed + let tree = Config::new(&folder, Default::default(), Default::default()).open()?; + assert_eq!(tree.get("a", 2)?, Some("1".as_bytes().into())); + + Ok(()) +} From 3f3aec2d714706870528d3a7c53c8a926bc42de3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 21:08:18 +0200 Subject: [PATCH 02/19] fix: add size bound for comparator_name manifest section Guard against corrupted/forged manifests that could advertise an extremely large comparator_name section. Reject names longer than 256 bytes before allocating. --- src/comparator.rs | 4 ++++ src/manifest.rs | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/src/comparator.rs b/src/comparator.rs index 62ae3b68d..ca2819a12 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -71,6 +71,10 @@ pub trait UserComparator: Send + Sync + std::panic::RefUnwindSafe + 'static { /// /// Choose a name that uniquely identifies the ordering logic and will /// not change across releases (e.g. `"u64-big-endian"`, `"reverse-lexicographic"`). + // + // Intentionally required (no default impl): a shared fallback name would + // let two distinct comparators pass the mismatch check, silently producing + // corrupt reads on reopen — the exact scenario this method prevents. fn name(&self) -> &'static str; /// Compares two user keys, returning their ordering. diff --git a/src/manifest.rs b/src/manifest.rs index 1c59210ca..2b29ef508 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -96,6 +96,15 @@ impl Manifest { // were implicitly using `DefaultUserComparator` whose name is "default". let comparator_name = match toc.section(b"comparator_name") { Some(section) => { + const MAX_COMPARATOR_NAME_LEN: u64 = 256; + + if section.len() > MAX_COMPARATOR_NAME_LEN { + return Err(crate::Error::DecompressedSizeTooLarge { + declared: section.len(), + limit: MAX_COMPARATOR_NAME_LEN, + }); + } + let bytes: Vec = section .buf_reader(path)? .bytes() From 1e5df2d8251564eaec5ffe7155017982a80d6f4c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 21:14:30 +0200 Subject: [PATCH 03/19] test(manifest): add unit tests for comparator_name backward compat Cover the three paths in Manifest::decode_from: - missing section defaults to "default" - present section round-trips correctly - oversized section (>256 bytes) is rejected --- src/manifest.rs | 74 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/manifest.rs b/src/manifest.rs index 2b29ef508..d3ef69605 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -123,3 +123,77 @@ impl Manifest { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use byteorder::WriteBytesExt; + use std::io::Write; + + /// Write the mandatory manifest sections (format_version, tree_type, + /// level_count, filter_hash_type) into an sfa archive at `path`. + /// If `comparator_name` is `Some`, also writes that section. + fn write_test_manifest(path: &std::path::Path, comparator_name: Option<&str>) { + let file = std::fs::File::create(path).unwrap(); + let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); + + writer.start("format_version").unwrap(); + writer.write_u8(FormatVersion::V4.into()).unwrap(); + + writer.start("tree_type").unwrap(); + writer.write_u8(TreeType::Standard.into()).unwrap(); + + writer.start("level_count").unwrap(); + writer.write_u8(7).unwrap(); + + writer.start("filter_hash_type").unwrap(); + writer.write_u8(u8::from(ChecksumType::Xxh3)).unwrap(); + + if let Some(name) = comparator_name { + writer.start("comparator_name").unwrap(); + writer.write_all(name.as_bytes()).unwrap(); + } + + writer.finish().unwrap(); + } + + #[test] + fn manifest_without_comparator_name_defaults_to_default() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest"); + + write_test_manifest(&path, None); + + let reader = sfa::Reader::new(&path).unwrap(); + let manifest = Manifest::decode_from(&path, &reader).unwrap(); + assert_eq!(manifest.comparator_name, "default"); + } + + #[test] + fn manifest_with_comparator_name_round_trips() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest"); + + write_test_manifest(&path, Some("u64-big-endian")); + + let reader = sfa::Reader::new(&path).unwrap(); + let manifest = Manifest::decode_from(&path, &reader).unwrap(); + assert_eq!(manifest.comparator_name, "u64-big-endian"); + } + + #[test] + fn manifest_rejects_oversized_comparator_name() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest"); + + let long_name = "x".repeat(300); + write_test_manifest(&path, Some(&long_name)); + + let reader = sfa::Reader::new(&path).unwrap(); + let result = Manifest::decode_from(&path, &reader); + assert!( + matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })), + "expected DecompressedSizeTooLarge" + ); + } +} From 950dd77f846bfe631d9ce96e450729215d15b4e7 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 21:52:57 +0200 Subject: [PATCH 04/19] fix(manifest): add write-side name length assert, remove unwrap in tests - Assert comparator_name <= 256 bytes in persist_version (symmetric with the read-side bound in Manifest::decode_from) - Replace unwrap() with ? in manifest unit tests (clippy lint) - Clarify backward-compat comment: custom comparators cannot exist in pre-change manifests because UserComparator was introduced in the same release cycle --- src/manifest.rs | 70 +++++++++++++++++++++++++----------------- src/version/persist.rs | 9 ++++++ 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/src/manifest.rs b/src/manifest.rs index d3ef69605..526179bf4 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -92,8 +92,10 @@ impl Manifest { } // Optional section — absent in manifests written before comparator - // identity persistence was added. Trees created without this section - // were implicitly using `DefaultUserComparator` whose name is "default". + // identity persistence was added. The `UserComparator` trait was + // introduced in the same release cycle, so all pre-existing trees + // used `DefaultUserComparator` whose `name()` returns "default". + // Custom comparators cannot exist in old manifests. let comparator_name = match toc.section(b"comparator_name") { Some(section) => { const MAX_COMPARATOR_NAME_LEN: u64 = 256; @@ -133,67 +135,77 @@ mod tests { /// Write the mandatory manifest sections (format_version, tree_type, /// level_count, filter_hash_type) into an sfa archive at `path`. /// If `comparator_name` is `Some`, also writes that section. - fn write_test_manifest(path: &std::path::Path, comparator_name: Option<&str>) { - let file = std::fs::File::create(path).unwrap(); + fn write_test_manifest( + path: &std::path::Path, + comparator_name: Option<&str>, + ) -> crate::Result<()> { + let file = std::fs::File::create(path)?; let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); - writer.start("format_version").unwrap(); - writer.write_u8(FormatVersion::V4.into()).unwrap(); + writer.start("format_version")?; + writer.write_u8(FormatVersion::V4.into())?; - writer.start("tree_type").unwrap(); - writer.write_u8(TreeType::Standard.into()).unwrap(); + writer.start("tree_type")?; + writer.write_u8(TreeType::Standard.into())?; - writer.start("level_count").unwrap(); - writer.write_u8(7).unwrap(); + writer.start("level_count")?; + writer.write_u8(7)?; - writer.start("filter_hash_type").unwrap(); - writer.write_u8(u8::from(ChecksumType::Xxh3)).unwrap(); + writer.start("filter_hash_type")?; + writer.write_u8(u8::from(ChecksumType::Xxh3))?; if let Some(name) = comparator_name { - writer.start("comparator_name").unwrap(); - writer.write_all(name.as_bytes()).unwrap(); + writer.start("comparator_name")?; + writer.write_all(name.as_bytes())?; } - writer.finish().unwrap(); + writer.finish().map_err(|e| match e { + sfa::Error::Io(e) => crate::Error::from(e), + _ => unreachable!(), + })?; + Ok(()) } #[test] - fn manifest_without_comparator_name_defaults_to_default() { - let dir = tempfile::tempdir().unwrap(); + fn manifest_without_comparator_name_defaults_to_default() -> crate::Result<()> { + let dir = tempfile::tempdir()?; let path = dir.path().join("manifest"); - write_test_manifest(&path, None); + write_test_manifest(&path, None)?; - let reader = sfa::Reader::new(&path).unwrap(); - let manifest = Manifest::decode_from(&path, &reader).unwrap(); + let reader = sfa::Reader::new(&path)?; + let manifest = Manifest::decode_from(&path, &reader)?; assert_eq!(manifest.comparator_name, "default"); + Ok(()) } #[test] - fn manifest_with_comparator_name_round_trips() { - let dir = tempfile::tempdir().unwrap(); + fn manifest_with_comparator_name_round_trips() -> crate::Result<()> { + let dir = tempfile::tempdir()?; let path = dir.path().join("manifest"); - write_test_manifest(&path, Some("u64-big-endian")); + write_test_manifest(&path, Some("u64-big-endian"))?; - let reader = sfa::Reader::new(&path).unwrap(); - let manifest = Manifest::decode_from(&path, &reader).unwrap(); + let reader = sfa::Reader::new(&path)?; + let manifest = Manifest::decode_from(&path, &reader)?; assert_eq!(manifest.comparator_name, "u64-big-endian"); + Ok(()) } #[test] - fn manifest_rejects_oversized_comparator_name() { - let dir = tempfile::tempdir().unwrap(); + fn manifest_rejects_oversized_comparator_name() -> crate::Result<()> { + let dir = tempfile::tempdir()?; let path = dir.path().join("manifest"); let long_name = "x".repeat(300); - write_test_manifest(&path, Some(&long_name)); + write_test_manifest(&path, Some(&long_name))?; - let reader = sfa::Reader::new(&path).unwrap(); + let reader = sfa::Reader::new(&path)?; let result = Manifest::decode_from(&path, &reader); assert!( matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })), "expected DecompressedSizeTooLarge" ); + Ok(()) } } diff --git a/src/version/persist.rs b/src/version/persist.rs index 7cf6ac45a..fcd578594 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -6,11 +6,20 @@ use crate::{ use byteorder::{LittleEndian, WriteBytesExt}; use std::{io::BufWriter, path::Path}; +/// Maximum comparator name length, enforced symmetrically on write +/// (here) and read (`Manifest::decode_from`). +const MAX_COMPARATOR_NAME_LEN: usize = 256; + pub fn persist_version( folder: &Path, version: &Version, comparator_name: &str, ) -> crate::Result<()> { + assert!( + comparator_name.len() <= MAX_COMPARATOR_NAME_LEN, + "comparator name exceeds {MAX_COMPARATOR_NAME_LEN} bytes", + ); + log::trace!( "Persisting version {} in {}", version.id(), From 21274ba666fd554c342555dbe57dfe0b646ea8ce Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 22:10:07 +0200 Subject: [PATCH 05/19] test(comparator): add unit tests for DefaultUserComparator::name() Cover the name() and is_lexicographic() methods directly to satisfy codecov patch coverage threshold. --- src/comparator.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/comparator.rs b/src/comparator.rs index ca2819a12..fb29d78d7 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -126,3 +126,19 @@ pub fn default_comparator() -> SharedComparator { std::sync::LazyLock::new(|| Arc::new(DefaultUserComparator)); DEFAULT.clone() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_comparator_name() { + assert_eq!(DefaultUserComparator.name(), "default"); + assert_eq!(default_comparator().name(), "default"); + } + + #[test] + fn default_comparator_is_lexicographic() { + assert!(DefaultUserComparator.is_lexicographic()); + } +} From 7ef2f7b17fedef6e6931fa3f95c51e9e48990d95 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 22:24:14 +0200 Subject: [PATCH 06/19] docs: qualify intra-doc links for UserComparator::name Use `crate::UserComparator::name` in doc comments within config and error modules where UserComparator is not directly in scope. --- src/config/mod.rs | 2 +- src/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 842cb8758..abb69745a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -605,7 +605,7 @@ impl Config { /// /// # Important /// - /// The comparator's [`UserComparator::name`] is persisted when a tree is + /// The comparator's [`crate::UserComparator::name`] is persisted when a tree is /// first created. On subsequent opens the stored name is compared against /// the supplied comparator's name — a mismatch causes the open to fail /// with [`Error::ComparatorMismatch`](crate::Error::ComparatorMismatch). diff --git a/src/error.rs b/src/error.rs index 0b09f43ac..6ed815aa9 100644 --- a/src/error.rs +++ b/src/error.rs @@ -78,7 +78,7 @@ pub enum Error { /// Comparator mismatch on tree reopen. /// - /// The tree was created with a comparator whose [`UserComparator::name`] + /// The tree was created with a comparator whose [`crate::UserComparator::name`] /// differs from the one supplied at reopen time. ComparatorMismatch { /// Comparator name persisted in the tree metadata. From 599a90ccf240e26ee3ee3417b993ecd7d79d4ffe Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 22:24:50 +0200 Subject: [PATCH 07/19] docs(persist): clarify why assert is used for name length check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UserComparator::name() returns &'static str — an oversized name is a programmer error, not a runtime condition. Assert is appropriate. --- src/version/persist.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/version/persist.rs b/src/version/persist.rs index fcd578594..b3907b434 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -15,6 +15,8 @@ pub fn persist_version( version: &Version, comparator_name: &str, ) -> crate::Result<()> { + // Panic is intentional: `UserComparator::name()` returns `&'static str`, + // so an oversized name is a programmer error, not a runtime condition. assert!( comparator_name.len() <= MAX_COMPARATOR_NAME_LEN, "comparator name exceeds {MAX_COMPARATOR_NAME_LEN} bytes", From e9c4fc88465a0ef8441c1f2360c286763e863344 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 22:37:43 +0200 Subject: [PATCH 08/19] fix(recover): validate manifest before recover_levels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move comparator name and format version checks ahead of recover_levels() so a rejected open is side-effect free. recover_levels loads tables and cleans up orphans — running it before validation wastes work and can modify disk state for an open that will be rejected anyway. --- src/tree/mod.rs | 54 +++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 61b559964..88c27506f 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1330,6 +1330,36 @@ impl Tree { log::info!("Recovering LSM-tree at {}", config.path.display()); + // Validate manifest metadata (format version, tree type, comparator + // name) BEFORE recover_levels, so a rejected open is side-effect free + // — recover_levels loads tables and cleans up orphans. + { + let version_id = crate::version::recovery::get_current_version(&config.path)?; + let manifest_path = config.path.join(format!("v{version_id}")); + let reader = sfa::Reader::new(&manifest_path)?; + let manifest = Manifest::decode_from(&manifest_path, &reader)?; + + if !matches!(manifest.version, FormatVersion::V3 | FormatVersion::V4) { + return Err(crate::Error::InvalidVersion(manifest.version.into())); + } + + let supplied_name = config.comparator.name(); + if manifest.comparator_name != supplied_name { + log::error!( + "Comparator mismatch: tree was created with {:?} but opened with {:?}", + manifest.comparator_name, + supplied_name, + ); + return Err(crate::Error::ComparatorMismatch { + stored: manifest.comparator_name, + supplied: supplied_name, + }); + } + + // IMPORTANT: Restore persisted config + config.level_count = manifest.level_count; + } + let tree_id = get_next_tree_id(); #[cfg(feature = "metrics")] @@ -1344,14 +1374,6 @@ impl Tree { )?; { - let manifest_path = config.path.join(format!("v{}", version.id())); - let reader = sfa::Reader::new(&manifest_path)?; - let manifest = Manifest::decode_from(&manifest_path, &reader)?; - - if !matches!(manifest.version, FormatVersion::V3 | FormatVersion::V4) { - return Err(crate::Error::InvalidVersion(manifest.version.into())); - } - let requested_tree_type = match config.kv_separation_opts { Some(_) => crate::TreeType::Blob, None => crate::TreeType::Standard, @@ -1364,22 +1386,6 @@ impl Tree { ); return Err(crate::Error::Unrecoverable); } - - let supplied_name = config.comparator.name(); - if manifest.comparator_name != supplied_name { - log::error!( - "Comparator mismatch: tree was created with {:?} but opened with {:?}", - manifest.comparator_name, - supplied_name, - ); - return Err(crate::Error::ComparatorMismatch { - stored: manifest.comparator_name, - supplied: supplied_name, - }); - } - - // IMPORTANT: Restore persisted config - config.level_count = manifest.level_count; } let highest_table_id = version From b203516ff9f86ca1efa7f761b19941c3a0e64b9b Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 22:37:52 +0200 Subject: [PATCH 09/19] test(manifest): add invalid UTF-8 comparator name rejection test Cover the Error::Utf8 path when comparator_name section contains non-UTF-8 bytes. --- src/manifest.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/manifest.rs b/src/manifest.rs index 526179bf4..faa518b3b 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -208,4 +208,38 @@ mod tests { ); Ok(()) } + + #[test] + fn manifest_rejects_invalid_utf8_comparator_name() -> crate::Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("manifest"); + + // Write manifest with invalid UTF-8 bytes in comparator_name + let file = std::fs::File::create(&path)?; + let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); + + writer.start("format_version")?; + writer.write_u8(FormatVersion::V4.into())?; + writer.start("tree_type")?; + writer.write_u8(TreeType::Standard.into())?; + writer.start("level_count")?; + writer.write_u8(7)?; + writer.start("filter_hash_type")?; + writer.write_u8(u8::from(ChecksumType::Xxh3))?; + writer.start("comparator_name")?; + writer.write_all(&[0xFF, 0xFE])?; + + writer.finish().map_err(|e| match e { + sfa::Error::Io(e) => crate::Error::from(e), + _ => unreachable!(), + })?; + + let reader = sfa::Reader::new(&path)?; + let result = Manifest::decode_from(&path, &reader); + assert!( + matches!(result, Err(crate::Error::Utf8(_))), + "expected Utf8 error" + ); + Ok(()) + } } From e1881710b568cfa1bb1a129f221ca50c7161decb Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 23:20:20 +0200 Subject: [PATCH 10/19] test(table): exercise ReverseComparator::name() for coverage The test-only ReverseComparator's name() method was never called, leaving it uncovered in the patch diff. --- src/table/util.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/table/util.rs b/src/table/util.rs index 3caf141e5..600ce0fe8 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -390,6 +390,11 @@ mod tests { fn test_compare_prefixed_slice_custom_comparator() { use std::cmp::Ordering::{Equal, Greater, Less}; + assert_eq!( + crate::comparator::UserComparator::name(&ReverseComparator), + "test-reverse", + ); + // With reverse comparator, "abc" > "xyz" (reversed) assert_eq!( Greater, From df99015d72823a5fdc35ebe381ff2f82493837e5 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 23:23:05 +0200 Subject: [PATCH 11/19] refactor: unify MAX_COMPARATOR_NAME_BYTES, downgrade mismatch log level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract shared MAX_COMPARATOR_NAME_BYTES constant to comparator module, replacing duplicated literals in persist.rs and manifest.rs - Downgrade comparator mismatch log from error to warn — this is a user-correctable misconfiguration, not an internal error --- src/comparator.rs | 5 +++++ src/manifest.rs | 6 +++--- src/tree/mod.rs | 2 +- src/version/persist.rs | 9 +++------ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/comparator.rs b/src/comparator.rs index fb29d78d7..d4b7efd6a 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -116,6 +116,11 @@ impl UserComparator for DefaultUserComparator { /// Shared reference to a [`UserComparator`]. pub type SharedComparator = Arc; +/// Maximum byte length for a comparator name. +/// +/// Enforced on write (`persist_version`) and read (`Manifest::decode_from`). +pub(crate) const MAX_COMPARATOR_NAME_BYTES: usize = 256; + /// Returns the default comparator (lexicographic byte ordering). /// /// Uses a shared static instance to avoid repeated allocations. diff --git a/src/manifest.rs b/src/manifest.rs index faa518b3b..e950e48fa 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -98,12 +98,12 @@ impl Manifest { // Custom comparators cannot exist in old manifests. let comparator_name = match toc.section(b"comparator_name") { Some(section) => { - const MAX_COMPARATOR_NAME_LEN: u64 = 256; + let limit = crate::comparator::MAX_COMPARATOR_NAME_BYTES as u64; - if section.len() > MAX_COMPARATOR_NAME_LEN { + if section.len() > limit { return Err(crate::Error::DecompressedSizeTooLarge { declared: section.len(), - limit: MAX_COMPARATOR_NAME_LEN, + limit, }); } diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 88c27506f..4c81f845e 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1345,7 +1345,7 @@ impl Tree { let supplied_name = config.comparator.name(); if manifest.comparator_name != supplied_name { - log::error!( + log::warn!( "Comparator mismatch: tree was created with {:?} but opened with {:?}", manifest.comparator_name, supplied_name, diff --git a/src/version/persist.rs b/src/version/persist.rs index b3907b434..fb8e52cd1 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -6,10 +6,6 @@ use crate::{ use byteorder::{LittleEndian, WriteBytesExt}; use std::{io::BufWriter, path::Path}; -/// Maximum comparator name length, enforced symmetrically on write -/// (here) and read (`Manifest::decode_from`). -const MAX_COMPARATOR_NAME_LEN: usize = 256; - pub fn persist_version( folder: &Path, version: &Version, @@ -18,8 +14,9 @@ pub fn persist_version( // Panic is intentional: `UserComparator::name()` returns `&'static str`, // so an oversized name is a programmer error, not a runtime condition. assert!( - comparator_name.len() <= MAX_COMPARATOR_NAME_LEN, - "comparator name exceeds {MAX_COMPARATOR_NAME_LEN} bytes", + comparator_name.len() <= crate::comparator::MAX_COMPARATOR_NAME_BYTES, + "comparator name exceeds {} bytes", + crate::comparator::MAX_COMPARATOR_NAME_BYTES, ); log::trace!( From e5f75e9fa0c4b475feb9af475674c765f43ac540 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sun, 22 Mar 2026 23:42:55 +0200 Subject: [PATCH 12/19] fix(comparator): use pub for module-private const, fix stale doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - MAX_COMPARATOR_NAME_BYTES: pub(crate) → pub (clippy::redundant_pub_crate) - Pre-recovery validation scope: remove tree_type from list (checked post-recovery) --- src/comparator.rs | 2 +- src/tree/mod.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/comparator.rs b/src/comparator.rs index d4b7efd6a..a396e3fbe 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -119,7 +119,7 @@ pub type SharedComparator = Arc; /// Maximum byte length for a comparator name. /// /// Enforced on write (`persist_version`) and read (`Manifest::decode_from`). -pub(crate) const MAX_COMPARATOR_NAME_BYTES: usize = 256; +pub const MAX_COMPARATOR_NAME_BYTES: usize = 256; /// Returns the default comparator (lexicographic byte ordering). /// diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 4c81f845e..6ea5fc5d6 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1330,9 +1330,10 @@ impl Tree { log::info!("Recovering LSM-tree at {}", config.path.display()); - // Validate manifest metadata (format version, tree type, comparator - // name) BEFORE recover_levels, so a rejected open is side-effect free + // Validate manifest metadata (format version, comparator name) + // BEFORE recover_levels, so a rejected open is side-effect free // — recover_levels loads tables and cleans up orphans. + // Tree type is checked after recovery (needs the Version object). { let version_id = crate::version::recovery::get_current_version(&config.path)?; let manifest_path = config.path.join(format!("v{version_id}")); From ac28340c745f31565fddbd66066d240540305ed1 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 00:16:51 +0200 Subject: [PATCH 13/19] fix(persist): return Result instead of panicking on oversized name Replace assert! with an Err return for comparator names exceeding MAX_COMPARATOR_NAME_BYTES, giving callers a recoverable error instead of a process crash. --- src/version/persist.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/version/persist.rs b/src/version/persist.rs index fb8e52cd1..7cdd57ab6 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -11,13 +11,15 @@ pub fn persist_version( version: &Version, comparator_name: &str, ) -> crate::Result<()> { - // Panic is intentional: `UserComparator::name()` returns `&'static str`, - // so an oversized name is a programmer error, not a runtime condition. - assert!( - comparator_name.len() <= crate::comparator::MAX_COMPARATOR_NAME_BYTES, - "comparator name exceeds {} bytes", - crate::comparator::MAX_COMPARATOR_NAME_BYTES, - ); + if comparator_name.len() > crate::comparator::MAX_COMPARATOR_NAME_BYTES { + return Err(crate::Error::from(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!( + "comparator name exceeds {} bytes", + crate::comparator::MAX_COMPARATOR_NAME_BYTES, + ), + ))); + } log::trace!( "Persisting version {} in {}", From 25ababa993f0c95952c004f8dfc0b54268b99859 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 00:17:52 +0200 Subject: [PATCH 14/19] style(table): use method syntax for ReverseComparator::name() call --- src/table/util.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/table/util.rs b/src/table/util.rs index 600ce0fe8..35a79035e 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -390,10 +390,8 @@ mod tests { fn test_compare_prefixed_slice_custom_comparator() { use std::cmp::Ordering::{Equal, Greater, Less}; - assert_eq!( - crate::comparator::UserComparator::name(&ReverseComparator), - "test-reverse", - ); + use crate::comparator::UserComparator as _; + assert_eq!(ReverseComparator.name(), "test-reverse"); // With reverse comparator, "abc" > "xyz" (reversed) assert_eq!( From 6774175a0ff706f50b33689c2af9badd05ef977a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 00:46:19 +0200 Subject: [PATCH 15/19] test(comparator): add oversized name rejection test Exercise the persist_version error path for comparator names exceeding MAX_COMPARATOR_NAME_BYTES (256 bytes). --- tests/custom_comparator.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/custom_comparator.rs b/tests/custom_comparator.rs index ef49af125..7bd332f49 100644 --- a/tests/custom_comparator.rs +++ b/tests/custom_comparator.rs @@ -391,3 +391,32 @@ fn reopen_default_tree_with_default_comparator_succeeds() -> lsm_tree::Result<() Ok(()) } + +/// Comparator with a name exceeding the 256-byte limit. +struct OversizedNameComparator; + +impl UserComparator for OversizedNameComparator { + fn name(&self) -> &'static str { + // 300 chars — exceeds MAX_COMPARATOR_NAME_BYTES (256) + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + } + + fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { + a.cmp(b) + } +} + +#[test] +fn oversized_comparator_name_rejected_on_create() { + let folder = tempfile::tempdir().unwrap(); + let cmp: SharedComparator = Arc::new(OversizedNameComparator); + + let result = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open(); + + assert!( + matches!(result, Err(lsm_tree::Error::Io(_))), + "expected Io error for oversized comparator name" + ); +} From bbeeed53ddcb44aab18a3cc93a7cce248a85a59a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 00:47:22 +0200 Subject: [PATCH 16/19] fix(persist): include actual name length in oversized error message --- src/tree/mod.rs | 3 +++ src/version/persist.rs | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 6ea5fc5d6..f9c0b79db 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1334,6 +1334,9 @@ impl Tree { // BEFORE recover_levels, so a rejected open is side-effect free // — recover_levels loads tables and cleans up orphans. // Tree type is checked after recovery (needs the Version object). + // NOTE: the version file is read twice (here for metadata, then inside + // recover_levels for table/blob data). This is intentional — metadata + // validation must complete before any disk-mutating recovery work. { let version_id = crate::version::recovery::get_current_version(&config.path)?; let manifest_path = config.path.join(format!("v{version_id}")); diff --git a/src/version/persist.rs b/src/version/persist.rs index 7cdd57ab6..a8c33f5bf 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -15,7 +15,8 @@ pub fn persist_version( return Err(crate::Error::from(std::io::Error::new( std::io::ErrorKind::InvalidInput, format!( - "comparator name exceeds {} bytes", + "comparator name is {} bytes (max {})", + comparator_name.len(), crate::comparator::MAX_COMPARATOR_NAME_BYTES, ), ))); From c4e47807ac17c2cc23865ab35c437e06f84f50ca Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 02:31:08 +0200 Subject: [PATCH 17/19] test(comparator): assert InvalidInput error kind for oversized name --- tests/custom_comparator.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/custom_comparator.rs b/tests/custom_comparator.rs index 7bd332f49..9e2404539 100644 --- a/tests/custom_comparator.rs +++ b/tests/custom_comparator.rs @@ -415,8 +415,11 @@ fn oversized_comparator_name_rejected_on_create() { .comparator(cmp) .open(); - assert!( - matches!(result, Err(lsm_tree::Error::Io(_))), - "expected Io error for oversized comparator name" - ); + match result { + Err(lsm_tree::Error::Io(e)) => { + assert_eq!(e.kind(), std::io::ErrorKind::InvalidInput); + } + Ok(_) => panic!("expected InvalidInput error for oversized comparator name"), + Err(e) => panic!("expected InvalidInput Io error, got {e:?}"), + } } From 27e5c0ef9987e8c9f99258be4dead064d37b48b1 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 02:34:04 +0200 Subject: [PATCH 18/19] refactor(manifest): use read_to_end for comparator_name section --- src/manifest.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/manifest.rs b/src/manifest.rs index e950e48fa..64f4f78cb 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -107,10 +107,8 @@ impl Manifest { }); } - let bytes: Vec = section - .buf_reader(path)? - .bytes() - .collect::, _>>()?; + let mut bytes = Vec::new(); + section.buf_reader(path)?.read_to_end(&mut bytes)?; String::from_utf8(bytes).map_err(|e| crate::Error::Utf8(e.utf8_error()))? } From 5efe6099031d58b3598d1eb6a466f89ab5d35688 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 23 Mar 2026 03:08:18 +0200 Subject: [PATCH 19/19] refactor(manifest): simplify sfa::Writer::finish in test helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ? directly — From for crate::Error is implemented, no need for manual map_err with unreachable branch. --- src/manifest.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/manifest.rs b/src/manifest.rs index 64f4f78cb..e81d55e25 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -157,10 +157,7 @@ mod tests { writer.write_all(name.as_bytes())?; } - writer.finish().map_err(|e| match e { - sfa::Error::Io(e) => crate::Error::from(e), - _ => unreachable!(), - })?; + writer.finish()?; Ok(()) } @@ -227,10 +224,7 @@ mod tests { writer.start("comparator_name")?; writer.write_all(&[0xFF, 0xFE])?; - writer.finish().map_err(|e| match e { - sfa::Error::Io(e) => crate::Error::from(e), - _ => unreachable!(), - })?; + writer.finish()?; let reader = sfa::Reader::new(&path)?; let result = Manifest::decode_from(&path, &reader);