diff --git a/src/comparator.rs b/src/comparator.rs index b4b5d023d..a396e3fbe 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -29,8 +29,9 @@ use std::sync::Arc; /// # Important /// /// Once a tree is created with a comparator, it must always be opened with the -/// same comparator. Using a different comparator on an existing tree will produce -/// incorrect results. +/// same comparator. The comparator's [`name`](UserComparator::name) is +/// persisted and checked on every subsequent open — a mismatch causes the open +/// to fail with [`Error::ComparatorMismatch`](crate::Error::ComparatorMismatch). /// /// # Examples /// @@ -42,6 +43,10 @@ use std::sync::Arc; /// struct U64Comparator; /// /// impl UserComparator for U64Comparator { +/// fn name(&self) -> &'static str { +/// "u64-big-endian" +/// } +/// /// fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { /// if a.len() == 8 && b.len() == 8 { /// // Length checked, conversion cannot fail. @@ -57,6 +62,21 @@ use std::sync::Arc; /// } /// ``` pub trait UserComparator: Send + Sync + std::panic::RefUnwindSafe + 'static { + /// Returns a stable identifier for this comparator. + /// + /// The name is persisted when a tree is first created. On subsequent + /// opens the stored name is compared against the caller-supplied + /// comparator's name — a mismatch causes the open to fail, preventing + /// silent data corruption from using an incompatible ordering. + /// + /// Choose a name that uniquely identifies the ordering logic and will + /// not change across releases (e.g. `"u64-big-endian"`, `"reverse-lexicographic"`). + // + // Intentionally required (no default impl): a shared fallback name would + // let two distinct comparators pass the mismatch check, silently producing + // corrupt reads on reopen — the exact scenario this method prevents. + fn name(&self) -> &'static str; + /// Compares two user keys, returning their ordering. fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering; @@ -78,6 +98,10 @@ pub trait UserComparator: Send + Sync + std::panic::RefUnwindSafe + 'static { pub struct DefaultUserComparator; impl UserComparator for DefaultUserComparator { + fn name(&self) -> &'static str { + "default" + } + #[inline] fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering { a.cmp(b) @@ -92,6 +116,11 @@ impl UserComparator for DefaultUserComparator { /// Shared reference to a [`UserComparator`]. pub type SharedComparator = Arc; +/// Maximum byte length for a comparator name. +/// +/// Enforced on write (`persist_version`) and read (`Manifest::decode_from`). +pub const MAX_COMPARATOR_NAME_BYTES: usize = 256; + /// Returns the default comparator (lexicographic byte ordering). /// /// Uses a shared static instance to avoid repeated allocations. @@ -102,3 +131,19 @@ pub fn default_comparator() -> SharedComparator { std::sync::LazyLock::new(|| Arc::new(DefaultUserComparator)); DEFAULT.clone() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_comparator_name() { + assert_eq!(DefaultUserComparator.name(), "default"); + assert_eq!(default_comparator().name(), "default"); + } + + #[test] + fn default_comparator_is_lexicographic() { + assert!(DefaultUserComparator.is_lexicographic()); + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index 58e91b2fc..abb69745a 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -605,13 +605,10 @@ impl Config { /// /// # Important /// - /// Once a tree is created with a custom comparator, it **must** be - /// re-opened with the same comparator. Using a different comparator - /// on an existing tree produces incorrect results. - /// - /// The comparator identity is **not** persisted to disk — the caller - /// is responsible for ensuring the same comparator is used across - /// open/close cycles (same approach as `RocksDB`). + /// The comparator's [`crate::UserComparator::name`] is persisted when a tree is + /// first created. On subsequent opens the stored name is compared against + /// the supplied comparator's name — a mismatch causes the open to fail + /// with [`Error::ComparatorMismatch`](crate::Error::ComparatorMismatch). #[must_use] pub fn comparator(mut self, comparator: SharedComparator) -> Self { self.comparator = comparator; diff --git a/src/error.rs b/src/error.rs index f34c5036d..6ed815aa9 100644 --- a/src/error.rs +++ b/src/error.rs @@ -76,6 +76,18 @@ pub enum Error { /// Decryption failed Decrypt(&'static str), + /// Comparator mismatch on tree reopen. + /// + /// The tree was created with a comparator whose [`crate::UserComparator::name`] + /// differs from the one supplied at reopen time. + ComparatorMismatch { + /// Comparator name persisted in the tree metadata. + stored: String, + + /// Comparator name supplied by the caller. + supplied: &'static str, + }, + /// Range tombstone block decode failure. RangeTombstoneDecode { /// Which field or validation failed (e.g. `start_len`, `start`, `seqno`, `interval`) diff --git a/src/manifest.rs b/src/manifest.rs index f5a9886b8..e81d55e25 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -14,6 +14,7 @@ pub struct Manifest { )] pub tree_type: TreeType, pub level_count: u8, + pub comparator_name: String, } impl Manifest { @@ -90,10 +91,147 @@ impl Manifest { ); } + // Optional section — absent in manifests written before comparator + // identity persistence was added. The `UserComparator` trait was + // introduced in the same release cycle, so all pre-existing trees + // used `DefaultUserComparator` whose `name()` returns "default". + // Custom comparators cannot exist in old manifests. + let comparator_name = match toc.section(b"comparator_name") { + Some(section) => { + let limit = crate::comparator::MAX_COMPARATOR_NAME_BYTES as u64; + + if section.len() > limit { + return Err(crate::Error::DecompressedSizeTooLarge { + declared: section.len(), + limit, + }); + } + + let mut bytes = Vec::new(); + section.buf_reader(path)?.read_to_end(&mut bytes)?; + + String::from_utf8(bytes).map_err(|e| crate::Error::Utf8(e.utf8_error()))? + } + None => "default".to_owned(), + }; + Ok(Self { version, tree_type, level_count, + comparator_name, }) } } + +#[cfg(test)] +mod tests { + use super::*; + use byteorder::WriteBytesExt; + use std::io::Write; + + /// Write the mandatory manifest sections (format_version, tree_type, + /// level_count, filter_hash_type) into an sfa archive at `path`. + /// If `comparator_name` is `Some`, also writes that section. + fn write_test_manifest( + path: &std::path::Path, + comparator_name: Option<&str>, + ) -> crate::Result<()> { + let file = std::fs::File::create(path)?; + let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); + + writer.start("format_version")?; + writer.write_u8(FormatVersion::V4.into())?; + + writer.start("tree_type")?; + writer.write_u8(TreeType::Standard.into())?; + + writer.start("level_count")?; + writer.write_u8(7)?; + + writer.start("filter_hash_type")?; + writer.write_u8(u8::from(ChecksumType::Xxh3))?; + + if let Some(name) = comparator_name { + writer.start("comparator_name")?; + writer.write_all(name.as_bytes())?; + } + + writer.finish()?; + Ok(()) + } + + #[test] + fn manifest_without_comparator_name_defaults_to_default() -> crate::Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("manifest"); + + write_test_manifest(&path, None)?; + + let reader = sfa::Reader::new(&path)?; + let manifest = Manifest::decode_from(&path, &reader)?; + assert_eq!(manifest.comparator_name, "default"); + Ok(()) + } + + #[test] + fn manifest_with_comparator_name_round_trips() -> crate::Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("manifest"); + + write_test_manifest(&path, Some("u64-big-endian"))?; + + let reader = sfa::Reader::new(&path)?; + let manifest = Manifest::decode_from(&path, &reader)?; + assert_eq!(manifest.comparator_name, "u64-big-endian"); + Ok(()) + } + + #[test] + fn manifest_rejects_oversized_comparator_name() -> crate::Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("manifest"); + + let long_name = "x".repeat(300); + write_test_manifest(&path, Some(&long_name))?; + + let reader = sfa::Reader::new(&path)?; + let result = Manifest::decode_from(&path, &reader); + assert!( + matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })), + "expected DecompressedSizeTooLarge" + ); + Ok(()) + } + + #[test] + fn manifest_rejects_invalid_utf8_comparator_name() -> crate::Result<()> { + let dir = tempfile::tempdir()?; + let path = dir.path().join("manifest"); + + // Write manifest with invalid UTF-8 bytes in comparator_name + let file = std::fs::File::create(&path)?; + let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); + + writer.start("format_version")?; + writer.write_u8(FormatVersion::V4.into())?; + writer.start("tree_type")?; + writer.write_u8(TreeType::Standard.into())?; + writer.start("level_count")?; + writer.write_u8(7)?; + writer.start("filter_hash_type")?; + writer.write_u8(u8::from(ChecksumType::Xxh3))?; + writer.start("comparator_name")?; + writer.write_all(&[0xFF, 0xFE])?; + + writer.finish()?; + + let reader = sfa::Reader::new(&path)?; + let result = Manifest::decode_from(&path, &reader); + assert!( + matches!(result, Err(crate::Error::Utf8(_))), + "expected Utf8 error" + ); + Ok(()) + } +} diff --git a/src/table/util.rs b/src/table/util.rs index bfe21839c..35a79035e 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -377,6 +377,10 @@ mod tests { /// Reverse comparator to exercise the Vec-allocation slow path. struct ReverseComparator; impl crate::comparator::UserComparator for ReverseComparator { + fn name(&self) -> &'static str { + "test-reverse" + } + fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering { b.cmp(a) } @@ -386,6 +390,9 @@ mod tests { fn test_compare_prefixed_slice_custom_comparator() { use std::cmp::Ordering::{Equal, Greater, Less}; + use crate::comparator::UserComparator as _; + assert_eq!(ReverseComparator.name(), "test-reverse"); + // With reverse comparator, "abc" > "xyz" (reversed) assert_eq!( Greater, diff --git a/src/tree/inner.rs b/src/tree/inner.rs index 8bee97659..45b6a69f6 100644 --- a/src/tree/inner.rs +++ b/src/tree/inner.rs @@ -80,7 +80,7 @@ impl TreeInner { crate::TreeType::Standard }, ); - persist_version(&config.path, &version)?; + persist_version(&config.path, &version, config.comparator.name())?; let comparator = config.comparator.clone(); diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 579cfb52d..f9c0b79db 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1330,6 +1330,40 @@ impl Tree { log::info!("Recovering LSM-tree at {}", config.path.display()); + // Validate manifest metadata (format version, comparator name) + // BEFORE recover_levels, so a rejected open is side-effect free + // — recover_levels loads tables and cleans up orphans. + // Tree type is checked after recovery (needs the Version object). + // NOTE: the version file is read twice (here for metadata, then inside + // recover_levels for table/blob data). This is intentional — metadata + // validation must complete before any disk-mutating recovery work. + { + let version_id = crate::version::recovery::get_current_version(&config.path)?; + let manifest_path = config.path.join(format!("v{version_id}")); + let reader = sfa::Reader::new(&manifest_path)?; + let manifest = Manifest::decode_from(&manifest_path, &reader)?; + + if !matches!(manifest.version, FormatVersion::V3 | FormatVersion::V4) { + return Err(crate::Error::InvalidVersion(manifest.version.into())); + } + + let supplied_name = config.comparator.name(); + if manifest.comparator_name != supplied_name { + log::warn!( + "Comparator mismatch: tree was created with {:?} but opened with {:?}", + manifest.comparator_name, + supplied_name, + ); + return Err(crate::Error::ComparatorMismatch { + stored: manifest.comparator_name, + supplied: supplied_name, + }); + } + + // IMPORTANT: Restore persisted config + config.level_count = manifest.level_count; + } + let tree_id = get_next_tree_id(); #[cfg(feature = "metrics")] @@ -1344,14 +1378,6 @@ impl Tree { )?; { - let manifest_path = config.path.join(format!("v{}", version.id())); - let reader = sfa::Reader::new(&manifest_path)?; - let manifest = Manifest::decode_from(&manifest_path, &reader)?; - - if !matches!(manifest.version, FormatVersion::V3 | FormatVersion::V4) { - return Err(crate::Error::InvalidVersion(manifest.version.into())); - } - let requested_tree_type = match config.kv_separation_opts { Some(_) => crate::TreeType::Blob, None => crate::TreeType::Standard, @@ -1364,9 +1390,6 @@ impl Tree { ); return Err(crate::Error::Unrecoverable); } - - // IMPORTANT: Restore persisted config - config.level_count = manifest.level_count; } let highest_table_id = version diff --git a/src/version/mod.rs b/src/version/mod.rs index 4abb70db2..0bcc2442f 100644 --- a/src/version/mod.rs +++ b/src/version/mod.rs @@ -631,6 +631,7 @@ impl Version { pub(crate) fn encode_into( &self, writer: &mut sfa::Writer, + comparator_name: &str, ) -> Result<(), crate::Error> { use crate::FormatVersion; use byteorder::{LittleEndian, WriteBytesExt}; @@ -659,6 +660,9 @@ impl Version { writer.start("filter_hash_type")?; writer.write_u8(u8::from(ChecksumType::Xxh3))?; + writer.start("comparator_name")?; + writer.write_all(comparator_name.as_bytes())?; + // // Levels // diff --git a/src/version/persist.rs b/src/version/persist.rs index ad42fcf6c..a8c33f5bf 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -6,7 +6,22 @@ use crate::{ use byteorder::{LittleEndian, WriteBytesExt}; use std::{io::BufWriter, path::Path}; -pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { +pub fn persist_version( + folder: &Path, + version: &Version, + comparator_name: &str, +) -> crate::Result<()> { + if comparator_name.len() > crate::comparator::MAX_COMPARATOR_NAME_BYTES { + return Err(crate::Error::from(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!( + "comparator name is {} bytes (max {})", + comparator_name.len(), + crate::comparator::MAX_COMPARATOR_NAME_BYTES, + ), + ))); + } + log::trace!( "Persisting version {} in {}", version.id(), @@ -21,7 +36,7 @@ pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { { let mut writer = sfa::Writer::from_writer(&mut writer); - version.encode_into(&mut writer)?; + version.encode_into(&mut writer, comparator_name)?; writer.finish().map_err(|e| match e { sfa::Error::Io(e) => crate::Error::from(e), diff --git a/src/version/super_version.rs b/src/version/super_version.rs index b0c11350c..5d945a87d 100644 --- a/src/version/super_version.rs +++ b/src/version/super_version.rs @@ -27,25 +27,33 @@ pub struct SuperVersion { pub(crate) seqno: SeqNo, } -pub struct SuperVersions(VecDeque); +pub struct SuperVersions { + versions: VecDeque, + + /// Stable comparator identity persisted in every version file. + comparator_name: Arc, +} impl SuperVersions { pub fn new(version: Version, comparator: SharedComparator) -> Self { - Self( - vec![SuperVersion { + let comparator_name: Arc = comparator.name().into(); + + Self { + versions: vec![SuperVersion { active_memtable: Arc::new(Memtable::new(0, comparator)), sealed_memtables: Arc::default(), version, seqno: 0, }] .into(), - ) + comparator_name, + } } pub fn memtable_size_sum(&self) -> u64 { let mut set = crate::HashMap::default(); - for super_version in &self.0 { + for super_version in &self.versions { set.entry(super_version.active_memtable.id) .and_modify(|bytes| *bytes += super_version.active_memtable.size()) .or_insert_with(|| super_version.active_memtable.size()); @@ -61,7 +69,7 @@ impl SuperVersions { } pub fn len(&self) -> usize { - self.0.len() + self.versions.len() } pub fn free_list_len(&self) -> usize { @@ -79,9 +87,9 @@ impl SuperVersions { log::trace!("Running manifest GC with watermark={gc_watermark}"); - if let Some(hi_idx) = self.0.iter().rposition(|x| x.seqno < gc_watermark) { + if let Some(hi_idx) = self.versions.iter().rposition(|x| x.seqno < gc_watermark) { for _ in 0..hi_idx { - let Some(head) = self.0.front() else { + let Some(head) = self.versions.front() else { break; }; @@ -96,11 +104,14 @@ impl SuperVersions { std::fs::remove_file(path)?; } - self.0.pop_front(); + self.versions.pop_front(); } } - log::trace!("Manifest GC done, version length now {}", self.0.len()); + log::trace!( + "Manifest GC done, version length now {}", + self.versions.len() + ); Ok(()) } @@ -140,7 +151,7 @@ impl SuperVersions { next_version.seqno = seqno; log::trace!("Next version seqno={}", next_version.seqno); - persist_version(tree_path, &next_version.version)?; + persist_version(tree_path, &next_version.version, &self.comparator_name)?; self.append_version(next_version); // Clamp to stay below the reserved MSB range. @@ -151,18 +162,18 @@ impl SuperVersions { } pub fn append_version(&mut self, version: SuperVersion) { - self.0.push_back(version); + self.versions.push_back(version); } pub fn replace_latest_version(&mut self, version: SuperVersion) { - if self.0.pop_back().is_some() { - self.0.push_back(version); + if self.versions.pop_back().is_some() { + self.versions.push_back(version); } } pub fn latest_version(&self) -> SuperVersion { #[expect(clippy::expect_used, reason = "SuperVersion is expected to exist")] - self.0 + self.versions .iter() .last() .cloned() @@ -173,14 +184,14 @@ impl SuperVersions { if seqno == 0 { #[expect(clippy::expect_used, reason = "SuperVersion is expected to exist")] return self - .0 + .versions .front() .cloned() .expect("should always find a SuperVersion"); } let version = self - .0 + .versions .iter() .rev() .find(|version| version.seqno < seqno) @@ -190,7 +201,7 @@ impl SuperVersions { log::error!("Failed to find a SuperVersion for snapshot with seqno={seqno}"); log::error!("SuperVersions:"); - for version in self.0.iter().rev() { + for version in self.versions.iter().rev() { log::error!("-> {}, seqno={}", version.version.id(), version.seqno); } } @@ -210,31 +221,35 @@ mod tests { Memtable::new(id, default_comparator()) } + fn test_super_versions(versions: Vec) -> SuperVersions { + SuperVersions { + versions: versions.into(), + comparator_name: "default".into(), + } + } + #[test] fn super_version_gc_above_watermark() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 1, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 1, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + ]); history.maintenance(Path::new("."), 0)?; @@ -245,29 +260,26 @@ mod tests { #[test] fn super_version_gc_below_watermark_simple() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 1, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 1, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + ]); history.maintenance(Path::new("."), 3)?; @@ -278,35 +290,32 @@ mod tests { #[test] fn super_version_gc_below_watermark_simple_2() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 1, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 8, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 1, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 8, + }, + ]); history.maintenance(Path::new("."), 3)?; @@ -317,23 +326,20 @@ mod tests { #[test] fn super_version_gc_below_watermark_keep() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 8, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 8, + }, + ]); history.maintenance(Path::new("."), 3)?; @@ -344,23 +350,20 @@ mod tests { #[test] fn super_version_gc_below_watermark_shadowed() -> crate::Result<()> { - let mut history = SuperVersions( - vec![ - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 0, - }, - SuperVersion { - active_memtable: Arc::new(new_memtable(0)), - sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), - seqno: 2, - }, - ] - .into(), - ); + let mut history = test_super_versions(vec![ + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 0, + }, + SuperVersion { + active_memtable: Arc::new(new_memtable(0)), + sealed_memtables: Arc::default(), + version: Version::new(0, crate::TreeType::Standard), + seqno: 2, + }, + ]); history.maintenance(Path::new("."), 3)?; diff --git a/tests/custom_comparator.rs b/tests/custom_comparator.rs index 71ce102ac..9e2404539 100644 --- a/tests/custom_comparator.rs +++ b/tests/custom_comparator.rs @@ -6,6 +6,10 @@ use std::sync::Arc; struct ReverseComparator; impl UserComparator for ReverseComparator { + fn name(&self) -> &'static str { + "reverse-lexicographic" + } + fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { b.cmp(a) // reversed } @@ -15,6 +19,10 @@ impl UserComparator for ReverseComparator { struct U64BigEndianComparator; impl UserComparator for U64BigEndianComparator { + fn name(&self) -> &'static str { + "u64-big-endian" + } + fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { if a.len() == 8 && b.len() == 8 { let a_u64 = u64::from_be_bytes(a.try_into().unwrap()); @@ -279,3 +287,139 @@ fn u64_comparator_bounded_range_scan() -> lsm_tree::Result<()> { Ok(()) } + +#[test] +fn reopen_with_same_comparator_succeeds() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with reverse comparator + { + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen with the same comparator — must succeed + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + + assert_eq!(tree.get("a", 2)?, Some("1".as_bytes().into())); + + Ok(()) +} + +#[test] +fn reopen_with_different_comparator_fails() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with reverse comparator + { + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen with u64 comparator — must fail + let cmp: SharedComparator = Arc::new(U64BigEndianComparator); + let result = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open(); + + match result { + Err(lsm_tree::Error::ComparatorMismatch { stored, supplied }) => { + assert_eq!(stored, "reverse-lexicographic"); + assert_eq!(supplied, "u64-big-endian"); + } + Ok(_) => panic!("expected ComparatorMismatch, got Ok"), + Err(e) => panic!("expected ComparatorMismatch, got {e:?}"), + } + + Ok(()) +} + +#[test] +fn reopen_custom_with_default_comparator_fails() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with reverse comparator + { + let cmp: SharedComparator = Arc::new(ReverseComparator); + let tree = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen without specifying a comparator (uses default) — must fail + let result = Config::new(&folder, Default::default(), Default::default()).open(); + + match result { + Err(lsm_tree::Error::ComparatorMismatch { stored, supplied }) => { + assert_eq!(stored, "reverse-lexicographic"); + assert_eq!(supplied, "default"); + } + Ok(_) => panic!("expected ComparatorMismatch, got Ok"), + Err(e) => panic!("expected ComparatorMismatch, got {e:?}"), + } + + Ok(()) +} + +#[test] +fn reopen_default_tree_with_default_comparator_succeeds() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + + // Create tree with default comparator + { + let tree = Config::new(&folder, Default::default(), Default::default()).open()?; + tree.insert("a", "1", 0); + tree.flush_active_memtable(1)?; + } + + // Reopen with default comparator — must succeed + let tree = Config::new(&folder, Default::default(), Default::default()).open()?; + assert_eq!(tree.get("a", 2)?, Some("1".as_bytes().into())); + + Ok(()) +} + +/// Comparator with a name exceeding the 256-byte limit. +struct OversizedNameComparator; + +impl UserComparator for OversizedNameComparator { + fn name(&self) -> &'static str { + // 300 chars — exceeds MAX_COMPARATOR_NAME_BYTES (256) + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + } + + fn compare(&self, a: &[u8], b: &[u8]) -> Ordering { + a.cmp(b) + } +} + +#[test] +fn oversized_comparator_name_rejected_on_create() { + let folder = tempfile::tempdir().unwrap(); + let cmp: SharedComparator = Arc::new(OversizedNameComparator); + + let result = Config::new(&folder, Default::default(), Default::default()) + .comparator(cmp) + .open(); + + match result { + Err(lsm_tree::Error::Io(e)) => { + assert_eq!(e.kind(), std::io::ErrorKind::InvalidInput); + } + Ok(_) => panic!("expected InvalidInput error for oversized comparator name"), + Err(e) => panic!("expected InvalidInput Io error, got {e:?}"), + } +}