From 3acf51745c41c820ec8a29529bdec2452fd53de9 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 15:41:44 +0200 Subject: [PATCH 1/2] feat: add multi_get() for batch point reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `multi_get()` to `AbstractTree` trait with default implementation that calls `get()` in a loop - `Tree` and `BlobTree` provide optimized versions - BlobTree extracts a `resolve_key` helper for shared blob resolution logic - Accepts unsorted keys — callers don't need to pre-sort - New integration test covering basic retrieval, snapshot isolation, missing keys, and unsorted input Closes #96 --- src/abstract_tree.rs | 40 ++++++++ src/blob_tree/mod.rs | 57 +++++++---- src/tree/mod.rs | 17 ++++ tests/multi_get.rs | 237 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 329 insertions(+), 22 deletions(-) create mode 100644 tests/multi_get.rs diff --git a/src/abstract_tree.rs b/src/abstract_tree.rs index 0a3f123a2..b45101e1c 100644 --- a/src/abstract_tree.rs +++ b/src/abstract_tree.rs @@ -511,6 +511,46 @@ pub trait AbstractTree { self.get(key, seqno).map(|x| x.is_some()) } + /// Reads multiple keys from the tree. + /// + /// Implementations may choose to perform all lookups against a single + /// version snapshot and acquire the version lock only once, which can be + /// more efficient than calling [`AbstractTree::get`] in a loop. The + /// default trait implementation, however, is a convenience wrapper that + /// simply calls [`AbstractTree::get`] for each key and therefore does not + /// guarantee a single-snapshot or single-lock acquisition. Optimized + /// implementations (such as [`Tree`] and [`BlobTree`]) provide the + /// single-snapshot/one-lock behavior. + /// + /// # Examples + /// + /// ``` + /// # let folder = tempfile::tempdir()?; + /// use lsm_tree::{AbstractTree, Config, Tree}; + /// + /// let tree = Config::new(folder, Default::default(), Default::default()).open()?; + /// tree.insert("a", "value_a", 0); + /// tree.insert("b", "value_b", 1); + /// + /// let results = tree.multi_get(["a", "b", "c"], 2)?; + /// assert_eq!(results[0], Some("value_a".as_bytes().into())); + /// assert_eq!(results[1], Some("value_b".as_bytes().into())); + /// assert_eq!(results[2], None); + /// # + /// # Ok::<(), lsm_tree::Error>(()) + /// ``` + /// + /// # Errors + /// + /// Will return `Err` if an IO error occurs. + fn multi_get>( + &self, + keys: impl IntoIterator, + seqno: SeqNo, + ) -> crate::Result>> { + keys.into_iter().map(|key| self.get(key, seqno)).collect() + } + /// Inserts a key-value pair into the tree. /// /// If the key already exists, the item will be overwritten. diff --git a/src/blob_tree/mod.rs b/src/blob_tree/mod.rs index 73ea1c119..31ab09ee3 100644 --- a/src/blob_tree/mod.rs +++ b/src/blob_tree/mod.rs @@ -163,6 +163,29 @@ impl BlobTree { blobs_folder: Arc::new(blobs_folder), }) } + + /// Resolves a single key against a pre-acquired [`SuperVersion`]. + fn resolve_key( + &self, + super_version: &crate::version::SuperVersion, + key: &[u8], + seqno: SeqNo, + ) -> crate::Result> { + let Some(item) = crate::Tree::get_internal_entry_from_version(super_version, key, seqno)? + else { + return Ok(None); + }; + + let (_, v) = resolve_value_handle( + self.id(), + self.blobs_folder.as_path(), + &self.index.config.cache, + &super_version.version, + item, + )?; + + Ok(Some(v)) + } } impl AbstractTree for BlobTree { @@ -584,30 +607,20 @@ impl AbstractTree for BlobTree { } fn get>(&self, key: K, seqno: SeqNo) -> crate::Result> { - let key = key.as_ref(); - - #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] - let super_version = self - .index - .version_history - .read() - .expect("lock is poisoned") - .get_version_for_snapshot(seqno); - - let Some(item) = crate::Tree::get_internal_entry_from_version(&super_version, key, seqno)? - else { - return Ok(None); - }; + let super_version = self.index.get_version_for_snapshot(seqno); + self.resolve_key(&super_version, key.as_ref(), seqno) + } - let (_, v) = resolve_value_handle( - self.id(), - self.blobs_folder.as_path(), - &self.index.config.cache, - &super_version.version, - item, - )?; + fn multi_get>( + &self, + keys: impl IntoIterator, + seqno: SeqNo, + ) -> crate::Result>> { + let super_version = self.index.get_version_for_snapshot(seqno); - Ok(Some(v)) + keys.into_iter() + .map(|key| self.resolve_key(&super_version, key.as_ref(), seqno)) + .collect() } fn remove>(&self, key: K, seqno: SeqNo) -> (u64, u64) { diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 453e9891c..be8b78feb 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -645,6 +645,23 @@ impl AbstractTree for Tree { .map(|x| x.value)) } + fn multi_get>( + &self, + keys: impl IntoIterator, + seqno: SeqNo, + ) -> crate::Result>> { + let super_version = self.get_version_for_snapshot(seqno); + + keys.into_iter() + .map(|key| { + Ok( + Self::get_internal_entry_from_version(&super_version, key.as_ref(), seqno)? + .map(|x| x.value), + ) + }) + .collect() + } + fn insert, V: Into>( &self, key: K, diff --git a/tests/multi_get.rs b/tests/multi_get.rs new file mode 100644 index 000000000..9357d96ca --- /dev/null +++ b/tests/multi_get.rs @@ -0,0 +1,237 @@ +use lsm_tree::{ + get_tmp_folder, AbstractTree, Config, KvSeparationOptions, SeqNo, SequenceNumberCounter, +}; +use test_log::test; + +#[test] +fn multi_get_all_existing() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + for i in 0..100u64 { + tree.insert(format!("key_{i:04}"), format!("value_{i}"), i); + } + + tree.flush_active_memtable(0)?; + + let keys: Vec = (0..100u64).map(|i| format!("key_{i:04}")).collect(); + let results = tree.multi_get(&keys, SeqNo::MAX)?; + + assert_eq!(results.len(), 100); + for (i, result) in results.iter().enumerate() { + let expected = format!("value_{i}"); + assert_eq!( + result.as_deref(), + Some(expected.as_bytes()), + "mismatch at index {i}", + ); + } + + Ok(()) +} + +#[test] +fn multi_get_mixed_existing_and_missing() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "val_a", 0); + tree.insert("c", "val_c", 1); + tree.insert("e", "val_e", 2); + + let results = tree.multi_get(["a", "b", "c", "d", "e"], 3)?; + + assert_eq!(results.len(), 5); + assert_eq!(results[0].as_deref(), Some(b"val_a".as_slice())); + assert_eq!(results[1], None); + assert_eq!(results[2].as_deref(), Some(b"val_c".as_slice())); + assert_eq!(results[3], None); + assert_eq!(results[4].as_deref(), Some(b"val_e".as_slice())); + + Ok(()) +} + +#[test] +fn multi_get_empty_keys() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "val_a", 0); + + let results = tree.multi_get(Vec::<&str>::new(), 1)?; + assert!(results.is_empty()); + + Ok(()) +} + +#[test] +fn multi_get_snapshot_isolation() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "v1", 0); + tree.insert("b", "v1", 1); + + // Update values at higher seqno + tree.insert("a", "v2", 2); + tree.insert("b", "v2", 3); + + // Read at snapshot seqno=2: should see a=v1, b=v1 + // Snapshot semantics: entry visible iff entry.seqno < snapshot_seqno + // (memtable lookup uses `seqno - 1` as upper bound, see Memtable::get). + // So a@2 (v2) is NOT visible at seqno=2, only a@0 (v1) is. + let results = tree.multi_get(["a", "b"], 2)?; + assert_eq!(results[0].as_deref(), Some(b"v1".as_slice())); + assert_eq!(results[1].as_deref(), Some(b"v1".as_slice())); + + // Read at snapshot seqno=4: should see a=v2, b=v2 + let results = tree.multi_get(["a", "b"], 4)?; + assert_eq!(results[0].as_deref(), Some(b"v2".as_slice())); + assert_eq!(results[1].as_deref(), Some(b"v2".as_slice())); + + Ok(()) +} + +#[test] +fn multi_get_with_tombstones() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "val_a", 0); + tree.insert("b", "val_b", 1); + tree.remove("a", 2); + + let results = tree.multi_get(["a", "b"], 3)?; + assert_eq!(results[0], None); + assert_eq!(results[1].as_deref(), Some(b"val_b".as_slice())); + + Ok(()) +} + +#[test] +fn multi_get_from_disk() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "val_a", 0); + tree.insert("b", "val_b", 1); + tree.insert("c", "val_c", 2); + tree.flush_active_memtable(0)?; + + // Insert more to memtable + tree.insert("d", "val_d", 3); + + // Multi-get spanning both disk and memtable + let results = tree.multi_get(["a", "b", "c", "d", "e"], SeqNo::MAX)?; + assert_eq!(results.len(), 5); + assert_eq!(results[0].as_deref(), Some(b"val_a".as_slice())); + assert_eq!(results[1].as_deref(), Some(b"val_b".as_slice())); + assert_eq!(results[2].as_deref(), Some(b"val_c".as_slice())); + assert_eq!(results[3].as_deref(), Some(b"val_d".as_slice())); + assert_eq!(results[4], None); + + Ok(()) +} + +#[test] +fn multi_get_blob_tree_with_kv_separation() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .with_kv_separation(Some(KvSeparationOptions { + separation_threshold: 1, // separate all values + ..Default::default() + })) + .open()?; + + let big_val_a = b"aaa".repeat(1000); + let big_val_b = b"bbb".repeat(1000); + + tree.insert("a", big_val_a.as_slice(), 0); + tree.insert("b", big_val_b.as_slice(), 1); + tree.insert("c", b"ccc".repeat(1000).as_slice(), 2); + tree.remove("c", 3); + + tree.flush_active_memtable(0)?; + + // Verify blob indirections were created + assert!(tree.blob_file_count() > 0); + + let results = tree.multi_get(["a", "b", "c", "missing"], SeqNo::MAX)?; + + assert_eq!(results.len(), 4); + assert_eq!(results[0].as_deref(), Some(big_val_a.as_slice())); + assert_eq!(results[1].as_deref(), Some(big_val_b.as_slice())); + assert_eq!(results[2], None); // tombstoned + assert_eq!(results[3], None); // never existed + + Ok(()) +} + +#[test] +fn multi_get_unsorted_and_duplicate_keys() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + + let tree = Config::new( + &folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "val_a", 0); + tree.insert("b", "val_b", 1); + tree.insert("c", "val_c", 2); + + // Unsorted keys with a duplicate — results must match input order 1:1 + let results = tree.multi_get(["c", "a", "b", "a", "missing"], 3)?; + + assert_eq!(results.len(), 5); + assert_eq!(results[0].as_deref(), Some(b"val_c".as_slice())); + assert_eq!(results[1].as_deref(), Some(b"val_a".as_slice())); + assert_eq!(results[2].as_deref(), Some(b"val_b".as_slice())); + assert_eq!(results[3].as_deref(), Some(b"val_a".as_slice())); // duplicate + assert_eq!(results[4], None); + + Ok(()) +} From bae6679564bd9d97fc24746402c0a087e16ee576 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 16:53:02 +0200 Subject: [PATCH 2/2] docs: document multi_get() output contract Explicitly state that the returned Vec has the same length as the input, preserves input order, returns None for missing keys, and produces duplicate entries for duplicate input keys. --- src/abstract_tree.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/abstract_tree.rs b/src/abstract_tree.rs index b45101e1c..924318137 100644 --- a/src/abstract_tree.rs +++ b/src/abstract_tree.rs @@ -513,6 +513,12 @@ pub trait AbstractTree { /// Reads multiple keys from the tree. /// + /// Returns a `Vec` with exactly one entry per input key, in the same order + /// as the input. Each entry is `Some(value)` if the key was found, or + /// `None` if it was missing. Duplicate keys in the input produce duplicate + /// entries in the output. This contract is guaranteed for all + /// implementations, including optimized ones. + /// /// Implementations may choose to perform all lookups against a single /// version snapshot and acquire the version lock only once, which can be /// more efficient than calling [`AbstractTree::get`] in a loop. The