From 071c2c2f79fd33f27e5aceac2874b89554b4ba01 Mon Sep 17 00:00:00 2001 From: Kunal Mohan Date: Sun, 19 Apr 2026 22:42:47 +0530 Subject: [PATCH 1/5] Fix/update benchmarks --- Cargo.toml | 11 ------ benches/bloom.rs | 74 -------------------------------------- benches/level_manifest.rs | 67 ++++++---------------------------- benches/memtable.rs | 14 ++++---- benches/merge.rs | 8 ++--- benches/partition_point.rs | 30 ---------------- benches/tli.rs | 51 -------------------------- 7 files changed, 22 insertions(+), 233 deletions(-) delete mode 100644 benches/partition_point.rs delete mode 100644 benches/tli.rs diff --git a/Cargo.toml b/Cargo.toml index c54096744..e11c727e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,12 +55,6 @@ unexpected_cfgs = { level = "warn", check-cfg = [ [package.metadata.cargo-all-features] denylist = [] -[[bench]] -name = "tli" -harness = false -path = "benches/tli.rs" -required-features = [] - [[bench]] name = "merge" harness = false @@ -103,8 +97,3 @@ harness = false path = "benches/fd_table.rs" required-features = [] -[[bench]] -name = "partition_point" -harness = false -path = "benches/partition_point.rs" -required-features = [] diff --git a/benches/bloom.rs b/benches/bloom.rs index 1b67e9c4a..4c2d8dfe1 100644 --- a/benches/bloom.rs +++ b/benches/bloom.rs @@ -54,34 +54,6 @@ fn standard_filter_construction(c: &mut Criterion) { }); } -fn blocked_filter_construction(c: &mut Criterion) { - use lsm_tree::table::filter::blocked_bloom::Builder; - - let mut rng = rand::rng(); - - c.bench_function("blocked bloom filter add key, 1M", |b| { - let mut filter = Builder::with_fp_rate(1_000_000, 0.01); - - b.iter(|| { - let mut key = [0; 16]; - rng.fill_bytes(&mut key); - - filter.set_with_hash(Builder::get_hash(&key)); - }); - }); - - c.bench_function("blocked bloom filter add key, 10M", |b| { - let mut filter = Builder::with_fp_rate(10_000_000, 0.01); - - b.iter(|| { - let mut key = [0; 16]; - rng.fill_bytes(&mut key); - - filter.set_with_hash(Builder::get_hash(&key)); - }); - }); -} - fn standard_filter_contains(c: &mut Criterion) { use lsm_tree::table::filter::standard_bloom::Builder; @@ -126,56 +98,10 @@ fn standard_filter_contains(c: &mut Criterion) { } } -fn blocked_filter_contains(c: &mut Criterion) { - use lsm_tree::table::filter::blocked_bloom::Builder; - - let keys = (0..100_000u128) - .map(|x| x.to_be_bytes().to_vec()) - .collect::>(); - - for fpr in [0.1, 0.01, 0.001, 0.0001, 0.00001] { - // NOTE: Purposefully bloat bloom filter size to run into more CPU cache misses - let n = 100_000_000; - - let mut filter = Builder::with_fp_rate(n, fpr); - - for key in &keys { - filter.set_with_hash(Builder::get_hash(key)); - } - - let mut rng = rand::rng(); - - let filter_bytes = filter.build(); - - c.bench_function( - &format!( - "blocked bloom filter contains key, true positive ({}%)", - fpr * 100.0, - ), - |b| { - b.iter(|| { - use lsm_tree::table::filter::blocked_bloom::BlockedBloomFilterReader as Reader; - use rand::seq::IndexedRandom; - - // NOTE: To make the costs more realistic, we - // pretend we are reading the filter straight from the block - let filter = Reader::new(&filter_bytes).unwrap(); - - let sample = keys.choose(&mut rng).unwrap(); - let hash = Builder::get_hash(sample); - assert!(filter.contains_hash(hash)); - }); - }, - ); - } -} - criterion_group!( benches, fast_block_index, standard_filter_construction, - blocked_filter_construction, standard_filter_contains, - blocked_filter_contains, ); criterion_main!(benches); diff --git a/benches/level_manifest.rs b/benches/level_manifest.rs index 1834f8600..af7cdcc13 100644 --- a/benches/level_manifest.rs +++ b/benches/level_manifest.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{AbstractTree, Config}; +use lsm_tree::{config::BlockSizePolicy, AbstractTree, Config, SequenceNumberCounter}; fn iterate_segments(c: &mut Criterion) { let mut group = c.benchmark_group("Iterate level manifest"); @@ -10,71 +10,26 @@ fn iterate_segments(c: &mut Criterion) { for segment_count in [0, 1, 5, 10, 100, 500, 1_000, 2_000, 4_000] { group.bench_function(format!("iterate {segment_count} segments"), |b| { let folder = tempfile::tempdir_in(".bench").unwrap(); - let tree = Config::new(folder).data_block_size(1_024).open().unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .data_block_size_policy(BlockSizePolicy::all(1_024)) + .open() + .unwrap(); for x in 0_u64..segment_count { tree.insert("a", "b", x); tree.flush_active_memtable(0).unwrap(); } - let levels = tree.levels.read().unwrap(); - b.iter(|| { - assert_eq!(levels.iter().count(), segment_count as usize); + assert_eq!(tree.table_count(), segment_count as usize); }); }); } } -fn find_segment(c: &mut Criterion) { - let mut group = c.benchmark_group("Find segment in disjoint level"); - group.sample_size(10); - - std::fs::create_dir_all(".bench").unwrap(); - - for segment_count in [1u16, 2, 3, 4, 5, 10, 100, 1_000] { - let folder = tempfile::tempdir_in(".bench").unwrap(); - let tree = Config::new(folder).data_block_size(1_024).open().unwrap(); - - for x in 0..segment_count { - tree.insert(x.to_be_bytes(), "", x.into()); - tree.flush_active_memtable(0).unwrap(); - } - - let key = (segment_count / 2).to_be_bytes(); - - group.bench_function( - format!("find segment in {segment_count} segments - binary search"), - |b| { - let levels = tree.levels.read().unwrap(); - let first_level = levels.levels.first().expect("should exist"); - - b.iter(|| { - first_level - .as_disjoint() - .expect("should be disjoint") - .get_segment_containing_key(&key) - .expect("should exist") - }); - }, - ); - - group.bench_function( - format!("find segment in {segment_count} segments - linear search"), - |b| { - let levels = tree.levels.read().unwrap(); - let first_level = levels.levels.first().expect("should exist"); - - b.iter(|| { - first_level - .iter() - .find(|x| x.metadata.key_range.contains_key(&key)) - .expect("should exist"); - }); - }, - ); - } -} - -criterion_group!(benches, iterate_segments, find_segment); +criterion_group!(benches, iterate_segments); criterion_main!(benches); diff --git a/benches/memtable.rs b/benches/memtable.rs index e7d201fe4..6033c0781 100644 --- a/benches/memtable.rs +++ b/benches/memtable.rs @@ -3,7 +3,7 @@ use lsm_tree::{InternalValue, Memtable}; use nanoid::nanoid; fn memtable_get_hit(c: &mut Criterion) { - let memtable = Memtable::default(); + let memtable = Memtable::new(0_u64); memtable.insert(InternalValue::from_components( "abc_w5wa35aw35naw", @@ -25,14 +25,14 @@ fn memtable_get_hit(c: &mut Criterion) { b.iter(|| { assert_eq!( [1, 2, 3], - &*memtable.get(b"abc_w5wa35aw35naw", None).unwrap().value, + &*memtable.get(b"abc_w5wa35aw35naw", u64::MAX).unwrap().value, ) }); }); } fn memtable_get_snapshot(c: &mut Criterion) { - let memtable = Memtable::default(); + let memtable = Memtable::new(0_u64); memtable.insert(InternalValue::from_components( "abc_w5wa35aw35naw", @@ -60,14 +60,14 @@ fn memtable_get_snapshot(c: &mut Criterion) { b.iter(|| { assert_eq!( [1, 2, 3], - &*memtable.get(b"abc_w5wa35aw35naw", Some(1)).unwrap().value, + &*memtable.get(b"abc_w5wa35aw35naw", 1).unwrap().value, ); }); }); } fn memtable_get_miss(c: &mut Criterion) { - let memtable = Memtable::default(); + let memtable = Memtable::new(0_u64); for _ in 0..1_000_000 { memtable.insert(InternalValue::from_components( @@ -79,13 +79,13 @@ fn memtable_get_miss(c: &mut Criterion) { } c.bench_function("memtable get miss", |b| { - b.iter(|| assert!(memtable.get(b"abc_564321", None).is_none())); + b.iter(|| assert!(memtable.get(b"abc_564321", u64::MAX).is_none())); }); } fn memtable_highest_seqno(c: &mut Criterion) { c.bench_function("memtable highest seqno", |b| { - let memtable = Memtable::default(); + let memtable = Memtable::new(0_u64); for x in 0..100_000 { memtable.insert(InternalValue::from_components( diff --git a/benches/merge.rs b/benches/merge.rs index 8072e893c..6b52b145a 100644 --- a/benches/merge.rs +++ b/benches/merge.rs @@ -8,7 +8,7 @@ fn merger(c: &mut Criterion) { c.bench_function(&format!("Merge {num}"), |b| { let memtables = (0..num) .map(|_| { - let table = Memtable::default(); + let table = Memtable::new(0_u64); for _ in 0..100 { table.insert(InternalValue::from_components( @@ -26,7 +26,7 @@ fn merger(c: &mut Criterion) { b.iter_with_large_drop(|| { let iters = memtables .iter() - .map(|x| x.iter().map(Ok)) + .map(|x| x.iter().map(Ok::<_, lsm_tree::Error>)) .map(|x| Box::new(x) as BoxedIterator<'_>) .collect(); @@ -43,7 +43,7 @@ fn mvcc_stream(c: &mut Criterion) { c.bench_function(&format!("MVCC stream {num} versions"), |b| { let memtables = (0..num) .map(|_| { - let table = Memtable::default(); + let table = Memtable::new(0_u64); for key in 'a'..='z' { table.insert(InternalValue::from_components( @@ -61,7 +61,7 @@ fn mvcc_stream(c: &mut Criterion) { b.iter_with_large_drop(|| { let iters = memtables .iter() - .map(|x| x.iter().map(Ok)) + .map(|x| x.iter().map(Ok::<_, lsm_tree::Error>)) .map(|x| Box::new(x) as BoxedIterator<'_>) .collect(); diff --git a/benches/partition_point.rs b/benches/partition_point.rs deleted file mode 100644 index dbbe8382a..000000000 --- a/benches/partition_point.rs +++ /dev/null @@ -1,30 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::binary_search::partition_point; -use rand::Rng; - -fn bench_partition_point(c: &mut Criterion) { - let mut group = c.benchmark_group("partition_point"); - - let mut rng = rand::rng(); - - for item_count in [10, 100, 1_000, 10_000, 100_000, 1_000_000] { - let items = (0..item_count).collect::>(); - - group.bench_function(format!("native {item_count}"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count); - items.partition_point(|&x| x <= needle) - }) - }); - - group.bench_function(format!("rewrite {item_count}"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count); - partition_point(&items, |&x| x <= needle) - }) - }); - } -} - -criterion_group!(benches, bench_partition_point); -criterion_main!(benches); diff --git a/benches/tli.rs b/benches/tli.rs deleted file mode 100644 index 1b9dea651..000000000 --- a/benches/tli.rs +++ /dev/null @@ -1,51 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::table::{ - block::offset::BlockOffset, block_index::KeyedBlockIndex, value_block::CachePolicy, -}; -use rand::Rng; - -fn tli_find_item(c: &mut Criterion) { - use lsm_tree::table::block_index::{block_handle::KeyedBlockHandle, top_level::TopLevelIndex}; - - let mut group = c.benchmark_group("TLI find item"); - - for item_count in [10u64, 100, 1_000, 10_000, 25_000, 100_000] { - let items = { - let mut items = Vec::with_capacity(item_count as usize); - - for x in 0..item_count { - items.push(KeyedBlockHandle { - end_key: x.to_be_bytes().into(), - offset: BlockOffset(x), - }); - } - - items - }; - - let index = TopLevelIndex::from_boxed_slice(items.into()); - - let mut rng = rand::rng(); - - group.bench_function( - format!("TLI get_block_containing_item ({item_count} items)"), - |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count).to_be_bytes(); - - assert_eq!( - needle, - &*index - .get_lowest_block_containing_key(&needle, CachePolicy::Read) - .unwrap() - .unwrap() - .end_key, - ); - }) - }, - ); - } -} - -criterion_group!(benches, tli_find_item); -criterion_main!(benches); From 3f4c496134ed6d9b7309267bec04b8f3ca9ad034 Mon Sep 17 00:00:00 2001 From: Kunal Mohan Date: Tue, 21 Apr 2026 22:26:40 +0530 Subject: [PATCH 2/5] fix benches with lz4 feature --- Cargo.toml | 6 -- benches/block.rs | 200 ---------------------------------------------- benches/tree.rs | 202 +++++++++++++++++++++++++++++------------------ 3 files changed, 124 insertions(+), 284 deletions(-) delete mode 100644 benches/block.rs diff --git a/Cargo.toml b/Cargo.toml index e11c727e2..6065c6a40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,12 +73,6 @@ harness = false path = "benches/bloom.rs" required-features = [] -[[bench]] -name = "block" -harness = false -path = "benches/block.rs" -required-features = ["lz4"] - [[bench]] name = "tree" harness = false diff --git a/benches/block.rs b/benches/block.rs deleted file mode 100644 index 0e02eff7a..000000000 --- a/benches/block.rs +++ /dev/null @@ -1,200 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{ - coding::Encode, - table::{ - block::{header::Header as BlockHeader, offset::BlockOffset, ItemSize}, - meta::CompressionType, - value_block::ValueBlock, - }, - Checksum, InternalValue, -}; -use rand::Rng; -use std::io::Write; - -/* fn value_block_size(c: &mut Criterion) { - let mut group = c.benchmark_group("ValueBlock::size"); - - for item_count in [10, 100, 1_000] { - group.bench_function(format!("{item_count} items"), |b| { - let items = (0..item_count) - .map(|_| { - InternalValue::from_components( - "a".repeat(16).as_bytes(), - "a".repeat(100).as_bytes(), - 63, - lsm_tree::ValueType::Value, - ) - }) - .collect(); - - let block = ValueBlock { - items, - header: BlockHeader { - compression: CompressionType::Lz4, - checksum: Checksum::from_raw(0), - data_length: 0, - previous_block_offset: 0, - uncompressed_length: 0, - }, - }; - - b.iter(|| { - (&*block.items).size(); - }) - }); - } -} */ - -fn value_block_find(c: &mut Criterion) { - let mut group = c.benchmark_group("ValueBlock::find_latest"); - - for item_count in [10, 100, 1_000, 10_000] { - let mut items = vec![]; - - for item in 0u64..item_count { - items.push(InternalValue::from_components( - item.to_be_bytes(), - b"", - 0, - lsm_tree::ValueType::Value, - )); - } - - let block = ValueBlock { - items: items.into_boxed_slice(), - header: BlockHeader { - compression: CompressionType::Lz4, - checksum: Checksum::from_raw(0), - data_length: 0, - previous_block_offset: BlockOffset(0), - uncompressed_length: 0, - }, - }; - - let mut rng = rand::rng(); - - group.bench_function(format!("{item_count} items (linear)"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count).to_be_bytes(); - - let item = block - .items - .iter() - .find(|item| &*item.key.user_key == needle) - .cloned() - .unwrap(); - - assert_eq!(item.key.user_key, needle); - }) - }); - - group.bench_function(format!("{item_count} items (binary search)"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count).to_be_bytes(); - - let item = block.get_latest(&needle).unwrap(); - assert_eq!(item.key.user_key, needle); - }) - }); - } -} - -fn encode_block(c: &mut Criterion) { - let mut group = c.benchmark_group("Encode block"); - - for comp_type in [CompressionType::None, CompressionType::Lz4] { - for block_size in [4, 8, 16, 32, 64, 128] { - let block_size = block_size * 1_024; - - let mut size = 0; - - let mut items = vec![]; - - for x in 0u64.. { - let value = InternalValue::from_components( - x.to_be_bytes(), - x.to_string().repeat(50).as_bytes(), - 63, - lsm_tree::ValueType::Value, - ); - - size += value.size(); - - items.push(value); - - if size >= block_size { - break; - } - } - - group.bench_function(format!("{block_size} KiB [{comp_type}]"), |b| { - b.iter(|| { - // Serialize block - let (mut header, data) = - ValueBlock::to_bytes_compressed(&items, BlockOffset(0), comp_type).unwrap(); - }); - }); - } - } -} - -fn load_value_block_from_disk(c: &mut Criterion) { - let mut group = c.benchmark_group("Load block from disk"); - - for comp_type in [CompressionType::None, CompressionType::Lz4] { - for block_size in [4, 8, 16, 32, 64, 128] { - let block_size = block_size * 1_024; - - let mut size = 0; - - let mut items = vec![]; - - for x in 0u64.. { - let value = InternalValue::from_components( - x.to_be_bytes(), - x.to_string().repeat(50).as_bytes(), - 63, - lsm_tree::ValueType::Value, - ); - - size += value.size(); - - items.push(value); - - if size >= block_size { - break; - } - } - - // Serialize block - let (mut header, data) = - ValueBlock::to_bytes_compressed(&items, BlockOffset(0), comp_type).unwrap(); - - let mut file = tempfile::tempfile().unwrap(); - header.encode_into(&mut file).unwrap(); - file.write_all(&data).unwrap(); - - let expected_block = ValueBlock { - items: items.clone().into_boxed_slice(), - header, - }; - - group.bench_function(format!("{block_size} KiB [{comp_type}]"), |b| { - b.iter(|| { - let loaded_block = ValueBlock::from_file(&mut file, BlockOffset(0)).unwrap(); - - assert_eq!(loaded_block.items.len(), expected_block.items.len()); - assert_eq!(loaded_block.header.checksum, expected_block.header.checksum); - }); - }); - } - } -} - -criterion_group!( - benches, - encode_block, - value_block_find, - load_value_block_from_disk, -); -criterion_main!(benches); diff --git a/benches/tree.rs b/benches/tree.rs index 067308aa4..99c50ff7d 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -1,5 +1,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{AbstractTree, BlockCache, Config}; +use lsm_tree::{ + config::BlockSizePolicy, AbstractTree, Cache, Config, Guard, SeqNo, SequenceNumberCounter, +}; use std::sync::Arc; use tempfile::tempdir; @@ -11,10 +13,14 @@ fn full_scan(c: &mut Criterion) { group.bench_function(format!("scan all uncached, {item_count} items"), |b| { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); for x in 0_u32..item_count { let key = x.to_be_bytes(); @@ -25,17 +31,21 @@ fn full_scan(c: &mut Criterion) { tree.flush_active_memtable(0).unwrap(); b.iter(|| { - assert_eq!(tree.len(None, None).unwrap(), item_count as usize); + assert_eq!(tree.len(SeqNo::MAX, None).unwrap(), item_count as usize); }) }); group.bench_function(format!("scan all cached, {item_count} items"), |b| { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(100_000_000).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(100_000_000))) + .open() + .unwrap(); for x in 0_u32..item_count { let key = x.to_be_bytes(); @@ -44,10 +54,10 @@ fn full_scan(c: &mut Criterion) { } tree.flush_active_memtable(0).unwrap(); - assert_eq!(tree.len(None, None).unwrap(), item_count as usize); + assert_eq!(tree.len(SeqNo::MAX, None).unwrap(), item_count as usize); b.iter(|| { - assert_eq!(tree.len(None, None).unwrap(), item_count as usize); + assert_eq!(tree.len(SeqNo::MAX, None).unwrap(), item_count as usize); }) }); } @@ -61,10 +71,14 @@ fn scan_vs_query(c: &mut Criterion) { for size in [100_000, 1_000_000] { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); for x in 0..size as u64 { let key = x.to_be_bytes().to_vec(); @@ -73,22 +87,19 @@ fn scan_vs_query(c: &mut Criterion) { } tree.flush_active_memtable(0).unwrap(); - assert_eq!(tree.len(None, None).unwrap(), size); + assert_eq!(tree.len(SeqNo::MAX, None).unwrap(), size); group.sample_size(10); group.bench_function(format!("scan {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.iter(None, None); - let iter = iter.into_iter(); + let iter = tree.iter(SeqNo::MAX, None); let count = iter - .filter(|x| match x { - Ok((key, _)) => { - let buf = &key[..8]; - let (int_bytes, _rest) = buf.split_at(std::mem::size_of::()); - let num = u64::from_be_bytes(int_bytes.try_into().unwrap()); - (60000..60010).contains(&num) - } - Err(_) => false, + .filter_map(|guard| { + let (key, _) = guard.into_inner().ok()?; + let buf = &key[..8]; + let (int_bytes, _rest) = buf.split_at(std::mem::size_of::()); + let num = u64::from_be_bytes(int_bytes.try_into().unwrap()); + (60000..60010).contains(&num).then_some(()) }) .count(); assert_eq!(count, 10); @@ -101,7 +112,7 @@ fn scan_vs_query(c: &mut Criterion) { Included(60000_u64.to_be_bytes().to_vec()), Excluded(60010_u64.to_be_bytes().to_vec()), ), - None, + SeqNo::MAX, None, ); let iter = iter.into_iter(); @@ -115,7 +126,7 @@ fn scan_vs_query(c: &mut Criterion) { Included(60000_u64.to_be_bytes().to_vec()), Excluded(60010_u64.to_be_bytes().to_vec()), ), - None, + SeqNo::MAX, None, ); let iter = iter.into_iter(); @@ -131,10 +142,14 @@ fn scan_vs_prefix(c: &mut Criterion) { for size in [10_000, 100_000, 1_000_000] { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); for _ in 0..size { let key = nanoid::nanoid!(); @@ -151,28 +166,30 @@ fn scan_vs_prefix(c: &mut Criterion) { } tree.flush_active_memtable(0).unwrap(); - assert_eq!(tree.len(None, None).unwrap() as u64, size + 10); + assert_eq!(tree.len(SeqNo::MAX, None).unwrap() as u64, size + 10); group.sample_size(10); group.bench_function(format!("scan {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.iter(None, None); - let iter = iter.filter(|x| match x { - Ok((key, _)) => key.starts_with(prefix.as_bytes()), - Err(_) => false, - }); - assert_eq!(iter.count(), 10); + let count = tree + .iter(SeqNo::MAX, None) + .filter_map(|guard| { + let (key, _) = guard.into_inner().ok()?; + key.starts_with(prefix.as_bytes()).then_some(()) + }) + .count(); + assert_eq!(count, 10); }); }); group.bench_function(format!("prefix {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.prefix(prefix, None, None); + let iter = tree.prefix(prefix, SeqNo::MAX, None); assert_eq!(iter.count(), 10); }); }); group.bench_function(format!("prefix rev {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.prefix(prefix, None, None); + let iter = tree.prefix(prefix, SeqNo::MAX, None); assert_eq!(iter.rev().count(), 10); }); }); @@ -186,11 +203,15 @@ fn tree_get_pairs(c: &mut Criterion) { for segment_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] { { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .data_block_size_policy(BlockSizePolicy::all(1_024)) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); let mut x = 0_u64; @@ -207,7 +228,7 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::first_key_value (disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.first_key_value(None, None).unwrap().is_some()); + assert!(tree.first_key_value(SeqNo::MAX, None).is_some()); }); }, ); @@ -216,7 +237,7 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::last_key_value (disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.last_key_value(None, None).unwrap().is_some()); + assert!(tree.last_key_value(SeqNo::MAX, None).is_some()); }); }, ); @@ -224,11 +245,15 @@ fn tree_get_pairs(c: &mut Criterion) { { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .data_block_size_policy(BlockSizePolicy::all(1_024)) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); let mut x = 0_u64; @@ -247,7 +272,7 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::first_key_value (non-disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.first_key_value(None, None).unwrap().is_some()); + assert!(tree.first_key_value(SeqNo::MAX, None).is_some()); }); }, ); @@ -256,7 +281,7 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::last_key_value (non-disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.last_key_value(None, None).unwrap().is_some()); + assert!(tree.last_key_value(SeqNo::MAX, None).is_some()); }); }, ); @@ -267,11 +292,15 @@ fn tree_get_pairs(c: &mut Criterion) { fn disk_point_read(c: &mut Criterion) { let folder = tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .data_block_size_policy(BlockSizePolicy::all(1_024)) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); for seqno in 0..5 { tree.insert("a", "b", seqno); @@ -287,15 +316,13 @@ fn disk_point_read(c: &mut Criterion) { let tree = tree.clone(); b.iter(|| { - tree.get("a", None).unwrap().unwrap(); + tree.get("a", SeqNo::MAX).unwrap().unwrap(); }); }); - c.bench_function("point read w/ seqno latest (uncached)", |b| { - let snapshot = tree.snapshot(5); - + c.bench_function("point read w/ seqno (uncached)", |b| { b.iter(|| { - snapshot.get("a").unwrap().unwrap(); + tree.get("a", 5).unwrap().unwrap(); }); }); } @@ -305,11 +332,15 @@ fn disjoint_tree_minmax(c: &mut Criterion) { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .data_block_size_policy(BlockSizePolicy::all(1_024)) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); tree.insert("a", "a", 0); tree.flush_active_memtable(0).unwrap(); @@ -346,13 +377,23 @@ fn disjoint_tree_minmax(c: &mut Criterion) { group.bench_function("Tree::first_key_value".to_string(), |b| { b.iter(|| { - assert_eq!(&*tree.first_key_value(None, None).unwrap().unwrap().1, b"a"); + let (_, val) = tree + .first_key_value(SeqNo::MAX, None) + .unwrap() + .into_inner() + .unwrap(); + assert_eq!(&*val, b"a"); }); }); group.bench_function("Tree::last_key_value".to_string(), |b| { b.iter(|| { - assert_eq!(&*tree.last_key_value(None, None).unwrap().unwrap().1, b"g"); + let (_, val) = tree + .last_key_value(SeqNo::MAX, None) + .unwrap() + .into_inner() + .unwrap(); + assert_eq!(&*val, b"g"); }); }); } @@ -360,10 +401,15 @@ fn disjoint_tree_minmax(c: &mut Criterion) { fn blob_tree_get(c: &mut Criterion) { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder.path()) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open_as_blob_tree() - .unwrap(); + let tree = Config::new( + folder.path(), + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .with_kv_separation(Some(Default::default())) + .open() + .unwrap(); let value = b"powek5bowa".repeat(100); @@ -371,7 +417,7 @@ fn blob_tree_get(c: &mut Criterion) { c.bench_function("blob tree get", |b| { b.iter(|| { - tree.get("mykey", None).unwrap().unwrap(); + tree.get("mykey", SeqNo::MAX).unwrap().unwrap(); }); }); } From 2410660ea36448b48ff9e9d074ee162dda16b945 Mon Sep 17 00:00:00 2001 From: Kunal Mohan Date: Tue, 21 Apr 2026 22:40:06 +0530 Subject: [PATCH 3/5] address review comments --- benches/tree.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/benches/tree.rs b/benches/tree.rs index 99c50ff7d..262d6248e 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -14,7 +14,7 @@ fn full_scan(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + path.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -39,7 +39,7 @@ fn full_scan(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + path.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -72,7 +72,7 @@ fn scan_vs_query(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + path.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -143,7 +143,7 @@ fn scan_vs_prefix(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + path.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -204,7 +204,7 @@ fn tree_get_pairs(c: &mut Criterion) { { let folder = tempfile::tempdir().unwrap(); let tree = Config::new( - folder, + folder.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -246,7 +246,7 @@ fn tree_get_pairs(c: &mut Criterion) { { let folder = tempfile::tempdir().unwrap(); let tree = Config::new( - folder, + folder.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -293,7 +293,7 @@ fn disk_point_read(c: &mut Criterion) { let folder = tempdir().unwrap(); let tree = Config::new( - folder, + folder.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -333,7 +333,7 @@ fn disjoint_tree_minmax(c: &mut Criterion) { let folder = tempfile::tempdir().unwrap(); let tree = Config::new( - folder, + folder.path(), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -414,6 +414,7 @@ fn blob_tree_get(c: &mut Criterion) { let value = b"powek5bowa".repeat(100); tree.insert("mykey", &value, 0); + tree.flush_active_memtable(0).unwrap(); c.bench_function("blob tree get", |b| { b.iter(|| { From ac8526585d88fa140b206807065cd31026a12852 Mon Sep 17 00:00:00 2001 From: Kunal Mohan Date: Fri, 8 May 2026 22:16:59 +0530 Subject: [PATCH 4/5] remove benches/level_manifest.rs --- benches/level_manifest.rs | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 benches/level_manifest.rs diff --git a/benches/level_manifest.rs b/benches/level_manifest.rs deleted file mode 100644 index af7cdcc13..000000000 --- a/benches/level_manifest.rs +++ /dev/null @@ -1,35 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{config::BlockSizePolicy, AbstractTree, Config, SequenceNumberCounter}; - -fn iterate_segments(c: &mut Criterion) { - let mut group = c.benchmark_group("Iterate level manifest"); - group.sample_size(10); - - std::fs::create_dir_all(".bench").unwrap(); - - for segment_count in [0, 1, 5, 10, 100, 500, 1_000, 2_000, 4_000] { - group.bench_function(format!("iterate {segment_count} segments"), |b| { - let folder = tempfile::tempdir_in(".bench").unwrap(); - let tree = Config::new( - folder, - SequenceNumberCounter::default(), - SequenceNumberCounter::default(), - ) - .data_block_size_policy(BlockSizePolicy::all(1_024)) - .open() - .unwrap(); - - for x in 0_u64..segment_count { - tree.insert("a", "b", x); - tree.flush_active_memtable(0).unwrap(); - } - - b.iter(|| { - assert_eq!(tree.table_count(), segment_count as usize); - }); - }); - } -} - -criterion_group!(benches, iterate_segments); -criterion_main!(benches); From 9396cf22a34325908304831288519c0d4df13a6c Mon Sep 17 00:00:00 2001 From: Kunal Mohan Date: Fri, 8 May 2026 22:19:36 +0530 Subject: [PATCH 5/5] remove file reference from cargo.toml --- Cargo.toml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6065c6a40..0aa4c26d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,12 +79,6 @@ harness = false path = "benches/tree.rs" required-features = ["lz4"] -[[bench]] -name = "level_manifest" -harness = false -path = "benches/level_manifest.rs" -required-features = [] - [[bench]] name = "fd_table" harness = false