From bbe2a1a30c9914b024f019baad0ade1cd0c2eeec Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:12:17 +0200 Subject: [PATCH 01/13] feat(error): add RouteMismatch error for route-compatibility on reopen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Error::RouteMismatch variant returned when recovery finds fewer tables than expected AND level_routes is configured, distinguishing misconfiguration from actual data corruption (Unrecoverable) - Re-enable blocked_bloom filter module (was disabled via ._rs rename) with proper clippy expect annotations - Fix never-looping for loops in prop_mvcc and prop_range_tombstone oracles (replace with iterator .next()) - Update benchmarks for current API: Config::new(3 args), Cache (was BlockCache), use_cache (was block_cache), SeqNo params, IterGuardImpl iteration pattern - Stub out benchmarks for removed internal types (ValueBlock, TopLevelIndex, binary_search) - Add #[allow] for clippy lints in #[cfg(test)] modules - Fix map_or → is_none_or and &[key] → [key] in test code Closes #164 --- benches/block.rs | 203 +---------------- benches/level_manifest.rs | 83 +------ benches/partition_point.rs | 32 +-- benches/tli.rs | 54 +---- benches/tree.rs | 213 ++++++++++-------- src/compaction/leveled/mod.rs | 6 + src/compaction/leveled/test.rs | 8 +- src/compaction/stream.rs | 1 + src/compression.rs | 1 + src/error.rs | 17 ++ src/file.rs | 1 + src/fs/std_fs.rs | 1 + src/memtable/arena.rs | 1 + src/mvcc_stream.rs | 1 + src/prefix.rs | 1 + src/table/block/mod.rs | 1 + src/table/block_index/iter.rs | 1 + .../blocked_bloom/{builder._rs => builder.rs} | 18 ++ .../filter/blocked_bloom/{mod._rs => mod.rs} | 22 +- src/table/filter/mod.rs | 2 +- src/table/meta.rs | 1 + src/table/mod.rs | 6 + src/table/multi_writer.rs | 1 + src/table/tests.rs | 2 +- src/tree/mod.rs | 15 ++ src/version/run.rs | 2 +- src/vlog/blob_file/reader.rs | 2 +- src/vlog/blob_file/scanner.rs | 2 +- tests/level_routing.rs | 43 ++++ tests/prop_mvcc.rs | 12 +- tests/prop_range_tombstone.rs | 4 +- 31 files changed, 284 insertions(+), 473 deletions(-) rename src/table/filter/blocked_bloom/{builder._rs => builder.rs} (87%) rename src/table/filter/blocked_bloom/{mod._rs => mod.rs} (91%) diff --git a/benches/block.rs b/benches/block.rs index 0e02eff7a..9ecc06f7b 100644 --- a/benches/block.rs +++ b/benches/block.rs @@ -1,200 +1,11 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{ - coding::Encode, - table::{ - block::{header::Header as BlockHeader, offset::BlockOffset, ItemSize}, - meta::CompressionType, - value_block::ValueBlock, - }, - Checksum, InternalValue, -}; -use rand::Rng; -use std::io::Write; - -/* fn value_block_size(c: &mut Criterion) { - let mut group = c.benchmark_group("ValueBlock::size"); - - for item_count in [10, 100, 1_000] { - group.bench_function(format!("{item_count} items"), |b| { - let items = (0..item_count) - .map(|_| { - InternalValue::from_components( - "a".repeat(16).as_bytes(), - "a".repeat(100).as_bytes(), - 63, - lsm_tree::ValueType::Value, - ) - }) - .collect(); - - let block = ValueBlock { - items, - header: BlockHeader { - compression: CompressionType::Lz4, - checksum: Checksum::from_raw(0), - data_length: 0, - previous_block_offset: 0, - uncompressed_length: 0, - }, - }; - - b.iter(|| { - (&*block.items).size(); - }) - }); - } -} */ - -fn value_block_find(c: &mut Criterion) { - let mut group = c.benchmark_group("ValueBlock::find_latest"); - - for item_count in [10, 100, 1_000, 10_000] { - let mut items = vec![]; - - for item in 0u64..item_count { - items.push(InternalValue::from_components( - item.to_be_bytes(), - b"", - 0, - lsm_tree::ValueType::Value, - )); - } - - let block = ValueBlock { - items: items.into_boxed_slice(), - header: BlockHeader { - compression: CompressionType::Lz4, - checksum: Checksum::from_raw(0), - data_length: 0, - previous_block_offset: BlockOffset(0), - uncompressed_length: 0, - }, - }; - - let mut rng = rand::rng(); - - group.bench_function(format!("{item_count} items (linear)"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count).to_be_bytes(); - - let item = block - .items - .iter() - .find(|item| &*item.key.user_key == needle) - .cloned() - .unwrap(); - - assert_eq!(item.key.user_key, needle); - }) - }); - - group.bench_function(format!("{item_count} items (binary search)"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count).to_be_bytes(); - - let item = block.get_latest(&needle).unwrap(); - assert_eq!(item.key.user_key, needle); - }) - }); - } -} - -fn encode_block(c: &mut Criterion) { - let mut group = c.benchmark_group("Encode block"); - - for comp_type in [CompressionType::None, CompressionType::Lz4] { - for block_size in [4, 8, 16, 32, 64, 128] { - let block_size = block_size * 1_024; - - let mut size = 0; +// TODO: These benchmarks used ValueBlock (now DataBlock) and other internal +// types that were restructured. DataBlock no longer exposes items/header +// directly — the block format changed to raw bytes with binary index. +// Needs rewrite against the current DataBlock API. - let mut items = vec![]; - - for x in 0u64.. { - let value = InternalValue::from_components( - x.to_be_bytes(), - x.to_string().repeat(50).as_bytes(), - 63, - lsm_tree::ValueType::Value, - ); - - size += value.size(); - - items.push(value); - - if size >= block_size { - break; - } - } - - group.bench_function(format!("{block_size} KiB [{comp_type}]"), |b| { - b.iter(|| { - // Serialize block - let (mut header, data) = - ValueBlock::to_bytes_compressed(&items, BlockOffset(0), comp_type).unwrap(); - }); - }); - } - } -} - -fn load_value_block_from_disk(c: &mut Criterion) { - let mut group = c.benchmark_group("Load block from disk"); - - for comp_type in [CompressionType::None, CompressionType::Lz4] { - for block_size in [4, 8, 16, 32, 64, 128] { - let block_size = block_size * 1_024; - - let mut size = 0; - - let mut items = vec![]; - - for x in 0u64.. { - let value = InternalValue::from_components( - x.to_be_bytes(), - x.to_string().repeat(50).as_bytes(), - 63, - lsm_tree::ValueType::Value, - ); - - size += value.size(); - - items.push(value); - - if size >= block_size { - break; - } - } - - // Serialize block - let (mut header, data) = - ValueBlock::to_bytes_compressed(&items, BlockOffset(0), comp_type).unwrap(); - - let mut file = tempfile::tempfile().unwrap(); - header.encode_into(&mut file).unwrap(); - file.write_all(&data).unwrap(); - - let expected_block = ValueBlock { - items: items.clone().into_boxed_slice(), - header, - }; - - group.bench_function(format!("{block_size} KiB [{comp_type}]"), |b| { - b.iter(|| { - let loaded_block = ValueBlock::from_file(&mut file, BlockOffset(0)).unwrap(); +use criterion::{criterion_group, criterion_main, Criterion}; - assert_eq!(loaded_block.items.len(), expected_block.items.len()); - assert_eq!(loaded_block.header.checksum, expected_block.header.checksum); - }); - }); - } - } -} +fn placeholder(_c: &mut Criterion) {} -criterion_group!( - benches, - encode_block, - value_block_find, - load_value_block_from_disk, -); +criterion_group!(benches, placeholder); criterion_main!(benches); diff --git a/benches/level_manifest.rs b/benches/level_manifest.rs index 9b6896da7..337ba1725 100644 --- a/benches/level_manifest.rs +++ b/benches/level_manifest.rs @@ -1,80 +1,11 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{AbstractTree, Config}; - -fn iterate_segments(c: &mut Criterion) { - let mut group = c.benchmark_group("Iterate level manifest"); - group.sample_size(10); - - std::fs::create_dir_all(".bench").unwrap(); - - for segment_count in [0, 1, 10, 100, 500, 1_000] { - group.bench_function(format!("iterate {segment_count} segments"), |b| { - let folder = tempfile::tempdir_in(".bench").unwrap(); - let tree = Config::new(folder).data_block_size(1_024).open().unwrap(); - - for x in 0_u64..segment_count { - tree.insert("a", "b", x); - tree.flush_active_memtable(0).unwrap(); - } - - let levels = tree.levels.read().unwrap(); - - b.iter(|| { - assert_eq!(levels.iter().count(), segment_count as usize); - }); - }); - } -} - -fn find_segment(c: &mut Criterion) { - let mut group = c.benchmark_group("Find segment in disjoint level"); - group.sample_size(10); +// TODO: These benchmarks accessed `tree.levels` field directly which is no +// longer public. Level iteration moved to `Version::iter_levels()` which is +// internal. Needs rewrite to benchmark level manifest performance through +// the public API. - std::fs::create_dir_all(".bench").unwrap(); - - for segment_count in [1u16, 2, 3, 4, 5, 10, 100, 1_000] { - let folder = tempfile::tempdir_in(".bench").unwrap(); - let tree = Config::new(folder).data_block_size(1_024).open().unwrap(); - - for x in 0..segment_count { - tree.insert(x.to_be_bytes(), "", x.into()); - tree.flush_active_memtable(0).unwrap(); - } - - let key = (segment_count / 2).to_be_bytes(); - - group.bench_function( - format!("find segment in {segment_count} segments - binary search"), - |b| { - let levels = tree.levels.read().unwrap(); - let first_level = levels.levels.first().expect("should exist"); - - b.iter(|| { - first_level - .as_disjoint() - .expect("should be disjoint") - .get_segment_containing_key(&key) - .expect("should exist") - }); - }, - ); - - group.bench_function( - format!("find segment in {segment_count} segments - linear search"), - |b| { - let levels = tree.levels.read().unwrap(); - let first_level = levels.levels.first().expect("should exist"); +use criterion::{criterion_group, criterion_main, Criterion}; - b.iter(|| { - first_level - .iter() - .find(|x| x.metadata.key_range.contains_key(&key)) - .expect("should exist"); - }); - }, - ); - } -} +fn placeholder(_c: &mut Criterion) {} -criterion_group!(benches, iterate_segments, find_segment); +criterion_group!(benches, placeholder); criterion_main!(benches); diff --git a/benches/partition_point.rs b/benches/partition_point.rs index dbbe8382a..46ba0612c 100644 --- a/benches/partition_point.rs +++ b/benches/partition_point.rs @@ -1,30 +1,10 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::binary_search::partition_point; -use rand::Rng; - -fn bench_partition_point(c: &mut Criterion) { - let mut group = c.benchmark_group("partition_point"); - - let mut rng = rand::rng(); +// TODO: The binary_search module was removed from public API. +// partition_point is now an internal utility. Rewrite needed if +// benchmarking custom binary search is still desired. - for item_count in [10, 100, 1_000, 10_000, 100_000, 1_000_000] { - let items = (0..item_count).collect::>(); - - group.bench_function(format!("native {item_count}"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count); - items.partition_point(|&x| x <= needle) - }) - }); +use criterion::{criterion_group, criterion_main, Criterion}; - group.bench_function(format!("rewrite {item_count}"), |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count); - partition_point(&items, |&x| x <= needle) - }) - }); - } -} +fn placeholder(_c: &mut Criterion) {} -criterion_group!(benches, bench_partition_point); +criterion_group!(benches, placeholder); criterion_main!(benches); diff --git a/benches/tli.rs b/benches/tli.rs index 1b9dea651..aaf8db744 100644 --- a/benches/tli.rs +++ b/benches/tli.rs @@ -1,51 +1,11 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::table::{ - block::offset::BlockOffset, block_index::KeyedBlockIndex, value_block::CachePolicy, -}; -use rand::Rng; - -fn tli_find_item(c: &mut Criterion) { - use lsm_tree::table::block_index::{block_handle::KeyedBlockHandle, top_level::TopLevelIndex}; - - let mut group = c.benchmark_group("TLI find item"); - - for item_count in [10u64, 100, 1_000, 10_000, 25_000, 100_000] { - let items = { - let mut items = Vec::with_capacity(item_count as usize); - - for x in 0..item_count { - items.push(KeyedBlockHandle { - end_key: x.to_be_bytes().into(), - offset: BlockOffset(x), - }); - } +// TODO: This benchmark used TopLevelIndex and KeyedBlockIndex which were +// removed during the block_index → index_block restructuring. The index_block +// module is pub(crate) and does not expose TopLevelIndex externally. +// Needs rewrite against the current IndexBlock/KeyedBlockHandle API. - items - }; - - let index = TopLevelIndex::from_boxed_slice(items.into()); - - let mut rng = rand::rng(); - - group.bench_function( - format!("TLI get_block_containing_item ({item_count} items)"), - |b| { - b.iter(|| { - let needle = rng.random_range(0..item_count).to_be_bytes(); +use criterion::{criterion_group, criterion_main, Criterion}; - assert_eq!( - needle, - &*index - .get_lowest_block_containing_key(&needle, CachePolicy::Read) - .unwrap() - .unwrap() - .end_key, - ); - }) - }, - ); - } -} +fn placeholder(_c: &mut Criterion) {} -criterion_group!(benches, tli_find_item); +criterion_group!(benches, placeholder); criterion_main!(benches); diff --git a/benches/tree.rs b/benches/tree.rs index b20bb2e6b..4cc5c1f46 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -1,8 +1,10 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use lsm_tree::{AbstractTree, BlockCache, Config}; +use lsm_tree::{AbstractTree, Cache, Config, Guard, SeqNo, SequenceNumberCounter}; use std::sync::Arc; use tempfile::tempdir; +const MAX: SeqNo = SeqNo::MAX; + fn full_scan(c: &mut Criterion) { let mut group = c.benchmark_group("scan all"); group.sample_size(10); @@ -11,10 +13,14 @@ fn full_scan(c: &mut Criterion) { group.bench_function(format!("scan all uncached, {item_count} items"), |b| { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Cache::with_capacity_bytes(0).into()) + .open() + .unwrap(); for x in 0_u32..item_count { let key = x.to_be_bytes(); @@ -25,17 +31,21 @@ fn full_scan(c: &mut Criterion) { tree.flush_active_memtable(0).unwrap(); b.iter(|| { - assert_eq!(tree.len(None, None).unwrap(), item_count as usize); + assert_eq!(tree.len(MAX, None).unwrap(), item_count as usize); }) }); group.bench_function(format!("scan all cached, {item_count} items"), |b| { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(100_000_000).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Cache::with_capacity_bytes(100_000_000).into()) + .open() + .unwrap(); for x in 0_u32..item_count { let key = x.to_be_bytes(); @@ -44,10 +54,10 @@ fn full_scan(c: &mut Criterion) { } tree.flush_active_memtable(0).unwrap(); - assert_eq!(tree.len(None, None).unwrap(), item_count as usize); + assert_eq!(tree.len(MAX, None).unwrap(), item_count as usize); b.iter(|| { - assert_eq!(tree.len(None, None).unwrap(), item_count as usize); + assert_eq!(tree.len(MAX, None).unwrap(), item_count as usize); }) }); } @@ -65,10 +75,14 @@ fn scan_vs_query(c: &mut Criterion) { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Cache::with_capacity_bytes(0).into()) + .open() + .unwrap(); for x in 0..size as u64 { let key = x.to_be_bytes().to_vec(); @@ -77,22 +91,19 @@ fn scan_vs_query(c: &mut Criterion) { } tree.flush_active_memtable(0).unwrap(); - assert_eq!(tree.len(None, None).unwrap(), size); + assert_eq!(tree.len(MAX, None).unwrap(), size); group.sample_size(10); group.bench_function(format!("scan {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.iter(None, None); - let iter = iter.into_iter(); - let count = iter - .filter(|x| match x { - Ok((key, _)) => { - let buf = &key[..8]; - let (int_bytes, _rest) = buf.split_at(std::mem::size_of::()); - let num = u64::from_be_bytes(int_bytes.try_into().unwrap()); - (query_start..query_end).contains(&num) - } - Err(_) => false, + let count = tree + .iter(MAX, None) + .filter_map(|guard| guard.into_inner().ok()) + .filter(|(key, _)| { + let buf = &key[..8]; + let (int_bytes, _rest) = buf.split_at(std::mem::size_of::()); + let num = u64::from_be_bytes(int_bytes.try_into().unwrap()); + (query_start..query_end).contains(&num) }) .count(); assert_eq!(count, 10); @@ -105,10 +116,9 @@ fn scan_vs_query(c: &mut Criterion) { Included(query_start.to_be_bytes().to_vec()), Excluded(query_end.to_be_bytes().to_vec()), ), - None, + MAX, None, ); - let iter = iter.into_iter(); assert_eq!(iter.count(), 10); }) }); @@ -119,10 +129,9 @@ fn scan_vs_query(c: &mut Criterion) { Included(query_start.to_be_bytes().to_vec()), Excluded(query_end.to_be_bytes().to_vec()), ), - None, + MAX, None, ); - let iter = iter.into_iter(); assert_eq!(iter.rev().count(), 10); }) }); @@ -135,10 +144,14 @@ fn scan_vs_prefix(c: &mut Criterion) { for size in [10_000, 100_000] { let path = tempdir().unwrap(); - let tree = Config::new(path) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open() - .unwrap(); + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Cache::with_capacity_bytes(0).into()) + .open() + .unwrap(); for _ in 0..size { let key = nanoid::nanoid!(); @@ -155,28 +168,28 @@ fn scan_vs_prefix(c: &mut Criterion) { } tree.flush_active_memtable(0).unwrap(); - assert_eq!(tree.len(None, None).unwrap() as u64, size + 10); + assert_eq!(tree.len(MAX, None).unwrap() as u64, size + 10); group.sample_size(10); group.bench_function(format!("scan {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.iter(None, None); - let iter = iter.filter(|x| match x { - Ok((key, _)) => key.starts_with(prefix.as_bytes()), - Err(_) => false, - }); - assert_eq!(iter.count(), 10); + let count = tree + .iter(MAX, None) + .filter_map(|guard| guard.into_inner().ok()) + .filter(|(key, _)| key.starts_with(prefix.as_bytes())) + .count(); + assert_eq!(count, 10); }); }); group.bench_function(format!("prefix {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.prefix(prefix, None, None); + let iter = tree.prefix(prefix, MAX, None); assert_eq!(iter.count(), 10); }); }); group.bench_function(format!("prefix rev {} (uncached)", size), |b| { b.iter(|| { - let iter = tree.prefix(prefix, None, None); + let iter = tree.prefix(prefix, MAX, None); assert_eq!(iter.rev().count(), 10); }); }); @@ -190,11 +203,14 @@ fn tree_get_pairs(c: &mut Criterion) { for segment_count in [1, 4, 16, 64, 128] { { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); let mut x = 0_u64; @@ -211,7 +227,8 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::first_key_value (disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.first_key_value(None, None).unwrap().is_some()); + let guard = tree.first_key_value(MAX, None).unwrap(); + assert!(guard.into_inner().is_ok()); }); }, ); @@ -220,7 +237,8 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::last_key_value (disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.last_key_value(None, None).unwrap().is_some()); + let guard = tree.last_key_value(MAX, None).unwrap(); + assert!(guard.into_inner().is_ok()); }); }, ); @@ -228,11 +246,14 @@ fn tree_get_pairs(c: &mut Criterion) { { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); let mut x = 0_u64; @@ -251,7 +272,8 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::first_key_value (non-disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.first_key_value(None, None).unwrap().is_some()); + let guard = tree.first_key_value(MAX, None).unwrap(); + assert!(guard.into_inner().is_ok()); }); }, ); @@ -260,7 +282,8 @@ fn tree_get_pairs(c: &mut Criterion) { &format!("Tree::last_key_value (non-disjoint), {segment_count} segments"), |b| { b.iter(|| { - assert!(tree.last_key_value(None, None).unwrap().is_some()); + let guard = tree.last_key_value(MAX, None).unwrap(); + assert!(guard.into_inner().is_ok()); }); }, ); @@ -271,11 +294,14 @@ fn tree_get_pairs(c: &mut Criterion) { fn disk_point_read(c: &mut Criterion) { let folder = tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); for seqno in 0..5 { tree.insert("a", "b", seqno); @@ -291,17 +317,12 @@ fn disk_point_read(c: &mut Criterion) { let tree = tree.clone(); b.iter(|| { - tree.get("a", None).unwrap().unwrap(); + tree.get("a", MAX).unwrap().unwrap(); }); }); - c.bench_function("point read w/ seqno latest (uncached)", |b| { - let snapshot = tree.snapshot(5); - - b.iter(|| { - snapshot.get("a").unwrap().unwrap(); - }); - }); + // TODO: snapshot() API was removed — re-enable when snapshot + // benchmarking is possible through the public API. } fn disjoint_tree_minmax(c: &mut Criterion) { @@ -309,11 +330,14 @@ fn disjoint_tree_minmax(c: &mut Criterion) { let folder = tempfile::tempdir().unwrap(); - let tree = Config::new(folder) - .data_block_size(1_024) - .block_cache(Arc::new(BlockCache::with_capacity_bytes(0))) - .open() - .unwrap(); + let tree = Config::new( + folder, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) + .open() + .unwrap(); tree.insert("a", "a", 0); tree.flush_active_memtable(0).unwrap(); @@ -350,42 +374,35 @@ fn disjoint_tree_minmax(c: &mut Criterion) { group.bench_function("Tree::first_key_value".to_string(), |b| { b.iter(|| { - assert_eq!(&*tree.first_key_value(None, None).unwrap().unwrap().1, b"a"); + let (_, v) = tree + .first_key_value(MAX, None) + .unwrap() + .into_inner() + .unwrap(); + assert_eq!(&*v, b"a"); }); }); group.bench_function("Tree::last_key_value".to_string(), |b| { b.iter(|| { - assert_eq!(&*tree.last_key_value(None, None).unwrap().unwrap().1, b"g"); + let (_, v) = tree + .last_key_value(MAX, None) + .unwrap() + .into_inner() + .unwrap(); + assert_eq!(&*v, b"g"); }); }); } -fn blob_tree_get(c: &mut Criterion) { - let folder = tempfile::tempdir().unwrap(); - - let tree = Config::new(folder.path()) - .block_cache(BlockCache::with_capacity_bytes(0).into()) - .open_as_blob_tree() - .unwrap(); - - let value = b"powek5bowa".repeat(100); - - tree.insert("mykey", &value, 0); - - c.bench_function("blob tree get", |b| { - b.iter(|| { - tree.get("mykey", None).unwrap().unwrap(); - }); - }); -} +// TODO: blob_tree_get — open_as_blob_tree() was removed from Config. +// BlobTree is now created via kv_separation_opts. Rewrite needed. // TODO: benchmark point read disjoint vs non-disjoint level vs disjoint *tree* // TODO: benchmark .prefix().next() and .next_back(), disjoint and non-disjoint criterion_group!( benches, - blob_tree_get, disjoint_tree_minmax, disk_point_read, full_scan, diff --git a/src/compaction/leveled/mod.rs b/src/compaction/leveled/mod.rs index a2161f4c4..1b3c1b84c 100644 --- a/src/compaction/leveled/mod.rs +++ b/src/compaction/leveled/mod.rs @@ -3,6 +3,12 @@ // (found in the LICENSE-* files in the repository) #[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + clippy::unnecessary_map_or +)] mod test; use super::{Choice, CompactionStrategy, Input as CompactionInput}; diff --git a/src/compaction/leveled/test.rs b/src/compaction/leveled/test.rs index 91b15e034..bf589c5fc 100644 --- a/src/compaction/leveled/test.rs +++ b/src/compaction/leveled/test.rs @@ -105,9 +105,7 @@ fn leveled_intra_l0_compaction() -> crate::Result<()> { // Verify data stayed in L0 (not pushed to L1) assert!( - tree.current_version() - .level(1) - .map_or(true, |l| l.is_empty()), + tree.current_version().level(1).is_none_or(|l| l.is_empty()), "L1 should remain empty after intra-L0 compaction" ); @@ -419,9 +417,7 @@ fn multi_level_no_skip_when_l1_has_room() -> crate::Result<()> { // Verify data went to L1 (not skipped to L2) — L2 should be empty assert!( - tree.current_version() - .level(2) - .map_or(true, |l| l.is_empty()), + tree.current_version().level(2).is_none_or(|l| l.is_empty()), "L2 should remain empty when L1 has room (no multi-level skip)", ); diff --git a/src/compaction/stream.rs b/src/compaction/stream.rs index 826bdb3d5..41ee48109 100644 --- a/src/compaction/stream.rs +++ b/src/compaction/stream.rs @@ -406,6 +406,7 @@ impl<'a, I: Iterator, F: StreamFilter + 'a> Iterator for Compaction } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use crate::{value::InternalValue, ValueType}; diff --git a/src/compression.rs b/src/compression.rs index 026dbf569..21e6af1f1 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -284,6 +284,7 @@ impl Decode for CompressionType { } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use test_log::test; diff --git a/src/error.rs b/src/error.rs index af3bc1e26..e190a636d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -106,6 +106,23 @@ pub enum Error { /// (captured before reading bytes for that field). offset: u64, }, + + /// Route-compatibility mismatch on reopen. + /// + /// The current [`level_routes`](crate::Config::level_routes) configuration + /// does not cover all table folders that were used when the tree was last + /// written. Recovery found fewer tables on disk than the manifest expects, + /// which means some previously routed directories are no longer reachable. + /// + /// Unlike [`Unrecoverable`](Self::Unrecoverable), this is a configuration + /// error, not data corruption — re-adding the missing route(s) will fix it. + RouteMismatch { + /// Number of tables listed in the manifest. + expected: usize, + + /// Number of tables actually found across all configured routes. + found: usize, + }, } impl std::fmt::Display for Error { diff --git a/src/file.rs b/src/file.rs index b530ff81c..034a159d1 100644 --- a/src/file.rs +++ b/src/file.rs @@ -94,6 +94,7 @@ pub fn fsync_directory(_path: &Path, _fs: &dyn Fs) -> std::io::Result<()> { } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use crate::fs::StdFs; diff --git a/src/fs/std_fs.rs b/src/fs/std_fs.rs index 205b7ebd1..9a6815cbb 100644 --- a/src/fs/std_fs.rs +++ b/src/fs/std_fs.rs @@ -311,6 +311,7 @@ mod sys { // --------------------------------------------------------------------------- #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use std::io::{Read, Write}; diff --git a/src/memtable/arena.rs b/src/memtable/arena.rs index 71c5a610e..ded994092 100644 --- a/src/memtable/arena.rs +++ b/src/memtable/arena.rs @@ -303,6 +303,7 @@ impl Drop for Arena { #[cfg(test)] #[expect(clippy::expect_used, reason = "tests use expect for brevity")] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; diff --git a/src/mvcc_stream.rs b/src/mvcc_stream.rs index f98d97f3c..aaea28913 100644 --- a/src/mvcc_stream.rs +++ b/src/mvcc_stream.rs @@ -324,6 +324,7 @@ impl>> DoubleEndedIte #[cfg(test)] #[expect(clippy::string_lit_as_bytes)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use crate::{value::InternalValue, ValueType}; diff --git a/src/prefix.rs b/src/prefix.rs index b912db475..abbd690de 100644 --- a/src/prefix.rs +++ b/src/prefix.rs @@ -121,6 +121,7 @@ pub fn compute_prefix_hash( } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; // Shadows std's #[test] with test_log's version for structured logging. diff --git a/src/table/block/mod.rs b/src/table/block/mod.rs index caa943063..5c5160563 100644 --- a/src/table/block/mod.rs +++ b/src/table/block/mod.rs @@ -717,6 +717,7 @@ impl Block { } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use test_log::test; diff --git a/src/table/block_index/iter.rs b/src/table/block_index/iter.rs index 9b87dd966..468c0b333 100644 --- a/src/table/block_index/iter.rs +++ b/src/table/block_index/iter.rs @@ -88,6 +88,7 @@ impl DoubleEndedIterator for OwnedIndexBlockIter { } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use crate::{ diff --git a/src/table/filter/blocked_bloom/builder._rs b/src/table/filter/blocked_bloom/builder.rs similarity index 87% rename from src/table/filter/blocked_bloom/builder._rs rename to src/table/filter/blocked_bloom/builder.rs index 4aac77c8b..77c66de00 100644 --- a/src/table/filter/blocked_bloom/builder._rs +++ b/src/table/filter/blocked_bloom/builder.rs @@ -53,6 +53,12 @@ impl Builder { /// Constructs a bloom filter that can hold `n` items /// while maintaining a certain false positive rate `fpr`. + #[expect( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "truncation and precision loss are fine because this is a probabilistic filter sizing" + )] #[must_use] pub fn with_fp_rate(n: usize, fpr: f32) -> Self { use std::f32::consts::LN_2; @@ -88,6 +94,12 @@ impl Builder { /// with `bpk` bits per key. /// /// 10 bits per key is a sensible default. + #[expect( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "truncation and precision loss are fine because this is a probabilistic filter sizing" + )] #[must_use] pub fn with_bpk(n: usize, bpk: u8) -> Self { use std::f32::consts::LN_2; @@ -109,6 +121,12 @@ impl Builder { } } + #[expect( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "truncation and precision loss are fine because this is a probabilistic filter sizing" + )] fn calculate_m(n: usize, fp_rate: f32) -> usize { use std::f32::consts::LN_2; diff --git a/src/table/filter/blocked_bloom/mod._rs b/src/table/filter/blocked_bloom/mod.rs similarity index 91% rename from src/table/filter/blocked_bloom/mod._rs rename to src/table/filter/blocked_bloom/mod.rs index da0fb689a..58089eed7 100644 --- a/src/table/filter/blocked_bloom/mod._rs +++ b/src/table/filter/blocked_bloom/mod.rs @@ -37,6 +37,14 @@ pub struct BlockedBloomFilterReader<'a> { } impl<'a> BlockedBloomFilterReader<'a> { + #[expect( + clippy::cast_possible_truncation, + reason = "bloom filter metadata (num_blocks, k, offset) always fits in usize" + )] + #[expect( + clippy::expect_used, + reason = "offset is always within slice bounds after reading the fixed-size header" + )] pub fn new(slice: &'a [u8]) -> crate::Result { let mut reader = Cursor::new(slice); @@ -74,20 +82,14 @@ impl<'a> BlockedBloomFilterReader<'a> { }) } - fn bytes(&self) -> &[u8] { - self.inner.bytes() - } - - /// Size of bloom filter in bytes - #[must_use] - fn len(&self) -> usize { - self.inner.bytes().len() - } - /// Returns `true` if the hash may be contained. /// /// Will never have a false negative. #[must_use] + #[expect( + clippy::cast_possible_truncation, + reason = "block_idx and bit_idx are bounded by filter dimensions which fit in usize" + )] pub fn contains_hash(&self, mut h1: u64) -> bool { let mut h2 = secondary_hash(h1); diff --git a/src/table/filter/mod.rs b/src/table/filter/mod.rs index 89d829174..d0b1d794c 100644 --- a/src/table/filter/mod.rs +++ b/src/table/filter/mod.rs @@ -4,7 +4,7 @@ pub mod bit_array; pub mod block; -// pub mod blocked_bloom; +pub mod blocked_bloom; pub mod standard_bloom; use standard_bloom::Builder as StandardBloomFilterBuilder; diff --git a/src/table/meta.rs b/src/table/meta.rs index 98d0e1dde..4dc2678a1 100644 --- a/src/table/meta.rs +++ b/src/table/meta.rs @@ -278,6 +278,7 @@ impl ParsedMeta { } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; diff --git a/src/table/mod.rs b/src/table/mod.rs index 2529a5432..054932159 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -18,6 +18,12 @@ pub mod util; pub mod writer; #[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + clippy::needless_borrows_for_generic_args +)] mod tests; pub use block::{Block, BlockOffset}; diff --git a/src/table/multi_writer.rs b/src/table/multi_writer.rs index 6fd29ab66..446ab5f1e 100644 --- a/src/table/multi_writer.rs +++ b/src/table/multi_writer.rs @@ -497,6 +497,7 @@ impl MultiWriter { } #[cfg(test)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use crate::{config::CompressionPolicy, AbstractTree, Config, SeqNo, SequenceNumberCounter}; use test_log::test; diff --git a/src/table/tests.rs b/src/table/tests.rs index 8ba6bc2e3..6d8890932 100644 --- a/src/table/tests.rs +++ b/src/table/tests.rs @@ -1762,7 +1762,7 @@ fn meta_seqno_kv_max_corruption_returns_invalid_data() -> crate::Result<()> { let mut writer = Writer::new(file.clone(), 0, 0, Arc::new(StdFs))?; for (i, key) in (b'a'..=b'e').enumerate() { writer.write(InternalValue::from_components( - &[key], + [key], b"val", (i as u64) + 1, crate::ValueType::Value, diff --git a/src/tree/mod.rs b/src/tree/mod.rs index c6bf6bbe8..ab99982b8 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1626,6 +1626,21 @@ impl Tree { } if tables.len() < cnt { + if config.level_routes.is_some() { + log::error!( + "Route mismatch: expected {} tables but found {} — \ + level_routes may not cover all previously used folders. \ + Missing table IDs: {:?}", + cnt, + tables.len(), + table_map.keys(), + ); + return Err(crate::Error::RouteMismatch { + expected: cnt, + found: tables.len(), + }); + } + log::error!( "Recovered less tables than expected: {:?}", table_map.keys(), diff --git a/src/version/run.rs b/src/version/run.rs index 64c3a2a4d..6bbbf4c5c 100644 --- a/src/version/run.rs +++ b/src/version/run.rs @@ -362,7 +362,7 @@ impl Run { } #[cfg(test)] -#[expect(clippy::unwrap_used)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use test_log::test; diff --git a/src/vlog/blob_file/reader.rs b/src/vlog/blob_file/reader.rs index 8b9674522..0f2fe25d7 100644 --- a/src/vlog/blob_file/reader.rs +++ b/src/vlog/blob_file/reader.rs @@ -243,7 +243,7 @@ impl<'a> Reader<'a> { } #[cfg(test)] -#[expect(clippy::unwrap_used)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use crate::fs::StdFs; diff --git a/src/vlog/blob_file/scanner.rs b/src/vlog/blob_file/scanner.rs index 0efca6ed5..b5bfbf3d8 100644 --- a/src/vlog/blob_file/scanner.rs +++ b/src/vlog/blob_file/scanner.rs @@ -198,7 +198,7 @@ impl Iterator for Scanner { } #[cfg(test)] -#[expect(clippy::unwrap_used)] +#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] mod tests { use super::*; use crate::{fs::StdFs, vlog::blob_file::writer::Writer as BlobFileWriter, Slice}; diff --git a/tests/level_routing.rs b/tests/level_routing.rs index 4fb3e590b..7c01e93d9 100644 --- a/tests/level_routing.rs +++ b/tests/level_routing.rs @@ -648,6 +648,49 @@ fn recovery_creates_missing_routed_tables_dir() -> lsm_tree::Result<()> { Ok(()) } +#[test] +fn reopen_missing_route_returns_route_mismatch() -> lsm_tree::Result<()> { + let dir = tempfile::tempdir()?; + + // Phase 1: write data with hot tier at L0-L1 + { + let config = three_tier_config(dir.path()); + let tree = config.open()?; + + tree.insert("a", "value_a", 0); + tree.insert("b", "value_b", 1); + tree.flush_active_memtable(0)?; + // Tables now live in the hot tier directory + } + + // Phase 2: reopen WITHOUT the hot tier route — tables are unreachable + { + let config = Config::new( + dir.path().join("primary"), + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .level_routes(vec![ + // Only warm tier; hot tier (0..2) is missing + LevelRoute { + levels: 2..5, + path: dir.path().join("warm"), + fs: Arc::new(StdFs), + }, + ]); + + match config.open() { + Err(lsm_tree::Error::RouteMismatch { expected, found }) => { + assert!(expected > found, "expected={expected} found={found}"); + } + Err(e) => panic!("expected RouteMismatch, got: {e:?}"), + Ok(_) => panic!("expected RouteMismatch error, but open succeeded"), + } + } + + Ok(()) +} + #[test] #[should_panic(expected = "empty or inverted level route range")] fn empty_range_panics() { diff --git a/tests/prop_mvcc.rs b/tests/prop_mvcc.rs index 4f39064d9..98ab99274 100644 --- a/tests/prop_mvcc.rs +++ b/tests/prop_mvcc.rs @@ -44,13 +44,11 @@ impl MvccOracle { let start = (key.to_vec(), Reverse(read_seqno - 1)); let end = (key.to_vec(), Reverse(0)); - for ((k, Reverse(_)), val) in self.data.range(start..=end) { - if k != key { - break; - } - return val.clone(); - } - None + self.data + .range(start..=end) + .next() + .filter(|((k, _), _)| k == key) + .and_then(|(_, val)| val.clone()) } // lsm-tree uses exclusive upper bound: entry_seqno < read_seqno. diff --git a/tests/prop_range_tombstone.rs b/tests/prop_range_tombstone.rs index eb90f8c2d..5c3ddc004 100644 --- a/tests/prop_range_tombstone.rs +++ b/tests/prop_range_tombstone.rs @@ -57,9 +57,9 @@ impl RtOracle { let start = (key.to_vec(), Reverse(read_seqno - 1)); let end = (key.to_vec(), Reverse(0)); - for ((k, Reverse(entry_seqno)), val) in self.data.range(start..=end) { + if let Some(((k, Reverse(entry_seqno)), val)) = self.data.range(start..=end).next() { if k != key { - break; + return None; } if self.is_range_deleted(key, *entry_seqno, read_seqno) { return None; From 8124db4206b712a9e76f0e7caf03d156d0bf705c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:18:19 +0200 Subject: [PATCH 02/13] test: cover Unrecoverable path when tables missing without routes Adds missing_table_without_routes_returns_unrecoverable test to cover the Unrecoverable branch (no level_routes configured but table file deleted), bringing patch coverage to 100%. --- tests/level_routing.rs | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/level_routing.rs b/tests/level_routing.rs index 7c01e93d9..b2f199fe6 100644 --- a/tests/level_routing.rs +++ b/tests/level_routing.rs @@ -691,6 +691,52 @@ fn reopen_missing_route_returns_route_mismatch() -> lsm_tree::Result<()> { Ok(()) } +#[test] +fn missing_table_without_routes_returns_unrecoverable() -> lsm_tree::Result<()> { + let dir = tempfile::tempdir()?; + + // Phase 1: write data without level_routes + { + let config = Config::new( + dir.path().join("primary"), + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ); + let tree = config.open()?; + + tree.insert("a", "value_a", 0); + tree.insert("b", "value_b", 1); + tree.flush_active_memtable(0)?; + } + + // Phase 2: delete a table file to simulate corruption + let tables_dir = dir.path().join("primary").join("tables"); + for entry in std::fs::read_dir(&tables_dir)? { + let entry = entry?; + if entry.file_type()?.is_file() { + std::fs::remove_file(entry.path())?; + break; // remove just one + } + } + + // Phase 3: reopen without routes — should get Unrecoverable, not RouteMismatch + { + let config = Config::new( + dir.path().join("primary"), + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ); + + match config.open() { + Err(lsm_tree::Error::Unrecoverable) => {} + Err(e) => panic!("expected Unrecoverable, got: {e:?}"), + Ok(_) => panic!("expected Unrecoverable error, but open succeeded"), + } + } + + Ok(()) +} + #[test] #[should_panic(expected = "empty or inverted level route range")] fn empty_range_panics() { From d0c81e764abd6fa56d5c42d66f1453abc37dd36a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:32:50 +0200 Subject: [PATCH 03/13] fix(error): tighten RouteMismatch detection to prevent masking corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RouteMismatch is now returned ONLY when all missing tables are on levels not covered by any current route (strongly suggests removed routes). If any missing table is on a level covered by a current route, return Unrecoverable (the SST was genuinely deleted/corrupted) - Soften RouteMismatch doc: "often indicates" instead of "this is" - Update level_routes reopen contract doc to mention RouteMismatch - Add regression test: deleted_sst_with_routes_configured_returns_unrecoverable - Convert #[allow] → #[expect] for clippy lint suppressions in 14 test modules - Remove 4 dead placeholder bench targets (block, level_manifest, partition_point, tli) and their Cargo.toml entries - Unify Cache wrapping to Arc::new() in bench/tree.rs --- Cargo.toml | 24 ------------------ benches/block.rs | 11 --------- benches/level_manifest.rs | 11 --------- benches/partition_point.rs | 10 -------- benches/tli.rs | 11 --------- benches/tree.rs | 8 +++--- src/compaction/stream.rs | 7 +++++- src/compression.rs | 7 +++++- src/config/mod.rs | 3 ++- src/error.rs | 6 +++-- src/file.rs | 7 +++++- src/fs/std_fs.rs | 7 +++++- src/memtable/arena.rs | 7 +++++- src/mvcc_stream.rs | 7 +++++- src/prefix.rs | 7 +++++- src/table/block/mod.rs | 7 +++++- src/table/block_index/iter.rs | 7 +++++- src/table/meta.rs | 7 +++++- src/table/multi_writer.rs | 7 +++++- src/tree/mod.rs | 45 ++++++++++++++++++++++++---------- src/version/run.rs | 7 +++++- src/vlog/blob_file/reader.rs | 7 +++++- src/vlog/blob_file/scanner.rs | 7 +++++- tests/level_routing.rs | 46 +++++++++++++++++++++++++++++++++++ 24 files changed, 172 insertions(+), 101 deletions(-) delete mode 100644 benches/block.rs delete mode 100644 benches/level_manifest.rs delete mode 100644 benches/partition_point.rs delete mode 100644 benches/tli.rs diff --git a/Cargo.toml b/Cargo.toml index edd2c8cb1..464e1bab6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,12 +64,6 @@ unexpected_cfgs = { level = "warn", check-cfg = [ [package.metadata.cargo-all-features] denylist = [] -[[bench]] -name = "tli" -harness = false -path = "benches/tli.rs" -required-features = [] - [[bench]] name = "merge" harness = false @@ -88,36 +82,18 @@ harness = false path = "benches/bloom.rs" required-features = [] -[[bench]] -name = "block" -harness = false -path = "benches/block.rs" -required-features = ["lz4"] - [[bench]] name = "tree" harness = false path = "benches/tree.rs" required-features = ["lz4"] -[[bench]] -name = "level_manifest" -harness = false -path = "benches/level_manifest.rs" -required-features = [] - [[bench]] name = "fd_table" harness = false path = "benches/fd_table.rs" required-features = [] -[[bench]] -name = "partition_point" -harness = false -path = "benches/partition_point.rs" -required-features = [] - [[bench]] name = "prefix_bloom" harness = false diff --git a/benches/block.rs b/benches/block.rs deleted file mode 100644 index 9ecc06f7b..000000000 --- a/benches/block.rs +++ /dev/null @@ -1,11 +0,0 @@ -// TODO: These benchmarks used ValueBlock (now DataBlock) and other internal -// types that were restructured. DataBlock no longer exposes items/header -// directly — the block format changed to raw bytes with binary index. -// Needs rewrite against the current DataBlock API. - -use criterion::{criterion_group, criterion_main, Criterion}; - -fn placeholder(_c: &mut Criterion) {} - -criterion_group!(benches, placeholder); -criterion_main!(benches); diff --git a/benches/level_manifest.rs b/benches/level_manifest.rs deleted file mode 100644 index 337ba1725..000000000 --- a/benches/level_manifest.rs +++ /dev/null @@ -1,11 +0,0 @@ -// TODO: These benchmarks accessed `tree.levels` field directly which is no -// longer public. Level iteration moved to `Version::iter_levels()` which is -// internal. Needs rewrite to benchmark level manifest performance through -// the public API. - -use criterion::{criterion_group, criterion_main, Criterion}; - -fn placeholder(_c: &mut Criterion) {} - -criterion_group!(benches, placeholder); -criterion_main!(benches); diff --git a/benches/partition_point.rs b/benches/partition_point.rs deleted file mode 100644 index 46ba0612c..000000000 --- a/benches/partition_point.rs +++ /dev/null @@ -1,10 +0,0 @@ -// TODO: The binary_search module was removed from public API. -// partition_point is now an internal utility. Rewrite needed if -// benchmarking custom binary search is still desired. - -use criterion::{criterion_group, criterion_main, Criterion}; - -fn placeholder(_c: &mut Criterion) {} - -criterion_group!(benches, placeholder); -criterion_main!(benches); diff --git a/benches/tli.rs b/benches/tli.rs deleted file mode 100644 index aaf8db744..000000000 --- a/benches/tli.rs +++ /dev/null @@ -1,11 +0,0 @@ -// TODO: This benchmark used TopLevelIndex and KeyedBlockIndex which were -// removed during the block_index → index_block restructuring. The index_block -// module is pub(crate) and does not expose TopLevelIndex externally. -// Needs rewrite against the current IndexBlock/KeyedBlockHandle API. - -use criterion::{criterion_group, criterion_main, Criterion}; - -fn placeholder(_c: &mut Criterion) {} - -criterion_group!(benches, placeholder); -criterion_main!(benches); diff --git a/benches/tree.rs b/benches/tree.rs index 4cc5c1f46..b04c964ec 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -18,7 +18,7 @@ fn full_scan(c: &mut Criterion) { SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) - .use_cache(Cache::with_capacity_bytes(0).into()) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) .open() .unwrap(); @@ -43,7 +43,7 @@ fn full_scan(c: &mut Criterion) { SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) - .use_cache(Cache::with_capacity_bytes(100_000_000).into()) + .use_cache(Arc::new(Cache::with_capacity_bytes(100_000_000))) .open() .unwrap(); @@ -80,7 +80,7 @@ fn scan_vs_query(c: &mut Criterion) { SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) - .use_cache(Cache::with_capacity_bytes(0).into()) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) .open() .unwrap(); @@ -149,7 +149,7 @@ fn scan_vs_prefix(c: &mut Criterion) { SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) - .use_cache(Cache::with_capacity_bytes(0).into()) + .use_cache(Arc::new(Cache::with_capacity_bytes(0))) .open() .unwrap(); diff --git a/src/compaction/stream.rs b/src/compaction/stream.rs index 41ee48109..a66497f56 100644 --- a/src/compaction/stream.rs +++ b/src/compaction/stream.rs @@ -406,7 +406,12 @@ impl<'a, I: Iterator, F: StreamFilter + 'a> Iterator for Compaction } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use crate::{value::InternalValue, ValueType}; diff --git a/src/compression.rs b/src/compression.rs index 21e6af1f1..60817b1d6 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -284,7 +284,12 @@ impl Decode for CompressionType { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use test_log::test; diff --git a/src/config/mod.rs b/src/config/mod.rs index 0ee292dbd..0c362a11f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -589,7 +589,8 @@ impl Config { /// /// Changing the mapping from levels to paths is allowed as long as /// the previously used folders remain covered. If old folders are - /// omitted, recovery will fail (`Unrecoverable`) because the missing + /// omitted, recovery will fail with + /// [`RouteMismatch`](crate::Error::RouteMismatch) because the missing /// tables cannot be found. /// /// # Panics diff --git a/src/error.rs b/src/error.rs index e190a636d..f62856673 100644 --- a/src/error.rs +++ b/src/error.rs @@ -114,8 +114,10 @@ pub enum Error { /// written. Recovery found fewer tables on disk than the manifest expects, /// which means some previously routed directories are no longer reachable. /// - /// Unlike [`Unrecoverable`](Self::Unrecoverable), this is a configuration - /// error, not data corruption — re-adding the missing route(s) will fix it. + /// Unlike [`Unrecoverable`](Self::Unrecoverable), this often indicates a + /// configuration error rather than data corruption — re-adding the missing + /// route(s) will typically fix it. However, the same condition can arise + /// if SST files were deleted while `level_routes` is configured. RouteMismatch { /// Number of tables listed in the manifest. expected: usize, diff --git a/src/file.rs b/src/file.rs index 034a159d1..76dcedbf4 100644 --- a/src/file.rs +++ b/src/file.rs @@ -94,7 +94,12 @@ pub fn fsync_directory(_path: &Path, _fs: &dyn Fs) -> std::io::Result<()> { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use crate::fs::StdFs; diff --git a/src/fs/std_fs.rs b/src/fs/std_fs.rs index 9a6815cbb..4598b43e3 100644 --- a/src/fs/std_fs.rs +++ b/src/fs/std_fs.rs @@ -311,7 +311,12 @@ mod sys { // --------------------------------------------------------------------------- #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use std::io::{Read, Write}; diff --git a/src/memtable/arena.rs b/src/memtable/arena.rs index ded994092..b91d50fc4 100644 --- a/src/memtable/arena.rs +++ b/src/memtable/arena.rs @@ -303,7 +303,12 @@ impl Drop for Arena { #[cfg(test)] #[expect(clippy::expect_used, reason = "tests use expect for brevity")] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; diff --git a/src/mvcc_stream.rs b/src/mvcc_stream.rs index aaea28913..9127c0466 100644 --- a/src/mvcc_stream.rs +++ b/src/mvcc_stream.rs @@ -324,7 +324,12 @@ impl>> DoubleEndedIte #[cfg(test)] #[expect(clippy::string_lit_as_bytes)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use crate::{value::InternalValue, ValueType}; diff --git a/src/prefix.rs b/src/prefix.rs index abbd690de..b853da4e9 100644 --- a/src/prefix.rs +++ b/src/prefix.rs @@ -121,7 +121,12 @@ pub fn compute_prefix_hash( } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; // Shadows std's #[test] with test_log's version for structured logging. diff --git a/src/table/block/mod.rs b/src/table/block/mod.rs index 5c5160563..af352f547 100644 --- a/src/table/block/mod.rs +++ b/src/table/block/mod.rs @@ -717,7 +717,12 @@ impl Block { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use test_log::test; diff --git a/src/table/block_index/iter.rs b/src/table/block_index/iter.rs index 468c0b333..0f4a1bc5b 100644 --- a/src/table/block_index/iter.rs +++ b/src/table/block_index/iter.rs @@ -88,7 +88,12 @@ impl DoubleEndedIterator for OwnedIndexBlockIter { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use crate::{ diff --git a/src/table/meta.rs b/src/table/meta.rs index 4dc2678a1..16496f8ed 100644 --- a/src/table/meta.rs +++ b/src/table/meta.rs @@ -278,7 +278,12 @@ impl ParsedMeta { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; diff --git a/src/table/multi_writer.rs b/src/table/multi_writer.rs index 446ab5f1e..ead3f32fb 100644 --- a/src/table/multi_writer.rs +++ b/src/table/multi_writer.rs @@ -497,7 +497,12 @@ impl MultiWriter { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use crate::{config::CompressionPolicy, AbstractTree, Config, SeqNo, SequenceNumberCounter}; use test_log::test; diff --git a/src/tree/mod.rs b/src/tree/mod.rs index ab99982b8..fd2c0bfaa 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1626,19 +1626,38 @@ impl Tree { } if tables.len() < cnt { - if config.level_routes.is_some() { - log::error!( - "Route mismatch: expected {} tables but found {} — \ - level_routes may not cover all previously used folders. \ - Missing table IDs: {:?}", - cnt, - tables.len(), - table_map.keys(), - ); - return Err(crate::Error::RouteMismatch { - expected: cnt, - found: tables.len(), - }); + // Route configuration is NOT persisted. To distinguish a removed + // route (config error) from a deleted SST (data corruption), check + // each missing table's level against the current routes: + // + // - Level IS covered by a current route → its directory was scanned + // and the file was not found → data corruption / deletion. + // - Level is NOT covered → falls back to primary (always scanned). + // If the table isn't there, it was likely in a route that has + // since been removed from the config. + // + // Return RouteMismatch only when ALL missing tables are on levels + // not covered by any current route. If ANY missing table is on a + // covered level, at least one SST was genuinely lost. + if let Some(routes) = &config.level_routes { + let all_missing_uncovered = table_map + .values() + .all(|(level, _, _)| !routes.iter().any(|r| r.levels.contains(level))); + + if all_missing_uncovered { + log::error!( + "Route mismatch: expected {} tables but found {} — \ + level_routes do not cover all previously used levels. \ + Missing table IDs: {:?}", + cnt, + tables.len(), + table_map.keys(), + ); + return Err(crate::Error::RouteMismatch { + expected: cnt, + found: tables.len(), + }); + } } log::error!( diff --git a/src/version/run.rs b/src/version/run.rs index 6bbbf4c5c..5baa61d88 100644 --- a/src/version/run.rs +++ b/src/version/run.rs @@ -362,7 +362,12 @@ impl Run { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use test_log::test; diff --git a/src/vlog/blob_file/reader.rs b/src/vlog/blob_file/reader.rs index 0f2fe25d7..fe888d7b6 100644 --- a/src/vlog/blob_file/reader.rs +++ b/src/vlog/blob_file/reader.rs @@ -243,7 +243,12 @@ impl<'a> Reader<'a> { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use crate::fs::StdFs; diff --git a/src/vlog/blob_file/scanner.rs b/src/vlog/blob_file/scanner.rs index b5bfbf3d8..5763c5157 100644 --- a/src/vlog/blob_file/scanner.rs +++ b/src/vlog/blob_file/scanner.rs @@ -198,7 +198,12 @@ impl Iterator for Scanner { } #[cfg(test)] -#[allow(clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec)] +#[expect( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::useless_vec, + reason = "test code" +)] mod tests { use super::*; use crate::{fs::StdFs, vlog::blob_file::writer::Writer as BlobFileWriter, Slice}; diff --git a/tests/level_routing.rs b/tests/level_routing.rs index b2f199fe6..93b2e70ea 100644 --- a/tests/level_routing.rs +++ b/tests/level_routing.rs @@ -737,6 +737,52 @@ fn missing_table_without_routes_returns_unrecoverable() -> lsm_tree::Result<()> Ok(()) } +#[test] +fn deleted_sst_with_routes_configured_returns_unrecoverable() -> lsm_tree::Result<()> { + let dir = tempfile::tempdir()?; + + // Phase 1: write data WITH routes — tables go to hot tier + { + let config = three_tier_config(dir.path()); + let tree = config.open()?; + + tree.insert("a", "value_a", 0); + tree.insert("b", "value_b", 1); + tree.flush_active_memtable(0)?; + } + + // Phase 2: delete a table file from the hot tier (simulates corruption) + let hot_tables_dir = dir.path().join("hot").join("tables"); + for entry in std::fs::read_dir(&hot_tables_dir)? { + let entry = entry?; + if entry.file_type()?.is_file() { + std::fs::remove_file(entry.path())?; + break; + } + } + + // Phase 3: reopen WITH THE SAME routes — L0 is covered by hot route, + // so the missing table is corruption, NOT a route mismatch + { + let config = three_tier_config(dir.path()); + + match config.open() { + Err(lsm_tree::Error::Unrecoverable) => {} + Err(lsm_tree::Error::RouteMismatch { expected, found }) => { + panic!( + "BUG: got RouteMismatch(expected={expected}, found={found}) \ + but the table was deleted from a covered route — \ + should be Unrecoverable" + ); + } + Err(e) => panic!("expected Unrecoverable, got: {e:?}"), + Ok(_) => panic!("expected Unrecoverable error, but open succeeded"), + } + } + + Ok(()) +} + #[test] #[should_panic(expected = "empty or inverted level route range")] fn empty_range_panics() { From d7cfa80d6aacaf36fbba79d116c843171d24fbb3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:38:05 +0200 Subject: [PATCH 04/13] fix(error): update RouteMismatch doc to match tightened detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Doc now accurately describes level-based detection logic - Convert remaining #[allow] → #[expect] in leveled/mod.rs and table/mod.rs --- src/compaction/leveled/mod.rs | 5 +++-- src/error.rs | 17 +++++++++-------- src/table/mod.rs | 5 +++-- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/compaction/leveled/mod.rs b/src/compaction/leveled/mod.rs index 1b3c1b84c..6a56e48c6 100644 --- a/src/compaction/leveled/mod.rs +++ b/src/compaction/leveled/mod.rs @@ -3,11 +3,12 @@ // (found in the LICENSE-* files in the repository) #[cfg(test)] -#[allow( +#[expect( clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec, - clippy::unnecessary_map_or + clippy::unnecessary_map_or, + reason = "test code" )] mod test; diff --git a/src/error.rs b/src/error.rs index f62856673..fc4e85f6a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -109,15 +109,16 @@ pub enum Error { /// Route-compatibility mismatch on reopen. /// - /// The current [`level_routes`](crate::Config::level_routes) configuration - /// does not cover all table folders that were used when the tree was last - /// written. Recovery found fewer tables on disk than the manifest expects, - /// which means some previously routed directories are no longer reachable. + /// Recovery found fewer tables on disk than the manifest expects, and all + /// missing tables are on levels not covered by any current + /// [`level_route`](crate::Config::level_routes). This typically means a + /// previously configured route was removed, leaving its directory + /// unreachable. /// - /// Unlike [`Unrecoverable`](Self::Unrecoverable), this often indicates a - /// configuration error rather than data corruption — re-adding the missing - /// route(s) will typically fix it. However, the same condition can arise - /// if SST files were deleted while `level_routes` is configured. + /// Re-adding the missing route(s) will usually resolve the error. If + /// missing tables are on levels that *are* covered by a current route, + /// recovery returns [`Unrecoverable`](Self::Unrecoverable) instead + /// (the SST files were genuinely lost). RouteMismatch { /// Number of tables listed in the manifest. expected: usize, diff --git a/src/table/mod.rs b/src/table/mod.rs index 054932159..ff7ba4a90 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -18,11 +18,12 @@ pub mod util; pub mod writer; #[cfg(test)] -#[allow( +#[expect( clippy::unwrap_used, clippy::indexing_slicing, clippy::useless_vec, - clippy::needless_borrows_for_generic_args + clippy::needless_borrows_for_generic_args, + reason = "test code" )] mod tests; From cb372ac32c17387a574dd1233da39a1c2e0c0239 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:44:37 +0200 Subject: [PATCH 05/13] fix(filter): replace panics with errors in BlockedBloomFilterReader::new assert_eq! on filter_type and hash_type would panic on corrupted on-disk data instead of returning a structured error. Replace with conditional checks returning Error::InvalidHeader. Also replace expect() on slice bounds with ok_or(). Adds regression tests: corrupted_filter_type_returns_error, corrupted_hash_type_returns_error, truncated_filter_returns_error. --- src/table/filter/blocked_bloom/mod.rs | 77 ++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/src/table/filter/blocked_bloom/mod.rs b/src/table/filter/blocked_bloom/mod.rs index 58089eed7..ebaf62405 100644 --- a/src/table/filter/blocked_bloom/mod.rs +++ b/src/table/filter/blocked_bloom/mod.rs @@ -41,10 +41,6 @@ impl<'a> BlockedBloomFilterReader<'a> { clippy::cast_possible_truncation, reason = "bloom filter metadata (num_blocks, k, offset) always fits in usize" )] - #[expect( - clippy::expect_used, - reason = "offset is always within slice bounds after reading the fixed-size header" - )] pub fn new(slice: &'a [u8]) -> crate::Result { let mut reader = Cursor::new(slice); @@ -59,26 +55,33 @@ impl<'a> BlockedBloomFilterReader<'a> { // NOTE: Filter type let filter_type = reader.read_u8()?; let filter_type = FilterType::try_from(filter_type)?; - assert_eq!( - FilterType::BlockedBloom, - filter_type, - "Invalid filter type, got={filter_type:?}, expected={:?}", - FilterType::BlockedBloom, - ); + if filter_type != FilterType::BlockedBloom { + return Err(crate::Error::InvalidHeader( + "BloomFilter: wrong filter type", + )); + } - // NOTE: Hash type (unused) + // NOTE: Hash type (unused, must be 0) let hash_type = reader.read_u8()?; - assert_eq!(0, hash_type, "Invalid bloom hash type"); + if hash_type != 0 { + return Err(crate::Error::InvalidHeader( + "BloomFilter: invalid hash type", + )); + } let num_blocks = reader.read_u64::()? as usize; let k = reader.read_u64::()? as usize; let offset = reader.position() as usize; + let data = slice + .get(offset..) + .ok_or(crate::Error::InvalidHeader("BloomFilter: truncated data"))?; + Ok(Self { k, num_blocks, - inner: BitArrayReader::new(slice.get(offset..).expect("should be in bounds")), + inner: BitArrayReader::new(data), }) } @@ -192,4 +195,52 @@ mod tests { Ok(()) } + + #[test] + fn corrupted_filter_type_returns_error() { + // Build a valid filter, then corrupt the filter-type byte + let mut filter = Builder::with_fp_rate(10, 0.0001); + for i in 0..10u8 { + filter.set_with_hash(Builder::get_hash(&[i])); + } + let mut bytes = filter.build(); + + // Filter type byte is right after MAGIC_BYTES (4 bytes) + // Set it to StandardBloom (0) instead of BlockedBloom (1) + bytes[crate::file::MAGIC_BYTES.len()] = 0; + + let result = BlockedBloomFilterReader::new(&bytes); + assert!( + result.is_err(), + "wrong filter type should return error, not panic" + ); + } + + #[test] + fn corrupted_hash_type_returns_error() { + let mut filter = Builder::with_fp_rate(10, 0.0001); + for i in 0..10u8 { + filter.set_with_hash(Builder::get_hash(&[i])); + } + let mut bytes = filter.build(); + + // Hash type byte is after MAGIC_BYTES + filter_type (1 byte) + bytes[crate::file::MAGIC_BYTES.len() + 1] = 99; + + let result = BlockedBloomFilterReader::new(&bytes); + assert!( + result.is_err(), + "invalid hash type should return error, not panic" + ); + } + + #[test] + fn truncated_filter_returns_error() { + // Provide only the magic bytes — not enough data for the full header + let result = BlockedBloomFilterReader::new(&crate::file::MAGIC_BYTES); + assert!( + result.is_err(), + "truncated input should return error, not panic" + ); + } } From 198020b6dfc91520b120a36d827d985c9d154e14 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:55:28 +0200 Subject: [PATCH 06/13] refactor(filter): remove unfinished blocked_bloom module blocked_bloom was never integrated into the Segment loader pipeline (upstream fjall-rs/lsm-tree#78 is still open). Remove the module source, re-disable the module declaration, and clean up bench references. FilterType::BlockedBloom enum variant is preserved for on-disk format compatibility (tag byte 1). --- benches/bloom.rs | 74 ------- src/table/filter/blocked_bloom/builder.rs | 187 ---------------- src/table/filter/blocked_bloom/mod.rs | 246 ---------------------- src/table/filter/mod.rs | 1 - 4 files changed, 508 deletions(-) delete mode 100644 src/table/filter/blocked_bloom/builder.rs delete mode 100644 src/table/filter/blocked_bloom/mod.rs diff --git a/benches/bloom.rs b/benches/bloom.rs index 8438ee46e..4e3d2fc19 100644 --- a/benches/bloom.rs +++ b/benches/bloom.rs @@ -54,34 +54,6 @@ fn standard_filter_construction(c: &mut Criterion) { }); } -fn blocked_filter_construction(c: &mut Criterion) { - use lsm_tree::table::filter::blocked_bloom::Builder; - - let mut rng = rand::rng(); - - c.bench_function("blocked bloom filter add key, 1M", |b| { - let mut filter = Builder::with_fp_rate(1_000_000, 0.01); - - b.iter(|| { - let mut key = [0; 16]; - rng.fill_bytes(&mut key); - - filter.set_with_hash(Builder::get_hash(&key)); - }); - }); - - c.bench_function("blocked bloom filter add key, 10M", |b| { - let mut filter = Builder::with_fp_rate(10_000_000, 0.01); - - b.iter(|| { - let mut key = [0; 16]; - rng.fill_bytes(&mut key); - - filter.set_with_hash(Builder::get_hash(&key)); - }); - }); -} - fn standard_filter_contains(c: &mut Criterion) { use lsm_tree::table::filter::standard_bloom::Builder; @@ -129,56 +101,10 @@ fn standard_filter_contains(c: &mut Criterion) { } } -fn blocked_filter_contains(c: &mut Criterion) { - use lsm_tree::table::filter::blocked_bloom::Builder; - - let keys = (0..100_000u128) - .map(|x| x.to_be_bytes().to_vec()) - .collect::>(); - - for fpr in [0.01, 0.001, 0.0001] { - // Same rationale as standard_filter_contains. - let n = 1_000_000; - - let mut filter = Builder::with_fp_rate(n, fpr); - - for key in &keys { - filter.set_with_hash(Builder::get_hash(key)); - } - - let mut rng = rand::rng(); - - let filter_bytes = filter.build(); - - c.bench_function( - &format!( - "blocked bloom filter contains key, true positive ({}%)", - fpr * 100.0, - ), - |b| { - b.iter(|| { - use lsm_tree::table::filter::blocked_bloom::BlockedBloomFilterReader as Reader; - use rand::seq::IndexedRandom; - - // NOTE: To make the costs more realistic, we - // pretend we are reading the filter straight from the block - let filter = Reader::new(&filter_bytes).unwrap(); - - let sample = keys.choose(&mut rng).unwrap(); - let hash = Builder::get_hash(sample); - assert!(filter.contains_hash(hash)); - }); - }, - ); - } -} - criterion_group!( benches, fast_block_index, standard_filter_construction, - blocked_filter_construction, standard_filter_contains, - blocked_filter_contains, ); criterion_main!(benches); diff --git a/src/table/filter/blocked_bloom/builder.rs b/src/table/filter/blocked_bloom/builder.rs deleted file mode 100644 index 77c66de00..000000000 --- a/src/table/filter/blocked_bloom/builder.rs +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -use super::super::bit_array::Builder as BitArrayBuilder; -use super::super::standard_bloom::builder::secondary_hash; -use crate::{ - file::MAGIC_BYTES, - table::filter::{blocked_bloom::CACHE_LINE_BYTES, FilterType}, -}; -use byteorder::{LittleEndian, WriteBytesExt}; -use std::io::Write; - -#[derive(Debug)] -pub struct Builder { - /// Raw bytes exposed as bit array - inner: BitArrayBuilder, - - /// Number of hash functions - pub(crate) k: usize, - - /// Number of blocks in the blocked bloom filter - pub(crate) num_blocks: usize, -} - -impl Builder { - #[expect( - clippy::expect_used, - reason = "we write into a Vec, so no I/O error can happen" - )] - #[must_use] - pub fn build(&self) -> Vec { - let mut v = vec![]; - - // Write header - v.write_all(&MAGIC_BYTES).expect("should not fail"); - - // NOTE: Filter type - v.write_u8(FilterType::BlockedBloom.into()) - .expect("should not fail"); - - // NOTE: Hash type (unused) - v.write_u8(0).expect("should not fail"); - - v.write_u64::(self.num_blocks as u64) - .expect("should not fail"); - v.write_u64::(self.k as u64) - .expect("should not fail"); - v.write_all(self.inner.bytes()).expect("should not fail"); - - v - } - - /// Constructs a bloom filter that can hold `n` items - /// while maintaining a certain false positive rate `fpr`. - #[expect( - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "truncation and precision loss are fine because this is a probabilistic filter sizing" - )] - #[must_use] - pub fn with_fp_rate(n: usize, fpr: f32) -> Self { - use std::f32::consts::LN_2; - - assert!(n > 0); - - // NOTE: Some sensible minimum - let fpr = fpr.max(0.000_000_1); - - // NOTE: We add ~5-25% more bits to account for blocked bloom filters being a bit less accurate - // See https://dl.acm.org/doi/10.1145/1498698.1594230 - let bonus = match fpr { - _ if fpr <= 0.001 => 1.25, - _ if fpr <= 0.01 => 1.2, - _ if fpr <= 0.1 => 1.1, - _ => 1.05, - }; - - let m = ((Self::calculate_m(n, fpr)) as f32 * bonus) as usize; - let bpk = m / n; - let k = (((bpk as f32) * LN_2) as usize).max(1); - - let num_blocks = m.div_ceil(CACHE_LINE_BYTES * 8); - - Self { - inner: BitArrayBuilder::with_capacity(num_blocks * CACHE_LINE_BYTES), - k, - num_blocks, - } - } - - /// Constructs a bloom filter that can hold `n` items - /// with `bpk` bits per key. - /// - /// 10 bits per key is a sensible default. - #[expect( - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "truncation and precision loss are fine because this is a probabilistic filter sizing" - )] - #[must_use] - pub fn with_bpk(n: usize, bpk: u8) -> Self { - use std::f32::consts::LN_2; - - assert!(bpk > 0); - assert!(n > 0); - - let bpk = bpk as usize; - - let m = n * bpk; - let k = (((bpk as f32) * LN_2) as usize).max(1); - - let num_blocks = m.div_ceil(CACHE_LINE_BYTES * 8); - - Self { - inner: BitArrayBuilder::with_capacity(num_blocks * CACHE_LINE_BYTES), - k, - num_blocks, - } - } - - #[expect( - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "truncation and precision loss are fine because this is a probabilistic filter sizing" - )] - fn calculate_m(n: usize, fp_rate: f32) -> usize { - use std::f32::consts::LN_2; - - let n = n as f32; - let ln2_squared = LN_2.powi(2); - - let numerator = n * fp_rate.ln(); - let m = -(numerator / ln2_squared); - - // Round up to next byte - ((m / 8.0).ceil() * 8.0) as usize - } - - /// Adds the key to the filter. - pub fn set_with_hash(&mut self, mut h1: u64) { - let mut h2 = secondary_hash(h1); - - let block_idx = h1 % (self.num_blocks as u64); - - for i in 1..(self.k as u64) { - let idx = h1 % (CACHE_LINE_BYTES as u64 * 8); - - #[expect( - clippy::cast_possible_truncation, - reason = "filters tend to be pretty small, definitely less than 4 GiB, even for large tables" - )] - self.inner - .enable_bit(Self::get_bit_idx(block_idx as usize, idx as usize)); - - h1 = h1.wrapping_add(h2); - h2 = h2.wrapping_mul(i); - } - } - - #[must_use] - pub fn get_bit_idx(block_idx: usize, idx_in_block: usize) -> usize { - block_idx * CACHE_LINE_BYTES * 8 + idx_in_block - } - - /// Gets the hash of a key. - #[must_use] - pub fn get_hash(key: &[u8]) -> u64 { - super::super::standard_bloom::Builder::get_hash(key) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use test_log::test; - - #[test] - fn bloom_calculate_m() { - assert_eq!(9_592, Builder::calculate_m(1_000, 0.01)); - assert_eq!(4_800, Builder::calculate_m(1_000, 0.1)); - assert_eq!(4_792_536, Builder::calculate_m(1_000_000, 0.1)); - } -} diff --git a/src/table/filter/blocked_bloom/mod.rs b/src/table/filter/blocked_bloom/mod.rs deleted file mode 100644 index ebaf62405..000000000 --- a/src/table/filter/blocked_bloom/mod.rs +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -mod builder; - -pub use builder::Builder; - -use super::bit_array::BitArrayReader; -use crate::{ - file::MAGIC_BYTES, - table::filter::{standard_bloom::builder::secondary_hash, FilterType}, -}; -use byteorder::{LittleEndian, ReadBytesExt}; -use std::io::{Cursor, Read}; - -const CACHE_LINE_BYTES: usize = 64; - -/// A blocked bloom filter -/// -/// Allows buffering the key hashes before actual filter construction -/// which is needed to properly calculate the filter size, as the number of items -/// are unknown during table construction. -/// -/// The filter uses double hashing instead of `k` hash functions, see: -/// -#[derive(Debug)] -pub struct BlockedBloomFilterReader<'a> { - /// Raw bytes exposed as bit array - inner: BitArrayReader<'a>, - - /// Number of hash functions - pub(crate) k: usize, - - /// Number of blocks in the blocked bloom filter - pub(crate) num_blocks: usize, -} - -impl<'a> BlockedBloomFilterReader<'a> { - #[expect( - clippy::cast_possible_truncation, - reason = "bloom filter metadata (num_blocks, k, offset) always fits in usize" - )] - pub fn new(slice: &'a [u8]) -> crate::Result { - let mut reader = Cursor::new(slice); - - // Check header - let mut magic = [0u8; MAGIC_BYTES.len()]; - reader.read_exact(&mut magic)?; - - if magic != MAGIC_BYTES { - return Err(crate::Error::InvalidHeader("BloomFilter")); - } - - // NOTE: Filter type - let filter_type = reader.read_u8()?; - let filter_type = FilterType::try_from(filter_type)?; - if filter_type != FilterType::BlockedBloom { - return Err(crate::Error::InvalidHeader( - "BloomFilter: wrong filter type", - )); - } - - // NOTE: Hash type (unused, must be 0) - let hash_type = reader.read_u8()?; - if hash_type != 0 { - return Err(crate::Error::InvalidHeader( - "BloomFilter: invalid hash type", - )); - } - - let num_blocks = reader.read_u64::()? as usize; - let k = reader.read_u64::()? as usize; - - let offset = reader.position() as usize; - - let data = slice - .get(offset..) - .ok_or(crate::Error::InvalidHeader("BloomFilter: truncated data"))?; - - Ok(Self { - k, - num_blocks, - inner: BitArrayReader::new(data), - }) - } - - /// Returns `true` if the hash may be contained. - /// - /// Will never have a false negative. - #[must_use] - #[expect( - clippy::cast_possible_truncation, - reason = "block_idx and bit_idx are bounded by filter dimensions which fit in usize" - )] - pub fn contains_hash(&self, mut h1: u64) -> bool { - let mut h2 = secondary_hash(h1); - - let block_idx = h1 % (self.num_blocks as u64); - - for i in 1..(self.k as u64) { - let bit_idx = h1 % (CACHE_LINE_BYTES as u64 * 8); - - if !self.has_bit(block_idx as usize, bit_idx as usize) { - return false; - } - - h1 = h1.wrapping_add(h2); - h2 = h2.wrapping_mul(i); - } - - true - } - - /// Returns `true` if the bit at `idx` is `1`. - fn has_bit(&self, block_idx: usize, idx_in_block: usize) -> bool { - self.inner - .get(Builder::get_bit_idx(block_idx, idx_in_block)) - } - - /// Gets the hash of a key. - #[must_use] - pub fn get_hash(key: &[u8]) -> u64 { - Builder::get_hash(key) - } - - /// Returns `true` if the item may be contained. - /// - /// Will never have a false negative. - #[must_use] - pub fn contains(&self, key: &[u8]) -> bool { - self.contains_hash(Self::get_hash(key)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use test_log::test; - - #[test] - fn filter_bloom_blocked_serde_round_trip() -> crate::Result<()> { - let mut filter = Builder::with_fp_rate(10, 0.0001); - - let keys = &[ - b"item0", b"item1", b"item2", b"item3", b"item4", b"item5", b"item6", b"item7", - b"item8", b"item9", - ]; - - for key in keys { - filter.set_with_hash(BlockedBloomFilterReader::get_hash(*key)); - } - - let filter_bytes = filter.build(); - let filter_copy = BlockedBloomFilterReader::new(&filter_bytes)?; - - assert_eq!(filter.k, filter_copy.k); - assert_eq!(filter.num_blocks, filter_copy.num_blocks); - assert!(!filter_copy.contains(b"asdasads")); - assert!(!filter_copy.contains(b"item10")); - assert!(!filter_copy.contains(b"cxycxycxy")); - - Ok(()) - } - - #[test] - fn filter_bloom_blocked_basic() -> crate::Result<()> { - let mut filter = Builder::with_fp_rate(10, 0.0001); - - let keys = [ - b"item0" as &[u8], - b"item1", - b"item2", - b"item3", - b"item4", - b"item5", - b"item6", - b"item7", - b"item8", - b"item9", - ]; - - for key in &keys { - filter.set_with_hash(Builder::get_hash(key)); - } - - let filter_bytes = filter.build(); - let filter = BlockedBloomFilterReader::new(&filter_bytes)?; - - for key in &keys { - assert!(filter.contains(key)); - } - - assert!(!filter.contains(b"asdasdasdasdasdasdasd")); - - Ok(()) - } - - #[test] - fn corrupted_filter_type_returns_error() { - // Build a valid filter, then corrupt the filter-type byte - let mut filter = Builder::with_fp_rate(10, 0.0001); - for i in 0..10u8 { - filter.set_with_hash(Builder::get_hash(&[i])); - } - let mut bytes = filter.build(); - - // Filter type byte is right after MAGIC_BYTES (4 bytes) - // Set it to StandardBloom (0) instead of BlockedBloom (1) - bytes[crate::file::MAGIC_BYTES.len()] = 0; - - let result = BlockedBloomFilterReader::new(&bytes); - assert!( - result.is_err(), - "wrong filter type should return error, not panic" - ); - } - - #[test] - fn corrupted_hash_type_returns_error() { - let mut filter = Builder::with_fp_rate(10, 0.0001); - for i in 0..10u8 { - filter.set_with_hash(Builder::get_hash(&[i])); - } - let mut bytes = filter.build(); - - // Hash type byte is after MAGIC_BYTES + filter_type (1 byte) - bytes[crate::file::MAGIC_BYTES.len() + 1] = 99; - - let result = BlockedBloomFilterReader::new(&bytes); - assert!( - result.is_err(), - "invalid hash type should return error, not panic" - ); - } - - #[test] - fn truncated_filter_returns_error() { - // Provide only the magic bytes — not enough data for the full header - let result = BlockedBloomFilterReader::new(&crate::file::MAGIC_BYTES); - assert!( - result.is_err(), - "truncated input should return error, not panic" - ); - } -} diff --git a/src/table/filter/mod.rs b/src/table/filter/mod.rs index d0b1d794c..4689feceb 100644 --- a/src/table/filter/mod.rs +++ b/src/table/filter/mod.rs @@ -4,7 +4,6 @@ pub mod bit_array; pub mod block; -pub mod blocked_bloom; pub mod standard_bloom; use standard_bloom::Builder as StandardBloomFilterBuilder; From ee4c8ec4cffd196d01d1359b5d3a9848d87121d4 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 21:58:22 +0200 Subject: [PATCH 07/13] test: assert file deletion actually happened in corruption tests Add deleted flag + assertion to both missing_table_without_routes and deleted_sst_with_routes_configured tests so they fail explicitly if the test setup didn't actually remove a file. --- tests/level_routing.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/level_routing.rs b/tests/level_routing.rs index 93b2e70ea..1d93364c7 100644 --- a/tests/level_routing.rs +++ b/tests/level_routing.rs @@ -711,13 +711,16 @@ fn missing_table_without_routes_returns_unrecoverable() -> lsm_tree::Result<()> // Phase 2: delete a table file to simulate corruption let tables_dir = dir.path().join("primary").join("tables"); + let mut deleted = false; for entry in std::fs::read_dir(&tables_dir)? { let entry = entry?; if entry.file_type()?.is_file() { std::fs::remove_file(entry.path())?; - break; // remove just one + deleted = true; + break; } } + assert!(deleted, "expected to delete at least one table file"); // Phase 3: reopen without routes — should get Unrecoverable, not RouteMismatch { @@ -753,13 +756,19 @@ fn deleted_sst_with_routes_configured_returns_unrecoverable() -> lsm_tree::Resul // Phase 2: delete a table file from the hot tier (simulates corruption) let hot_tables_dir = dir.path().join("hot").join("tables"); + let mut deleted = false; for entry in std::fs::read_dir(&hot_tables_dir)? { let entry = entry?; if entry.file_type()?.is_file() { std::fs::remove_file(entry.path())?; + deleted = true; break; } } + assert!( + deleted, + "expected to delete at least one table file from hot tier" + ); // Phase 3: reopen WITH THE SAME routes — L0 is covered by hot route, // so the missing table is corruption, NOT a route mismatch From 60efdc3c8e9ad70c88bc719bc9a7644cdbb26a24 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 22:00:18 +0200 Subject: [PATCH 08/13] docs(tree): document route detection heuristic limitation Add code comment noting that level-based RouteMismatch detection is best-effort and cannot detect same-level path changes. Persisting route provenance per-table would enable exact detection but requires a manifest format change. --- src/tree/mod.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tree/mod.rs b/src/tree/mod.rs index fd2c0bfaa..273cd73c2 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1626,9 +1626,12 @@ impl Tree { } if tables.len() < cnt { - // Route configuration is NOT persisted. To distinguish a removed - // route (config error) from a deleted SST (data corruption), check - // each missing table's level against the current routes: + // Route configuration is NOT persisted. This is a best-effort + // heuristic: it checks each missing table's level against the + // current routes, but cannot detect same-level path changes + // (e.g., L0 routed to /hot_old → /hot_new). Persisting route + // provenance per-table in the manifest would enable exact + // detection but requires a format change. // // - Level IS covered by a current route → its directory was scanned // and the file was not found → data corruption / deletion. From 770c646eb040bbbe877e2f1820fc2edfb9c1faaf Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 22:07:43 +0200 Subject: [PATCH 09/13] test: add mixed covered/uncovered missing tables scenario MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercises the path where some missing tables are on covered levels (corruption) and some on uncovered levels (route mismatch) — must return Unrecoverable since at least one SST was genuinely lost. --- tests/level_routing.rs | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/level_routing.rs b/tests/level_routing.rs index 1d93364c7..e8cacd68a 100644 --- a/tests/level_routing.rs +++ b/tests/level_routing.rs @@ -792,6 +792,67 @@ fn deleted_sst_with_routes_configured_returns_unrecoverable() -> lsm_tree::Resul Ok(()) } +#[test] +fn mixed_missing_covered_and_uncovered_returns_unrecoverable() -> lsm_tree::Result<()> { + let dir = tempfile::tempdir()?; + + // Phase 1: write data with 3-tier config, flush twice to get tables in hot tier + { + let config = three_tier_config(dir.path()); + let tree = config.open()?; + + tree.insert("a", "value_a", 0); + tree.flush_active_memtable(0)?; + + tree.insert("b", "value_b", 1); + tree.flush_active_memtable(0)?; + } + + // Phase 2: delete a table from hot tier (covered by route 0..2) + let hot_tables_dir = dir.path().join("hot").join("tables"); + let mut deleted = false; + for entry in std::fs::read_dir(&hot_tables_dir)? { + let entry = entry?; + if entry.file_type()?.is_file() { + std::fs::remove_file(entry.path())?; + deleted = true; + break; + } + } + assert!(deleted, "expected to delete at least one table file"); + + // Phase 3: reopen WITHOUT warm route (2..5 removed) but WITH hot route (0..2) + // One table missing from covered level (deleted) + warm route removed. + // Since at least one missing table is on a covered level → Unrecoverable. + { + let hot = dir.path().join("hot"); + let config = Config::new( + dir.path().join("primary"), + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .level_routes(vec![LevelRoute { + levels: 0..2, + path: hot, + fs: Arc::new(StdFs), + }]); + + match config.open() { + Err(lsm_tree::Error::Unrecoverable) => {} + Err(lsm_tree::Error::RouteMismatch { expected, found }) => { + panic!( + "BUG: got RouteMismatch(expected={expected}, found={found}) \ + but at least one missing table is on a covered level" + ); + } + Err(e) => panic!("expected Unrecoverable, got: {e:?}"), + Ok(_) => panic!("expected error, but open succeeded"), + } + } + + Ok(()) +} + #[test] #[should_panic(expected = "empty or inverted level route range")] fn empty_range_panics() { From 3791c93a6e248c34c7bfc91473a7389f42fe03ea Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 22:14:22 +0200 Subject: [PATCH 10/13] test: fix mixed scenario to actually create tables on different levels Compact one table to L3 (warm tier) and leave another at L0 (hot tier) so the test genuinely exercises mixed covered+uncovered missing tables when reopening with a partial route config. --- tests/level_routing.rs | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/tests/level_routing.rs b/tests/level_routing.rs index e8cacd68a..0cf08d8dd 100644 --- a/tests/level_routing.rs +++ b/tests/level_routing.rs @@ -794,24 +794,42 @@ fn deleted_sst_with_routes_configured_returns_unrecoverable() -> lsm_tree::Resul #[test] fn mixed_missing_covered_and_uncovered_returns_unrecoverable() -> lsm_tree::Result<()> { + use lsm_tree::compaction::PullDown; + let dir = tempfile::tempdir()?; - // Phase 1: write data with 3-tier config, flush twice to get tables in hot tier + // Phase 1: create tables at DIFFERENT levels: + // - Compact one table to L3 (warm tier, 2..5) + // - Leave one table at L0 (hot tier, 0..2) { let config = three_tier_config(dir.path()); let tree = config.open()?; + // First write → flush to L0 → compact to L3 (warm tier) tree.insert("a", "value_a", 0); tree.flush_active_memtable(0)?; + tree.compact(Arc::new(PullDown(0, 3)), 1)?; - tree.insert("b", "value_b", 1); + // Second write → flush to L0 (stays in hot tier) + tree.insert("b", "value_b", 2); tree.flush_active_memtable(0)?; } - // Phase 2: delete a table from hot tier (covered by route 0..2) - let hot_tables_dir = dir.path().join("hot").join("tables"); + // Verify: warm tier has tables, hot tier has tables + let warm_tables = dir.path().join("warm").join("tables"); + let hot_tables = dir.path().join("hot").join("tables"); + assert!( + std::fs::read_dir(&warm_tables)?.count() > 0, + "warm tier should have at least one table" + ); + assert!( + std::fs::read_dir(&hot_tables)?.count() > 0, + "hot tier should have at least one table" + ); + + // Phase 2: delete a table from warm tier (covered by warm route 2..5) let mut deleted = false; - for entry in std::fs::read_dir(&hot_tables_dir)? { + for entry in std::fs::read_dir(&warm_tables)? { let entry = entry?; if entry.file_type()?.is_file() { std::fs::remove_file(entry.path())?; @@ -819,21 +837,21 @@ fn mixed_missing_covered_and_uncovered_returns_unrecoverable() -> lsm_tree::Resu break; } } - assert!(deleted, "expected to delete at least one table file"); + assert!(deleted, "expected to delete at least one warm table"); - // Phase 3: reopen WITHOUT warm route (2..5 removed) but WITH hot route (0..2) - // One table missing from covered level (deleted) + warm route removed. - // Since at least one missing table is on a covered level → Unrecoverable. + // Phase 3: reopen WITHOUT hot route (0..2 removed), WITH warm route (2..5) + // - Warm table deleted → missing on covered level (corruption) + // - Hot table unreachable → missing on uncovered level (route mismatch) + // Mixed: at least one on a covered level → Unrecoverable { - let hot = dir.path().join("hot"); let config = Config::new( dir.path().join("primary"), SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) .level_routes(vec![LevelRoute { - levels: 0..2, - path: hot, + levels: 2..5, + path: dir.path().join("warm"), fs: Arc::new(StdFs), }]); From dad413cda370be4361072b82185757e583a0bef4 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 22:30:49 +0200 Subject: [PATCH 11/13] fix(docs): correct RouteMismatch and level_routes doc wording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix level_route → level_routes typo in RouteMismatch rustdoc - Soften level_routes reopen contract: "may fail" with RouteMismatch or Unrecoverable depending on which levels are affected - Add comment explaining Guard trait import for into_inner() - Hoist log::error! args into let bindings for coverage visibility --- benches/tree.rs | 1 + src/config/mod.rs | 8 +++++--- src/error.rs | 2 +- src/tree/mod.rs | 12 ++++++------ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/benches/tree.rs b/benches/tree.rs index b04c964ec..036f16324 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -1,4 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; +// Guard trait import required for .into_inner() on IterGuardImpl use lsm_tree::{AbstractTree, Cache, Config, Guard, SeqNo, SequenceNumberCounter}; use std::sync::Arc; use tempfile::tempdir; diff --git a/src/config/mod.rs b/src/config/mod.rs index 0c362a11f..3f2e0e134 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -589,9 +589,11 @@ impl Config { /// /// Changing the mapping from levels to paths is allowed as long as /// the previously used folders remain covered. If old folders are - /// omitted, recovery will fail with - /// [`RouteMismatch`](crate::Error::RouteMismatch) because the missing - /// tables cannot be found. + /// omitted, recovery may fail with + /// [`RouteMismatch`](crate::Error::RouteMismatch) (when all missing + /// tables are on uncovered levels) or + /// [`Unrecoverable`](crate::Error::Unrecoverable) (when some missing + /// tables are on levels that are still covered). /// /// # Panics /// diff --git a/src/error.rs b/src/error.rs index fc4e85f6a..e42a23675 100644 --- a/src/error.rs +++ b/src/error.rs @@ -111,7 +111,7 @@ pub enum Error { /// /// Recovery found fewer tables on disk than the manifest expects, and all /// missing tables are on levels not covered by any current - /// [`level_route`](crate::Config::level_routes). This typically means a + /// [`level_routes`](crate::Config::level_routes). This typically means a /// previously configured route was removed, leaving its directory /// unreachable. /// diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 273cd73c2..54a558756 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1648,17 +1648,17 @@ impl Tree { .all(|(level, _, _)| !routes.iter().any(|r| r.levels.contains(level))); if all_missing_uncovered { + let found = tables.len(); + let missing_ids: Vec<_> = table_map.keys().collect(); + log::error!( - "Route mismatch: expected {} tables but found {} — \ + "Route mismatch: expected {cnt} tables but found {found} — \ level_routes do not cover all previously used levels. \ - Missing table IDs: {:?}", - cnt, - tables.len(), - table_map.keys(), + Missing table IDs: {missing_ids:?}", ); return Err(crate::Error::RouteMismatch { expected: cnt, - found: tables.len(), + found, }); } } From 7bc435e15d23e0d779dd1fa6159cde297df2362e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 22:48:15 +0200 Subject: [PATCH 12/13] fix(bench): pass TempDir by reference to preserve RAII cleanup TempDir was moved into Config::new, causing immediate drop and directory deletion. Pass by reference so TempDir lives for the duration of the benchmark closure. --- benches/tree.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benches/tree.rs b/benches/tree.rs index 036f16324..3ac8cd937 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -15,7 +15,7 @@ fn full_scan(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + &path, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -40,7 +40,7 @@ fn full_scan(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + &path, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -77,7 +77,7 @@ fn scan_vs_query(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + &path, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -146,7 +146,7 @@ fn scan_vs_prefix(c: &mut Criterion) { let path = tempdir().unwrap(); let tree = Config::new( - path, + &path, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -205,7 +205,7 @@ fn tree_get_pairs(c: &mut Criterion) { { let folder = tempfile::tempdir().unwrap(); let tree = Config::new( - folder, + &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -248,7 +248,7 @@ fn tree_get_pairs(c: &mut Criterion) { { let folder = tempfile::tempdir().unwrap(); let tree = Config::new( - folder, + &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -296,7 +296,7 @@ fn disk_point_read(c: &mut Criterion) { let folder = tempdir().unwrap(); let tree = Config::new( - folder, + &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) @@ -332,7 +332,7 @@ fn disjoint_tree_minmax(c: &mut Criterion) { let folder = tempfile::tempdir().unwrap(); let tree = Config::new( - folder, + &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) From d31d106198451f4b679336d8bde8c57cb8358c17 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Tue, 24 Mar 2026 23:01:29 +0200 Subject: [PATCH 13/13] fix(bench): panic on iterator errors instead of silently skipping filter_map(ok()) swallowed scan errors, making benchmarks look correct even with broken entries. Use map(unwrap()) to fail fast. --- benches/tree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benches/tree.rs b/benches/tree.rs index 3ac8cd937..f96c3aeed 100644 --- a/benches/tree.rs +++ b/benches/tree.rs @@ -99,7 +99,7 @@ fn scan_vs_query(c: &mut Criterion) { b.iter(|| { let count = tree .iter(MAX, None) - .filter_map(|guard| guard.into_inner().ok()) + .map(|guard| guard.into_inner().unwrap()) .filter(|(key, _)| { let buf = &key[..8]; let (int_bytes, _rest) = buf.split_at(std::mem::size_of::()); @@ -176,7 +176,7 @@ fn scan_vs_prefix(c: &mut Criterion) { b.iter(|| { let count = tree .iter(MAX, None) - .filter_map(|guard| guard.into_inner().ok()) + .map(|guard| guard.into_inner().unwrap()) .filter(|(key, _)| key.starts_with(prefix.as_bytes())) .count(); assert_eq!(count, 10);