diff --git a/src/abstract.rs b/src/abstract.rs index 0a3f123a2..0d1dc00b3 100644 --- a/src/abstract.rs +++ b/src/abstract.rs @@ -3,8 +3,9 @@ // (found in the LICENSE-* files in the repository) use crate::{ - iter_guard::IterGuardImpl, table::Table, version::Version, vlog::BlobFile, AnyTree, BlobTree, - Config, Guard, InternalValue, KvPair, Memtable, SeqNo, TableId, Tree, UserKey, UserValue, + fs::FileSystem, iter_guard::IterGuardImpl, table::Table, version::Version, vlog::BlobFile, + AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SeqNo, TableId, Tree, + UserKey, UserValue, }; use std::{ ops::RangeBounds, @@ -13,15 +14,14 @@ use std::{ pub type RangeItem = crate::Result; -type FlushToTablesResult = (Vec, Option>); +type FlushToTablesResult = (Vec>, Option>>); /// Generic Tree API #[enum_dispatch::enum_dispatch] -pub trait AbstractTree { +pub trait AbstractTree { /// Debug method for tracing the MVCC history of a key. #[doc(hidden)] fn print_trace(&self, key: &[u8]) -> crate::Result<()>; - /// Returns the number of cached table file descriptors. fn table_file_cache_size(&self) -> usize; @@ -44,10 +44,10 @@ pub trait AbstractTree { fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result>; #[doc(hidden)] - fn current_version(&self) -> Version; + fn current_version(&self) -> Version; #[doc(hidden)] - fn get_version_history_lock(&self) -> RwLockWriteGuard<'_, crate::version::SuperVersions>; + fn get_version_history_lock(&self) -> RwLockWriteGuard<'_, crate::version::SuperVersions>; /// Seals the active memtable and flushes to table(s). /// @@ -124,7 +124,7 @@ pub trait AbstractTree { &self, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static> { + ) -> Box> + Send + 'static> { self.range::<&[u8], _>(.., seqno, index) } @@ -136,7 +136,7 @@ pub trait AbstractTree { prefix: K, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static>; + ) -> Box> + Send + 'static>; /// Returns an iterator over a range of items. /// @@ -146,7 +146,7 @@ pub trait AbstractTree { range: R, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static>; + ) -> Box> + Send + 'static>; /// Returns the approximate number of tombstones in the tree. fn tombstone_count(&self) -> u64; @@ -220,7 +220,7 @@ pub trait AbstractTree { fn flush_to_tables( &self, stream: impl Iterator>, - ) -> crate::Result>; + ) -> crate::Result>>; /// Atomically registers flushed tables into the tree, removing their associated sealed memtables. /// @@ -229,8 +229,8 @@ pub trait AbstractTree { /// Will return `Err` if an IO error occurs. fn register_tables( &self, - tables: &[Table], - blob_files: Option<&[BlobFile]>, + tables: &[Table], + blob_files: Option<&[BlobFile]>, frag_map: Option, sealed_memtables_to_delete: &[crate::tree::inner::MemtableId], gc_watermark: SeqNo, @@ -249,7 +249,7 @@ pub trait AbstractTree { /// Will return `Err` if an IO error occurs. fn compact( &self, - strategy: Arc, + strategy: Arc>, seqno_threshold: SeqNo, ) -> crate::Result<()>; @@ -257,7 +257,7 @@ pub trait AbstractTree { fn get_next_table_id(&self) -> TableId; /// Returns the tree config. - fn tree_config(&self) -> &Config; + fn tree_config(&self) -> &Config; /// Returns the highest sequence number. fn get_highest_seqno(&self) -> Option { @@ -403,7 +403,7 @@ pub trait AbstractTree { &self, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Option { + ) -> Option> { self.iter(seqno, index).next() } @@ -436,7 +436,7 @@ pub trait AbstractTree { &self, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Option { + ) -> Option> { self.iter(seqno, index).next_back() } diff --git a/src/any_tree.rs b/src/any_tree.rs index 29cfbaf6b..79e3767c2 100644 --- a/src/any_tree.rs +++ b/src/any_tree.rs @@ -2,16 +2,16 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::{BlobTree, Tree}; +use crate::{fs::FileSystem, BlobTree, Tree}; use enum_dispatch::enum_dispatch; /// May be a standard [`Tree`] or a [`BlobTree`] #[derive(Clone)] -#[enum_dispatch(AbstractTree)] -pub enum AnyTree { +#[enum_dispatch(AbstractTree)] +pub enum AnyTree { /// Standard LSM-tree, see [`Tree`] - Standard(Tree), + Standard(Tree), /// Key-value separated LSM-tree, see [`BlobTree`] - Blob(BlobTree), + Blob(BlobTree), } diff --git a/src/blob_tree/gc.rs b/src/blob_tree/gc.rs index 040adf0e3..6387a954b 100644 --- a/src/blob_tree/gc.rs +++ b/src/blob_tree/gc.rs @@ -4,7 +4,7 @@ use crate::{ blob_tree::handle::BlobIndirection, coding::Decode, compaction::stream::ExpiredKvCallback, - version::BlobFileList, vlog::BlobFileId, + fs::FileSystem, version::BlobFileList, vlog::BlobFileId, }; /// Tracks fragmentation information in a blob file @@ -58,7 +58,7 @@ impl FragmentationMap { /// Removes blob file entries that are not part of the value log (anymore) /// to reduce linear memory growth. - pub fn prune(&mut self, value_log: &BlobFileList) { + pub fn prune(&mut self, value_log: &BlobFileList) { self.0.retain(|&k, _| value_log.contains_key(k)); } diff --git a/src/blob_tree/ingest.rs b/src/blob_tree/ingest.rs index 5d4622ade..1b4a1d2f4 100644 --- a/src/blob_tree/ingest.rs +++ b/src/blob_tree/ingest.rs @@ -5,6 +5,7 @@ use crate::{ blob_tree::handle::BlobIndirection, file::BLOBS_FOLDER, + fs::FileSystem, table::Table, tree::ingest::Ingestion as TableIngestion, vlog::{BlobFileWriter, ValueHandle}, @@ -17,22 +18,22 @@ use crate::{ /// /// Uses table ingestion for the index and a blob file writer for large /// values so both streams advance together. -pub struct BlobIngestion<'a> { - tree: &'a crate::BlobTree, - pub(crate) table: TableIngestion<'a>, - pub(crate) blob: BlobFileWriter, +pub struct BlobIngestion<'a, F: FileSystem> { + tree: &'a crate::BlobTree, + pub(crate) table: TableIngestion<'a, F>, + pub(crate) blob: BlobFileWriter, seqno: SeqNo, separation_threshold: u32, last_key: Option, } -impl<'a> BlobIngestion<'a> { +impl<'a, F: FileSystem + 'static> BlobIngestion<'a, F> { /// Creates a new ingestion. /// /// # Errors /// /// Will return `Err` if an IO error occurs. - pub fn new(tree: &'a crate::BlobTree) -> crate::Result { + pub fn new(tree: &'a crate::BlobTree) -> crate::Result { #[expect( clippy::expect_used, reason = "cannot define blob tree without kv separation options" @@ -47,7 +48,7 @@ impl<'a> BlobIngestion<'a> { let blob_file_size = kv.file_target_size; let table = TableIngestion::new(&tree.index)?; - let blob = BlobFileWriter::new( + let blob = BlobFileWriter::::new( tree.index.0.blob_file_id_counter.clone(), tree.index.config.path.join(BLOBS_FOLDER), tree.index.id, @@ -226,8 +227,8 @@ impl<'a> BlobIngestion<'a> { // pressure unnecessarily. let created_tables = results .into_iter() - .map(|(table_id, checksum)| -> crate::Result
{ - Table::recover( + .map(|(table_id, checksum)| -> crate::Result> { + Table::::recover( index .config .path @@ -270,7 +271,7 @@ impl<'a> BlobIngestion<'a> { // Perform maintenance on the version history (e.g., clean up old versions). // We use gc_watermark=0 since ingestion doesn't affect sealed memtables. - if let Err(e) = version_lock.maintenance(&index.config.path, 0) { + if let Err(e) = version_lock.maintenance::(&index.config.path, 0) { log::warn!("Version GC failed: {e:?}"); } @@ -278,7 +279,7 @@ impl<'a> BlobIngestion<'a> { } #[inline] - fn index(&self) -> &crate::Tree { + fn index(&self) -> &crate::Tree { &self.tree.index } } diff --git a/src/blob_tree/mod.rs b/src/blob_tree/mod.rs index 6e779be58..08cd178f0 100644 --- a/src/blob_tree/mod.rs +++ b/src/blob_tree/mod.rs @@ -11,6 +11,7 @@ pub use gc::{FragmentationEntry, FragmentationMap}; use crate::{ coding::Decode, + fs::FileSystem, iter_guard::{IterGuard, IterGuardImpl}, r#abstract::{AbstractTree, RangeItem}, table::Table, @@ -28,13 +29,13 @@ use std::{ }; /// Iterator value guard -pub struct Guard { - tree: crate::BlobTree, - version: Version, +pub struct Guard { + tree: crate::BlobTree, + version: Version, kv: crate::Result, } -impl IterGuard for Guard { +impl IterGuard for Guard { fn into_inner_if( self, pred: impl Fn(&UserKey) -> bool, @@ -82,11 +83,11 @@ impl IterGuard for Guard { } } -fn resolve_value_handle( +fn resolve_value_handle( tree_id: TreeId, blobs_folder: &Path, cache: &Cache, - version: &Version, + version: &Version, item: InternalValue, ) -> RangeItem { if item.key.value_type.is_indirection() { @@ -94,7 +95,7 @@ fn resolve_value_handle( let vptr = BlobIndirection::decode_from(&mut cursor)?; // Resolve indirection using value log - match Accessor::new(&version.blob_files).get( + match Accessor::new(version.blob_files.as_ref()).get( tree_id, blobs_folder, &item.key.user_key, @@ -126,24 +127,32 @@ fn resolve_value_handle( /// This tree is a composite structure, consisting of an /// index tree (LSM-tree) and a log-structured value log /// to reduce write amplification. -#[derive(Clone)] -pub struct BlobTree { +pub struct BlobTree { /// Index tree that holds value handles or small inline values #[doc(hidden)] - pub index: crate::Tree, + pub index: crate::Tree, blobs_folder: Arc, } -impl BlobTree { - pub(crate) fn open(config: Config) -> crate::Result { +impl Clone for BlobTree { + fn clone(&self) -> Self { + Self { + index: self.index.clone(), + blobs_folder: self.blobs_folder.clone(), + } + } +} + +impl BlobTree { + pub(crate) fn open(config: Config) -> crate::Result { use crate::file::{fsync_directory, BLOBS_FOLDER}; let index = crate::Tree::open(config)?; let blobs_folder = index.config.path.join(BLOBS_FOLDER); - std::fs::create_dir_all(&blobs_folder)?; - fsync_directory(&blobs_folder)?; + F::create_dir_all(&blobs_folder)?; + fsync_directory::(&blobs_folder)?; let blob_file_id_to_continue_with = index .current_version() @@ -165,18 +174,17 @@ impl BlobTree { } } -impl AbstractTree for BlobTree { +impl AbstractTree for BlobTree { fn print_trace(&self, key: &[u8]) -> crate::Result<()> { self.index.print_trace(key) } - fn table_file_cache_size(&self) -> usize { self.index.table_file_cache_size() } fn get_version_history_lock( &self, - ) -> std::sync::RwLockWriteGuard<'_, crate::version::SuperVersions> { + ) -> std::sync::RwLockWriteGuard<'_, crate::version::SuperVersions> { self.index.get_version_history_lock() } @@ -192,7 +200,7 @@ impl AbstractTree for BlobTree { self.index.get_internal_entry(key, seqno) } - fn current_version(&self) -> Version { + fn current_version(&self) -> Version { self.index.current_version() } @@ -210,7 +218,7 @@ impl AbstractTree for BlobTree { prefix: K, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static> { + ) -> Box> + Send + 'static> { use crate::range::prefix_to_range; let super_version = self.index.get_version_for_snapshot(seqno); @@ -219,15 +227,14 @@ impl AbstractTree for BlobTree { let range = prefix_to_range(prefix.as_ref()); Box::new( - crate::Tree::create_internal_range(super_version.clone(), &range, seqno, index).map( - move |kv| { - IterGuardImpl::Blob(Guard { + crate::Tree::::create_internal_range(super_version.clone(), &range, seqno, index) + .map(move |kv| { + IterGuardImpl::::Blob(Guard { tree: tree.clone(), version: super_version.version.clone(), kv, }) - }, - ), + }), ) } @@ -236,20 +243,19 @@ impl AbstractTree for BlobTree { range: R, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static> { + ) -> Box> + Send + 'static> { let super_version = self.index.get_version_for_snapshot(seqno); let tree = self.clone(); Box::new( - crate::Tree::create_internal_range(super_version.clone(), &range, seqno, index).map( - move |kv| { - IterGuardImpl::Blob(Guard { + crate::Tree::::create_internal_range(super_version.clone(), &range, seqno, index) + .map(move |kv| { + IterGuardImpl::::Blob(Guard { tree: tree.clone(), version: super_version.version.clone(), kv, }) - }, - ), + }), ) } @@ -335,7 +341,7 @@ impl AbstractTree for BlobTree { fn flush_to_tables( &self, stream: impl Iterator>, - ) -> crate::Result, Option>)>> { + ) -> crate::Result>, Option>>)>> { use crate::{ coding::Encode, file::BLOBS_FOLDER, file::TABLES_FOLDER, table::multi_writer::MultiWriter, @@ -364,7 +370,7 @@ impl AbstractTree for BlobTree { log::debug!("=> to table(s) in {}", table_folder.display()); log::debug!("=> to blob file(s) at {}", self.blobs_folder.display()); - let mut table_writer = MultiWriter::new( + let mut table_writer = MultiWriter::::new( table_folder.clone(), self.index.table_id_counter.clone(), 64 * 1_024 * 1_024, @@ -404,7 +410,7 @@ impl AbstractTree for BlobTree { .as_ref() .expect("kv separation options should exist"); - let mut blob_writer = BlobFileWriter::new( + let mut blob_writer = BlobFileWriter::::new( self.index.0.blob_file_id_counter.clone(), self.index.config.path.join(BLOBS_FOLDER), self.id(), @@ -469,8 +475,8 @@ impl AbstractTree for BlobTree { // Load tables let tables = result .into_iter() - .map(|(table_id, checksum)| -> crate::Result
{ - Table::recover( + .map(|(table_id, checksum)| -> crate::Result> { + Table::::recover( table_folder.join(table_id.to_string()), checksum, 0, @@ -490,8 +496,8 @@ impl AbstractTree for BlobTree { fn register_tables( &self, - tables: &[Table], - blob_files: Option<&[BlobFile]>, + tables: &[Table], + blob_files: Option<&[BlobFile]>, frag_map: Option, sealed_memtables_to_delete: &[MemtableId], gc_watermark: SeqNo, @@ -507,7 +513,7 @@ impl AbstractTree for BlobTree { fn compact( &self, - strategy: Arc, + strategy: Arc>, seqno_threshold: SeqNo, ) -> crate::Result<()> { self.index.compact(strategy, seqno_threshold) @@ -517,7 +523,7 @@ impl AbstractTree for BlobTree { self.index.get_next_table_id() } - fn tree_config(&self) -> &Config { + fn tree_config(&self) -> &Config { &self.index.config } diff --git a/src/compaction/drop_range.rs b/src/compaction/drop_range.rs index 4134a195b..8e2f84b00 100644 --- a/src/compaction/drop_range.rs +++ b/src/compaction/drop_range.rs @@ -4,6 +4,7 @@ use super::{Choice, CompactionStrategy}; use crate::compaction::state::CompactionState; +use crate::fs::FileSystem; use crate::version::Version; use crate::{config::Config, slice::Slice, version::run::Ranged, KeyRange}; use crate::{HashSet, Table}; @@ -67,12 +68,12 @@ impl Strategy { } } -impl CompactionStrategy for Strategy { +impl CompactionStrategy for Strategy { fn get_name(&self) -> &'static str { "DropRangeCompaction" } - fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { + fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { let table_ids: HashSet<_> = version .iter_levels() .flat_map(|lvl| lvl.iter()) diff --git a/src/compaction/fifo.rs b/src/compaction/fifo.rs index 02a4aacea..d13024098 100644 --- a/src/compaction/fifo.rs +++ b/src/compaction/fifo.rs @@ -4,8 +4,8 @@ use super::{Choice, CompactionStrategy}; use crate::{ - compaction::state::CompactionState, config::Config, time::unix_timestamp, version::Version, - HashSet, KvPair, + compaction::state::CompactionState, config::Config, fs::FileSystem, time::unix_timestamp, + version::Version, HashSet, KvPair, }; #[doc(hidden)] @@ -45,7 +45,7 @@ impl Strategy { } } -impl CompactionStrategy for Strategy { +impl CompactionStrategy for Strategy { fn get_name(&self) -> &'static str { NAME } @@ -71,7 +71,7 @@ impl CompactionStrategy for Strategy { ] } - fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { + fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { let first_level = version.l0(); // Early return avoids unnecessary work and keeps FIFO a no-op when there is nothing to do. diff --git a/src/compaction/flavour.rs b/src/compaction/flavour.rs index 0f7d78dbf..b2e7272cb 100644 --- a/src/compaction/flavour.rs +++ b/src/compaction/flavour.rs @@ -8,6 +8,7 @@ use crate::coding::{Decode, Encode}; use crate::compaction::worker::Options; use crate::compaction::Input as CompactionPayload; use crate::file::TABLES_FOLDER; +use crate::fs::FileSystem; use crate::table::multi_writer::MultiWriter; use crate::version::{SuperVersions, Version}; use crate::vlog::blob_file::scanner::ScanEntry; @@ -41,11 +42,11 @@ fn drain_blobs>>( Ok(()) } -pub(super) fn prepare_table_writer( - version: &Version, - opts: &Options, +pub(super) fn prepare_table_writer( + version: &Version, + opts: &Options, payload: &CompactionPayload, -) -> crate::Result { +) -> crate::Result> { let table_base_folder = opts.config.path.join(TABLES_FOLDER); let dst_lvl = payload.canonical_level.into(); @@ -72,7 +73,7 @@ pub(super) fn prepare_table_writer( opts.mvcc_gc_watermark, ); - let mut table_writer = MultiWriter::new( + let mut table_writer = MultiWriter::::new( table_base_folder, opts.table_id_generator.clone(), payload.target_size, @@ -117,14 +118,14 @@ pub(super) fn prepare_table_writer( } // TODO: find a better name -pub(super) trait CompactionFlavour { +pub(super) trait CompactionFlavour { fn write(&mut self, item: InternalValue) -> crate::Result<()>; #[warn(clippy::too_many_arguments)] fn finish( self: Box, - super_version: &mut SuperVersions, - opts: &Options, + super_version: &mut SuperVersions, + opts: &Options, payload: &CompactionPayload, dst_lvl: usize, blob_frag_map: FragmentationMap, @@ -132,20 +133,20 @@ pub(super) trait CompactionFlavour { } /// Compaction worker that will relocate blobs that sit in blob files that are being rewritten -pub struct RelocatingCompaction { - inner: StandardCompaction, - blob_scanner: Peekable, - blob_writer: BlobFileWriter, +pub struct RelocatingCompaction { + inner: StandardCompaction, + blob_scanner: Peekable>, + blob_writer: BlobFileWriter, rewriting_blob_file_ids: HashSet, - rewriting_blob_files: Vec, + rewriting_blob_files: Vec>, } -impl RelocatingCompaction { +impl RelocatingCompaction { pub fn new( - inner: StandardCompaction, - blob_scanner: Peekable, - blob_writer: BlobFileWriter, - rewriting_blob_files: Vec, + inner: StandardCompaction, + blob_scanner: Peekable>, + blob_writer: BlobFileWriter, + rewriting_blob_files: Vec>, ) -> Self { Self { inner, @@ -162,7 +163,7 @@ impl RelocatingCompaction { } } -impl CompactionFlavour for RelocatingCompaction { +impl CompactionFlavour for RelocatingCompaction { fn write(&mut self, item: InternalValue) -> crate::Result<()> { if item.key.value_type.is_indirection() { let mut reader = &item.value[..]; @@ -248,8 +249,8 @@ impl CompactionFlavour for RelocatingCompaction { fn finish( mut self: Box, - super_version: &mut SuperVersions, - opts: &Options, + super_version: &mut SuperVersions, + opts: &Options, payload: &CompactionPayload, dst_lvl: usize, blob_frag_map_diff: FragmentationMap, @@ -317,14 +318,14 @@ impl CompactionFlavour for RelocatingCompaction { } /// Standard compaction worker that just passes through all its data -pub struct StandardCompaction { +pub struct StandardCompaction { start: Instant, - table_writer: MultiWriter, - tables_to_rewrite: Vec
, + table_writer: MultiWriter, + tables_to_rewrite: Vec>, } -impl StandardCompaction { - pub fn new(table_writer: MultiWriter, tables_to_rewrite: Vec
) -> Self { +impl StandardCompaction { + pub fn new(table_writer: MultiWriter, tables_to_rewrite: Vec>) -> Self { Self { start: Instant::now(), table_writer, @@ -332,7 +333,7 @@ impl StandardCompaction { } } - fn consume_writer(self, opts: &Options, dst_lvl: usize) -> crate::Result> { + fn consume_writer(self, opts: &Options, dst_lvl: usize) -> crate::Result>> { let table_base_folder = self.table_writer.base_path.clone(); let pin_filter = opts.config.filter_block_pinning_policy.get(dst_lvl); @@ -341,8 +342,8 @@ impl StandardCompaction { self.table_writer .finish()? .into_iter() - .map(|(table_id, checksum)| -> crate::Result
{ - Table::recover( + .map(|(table_id, checksum)| -> crate::Result> { + Table::::recover( table_base_folder.join(table_id.to_string()), checksum, 0, @@ -359,7 +360,7 @@ impl StandardCompaction { } } -impl CompactionFlavour for StandardCompaction { +impl CompactionFlavour for StandardCompaction { fn write(&mut self, item: InternalValue) -> crate::Result<()> { let indirection = if item.key.value_type.is_indirection() { Some({ @@ -381,8 +382,8 @@ impl CompactionFlavour for StandardCompaction { fn finish( mut self: Box, - super_version: &mut SuperVersions, - opts: &Options, + super_version: &mut SuperVersions, + opts: &Options, payload: &CompactionPayload, dst_lvl: usize, blob_frag_map: FragmentationMap, diff --git a/src/compaction/leveled/mod.rs b/src/compaction/leveled/mod.rs index e60990d47..1c6b31222 100644 --- a/src/compaction/leveled/mod.rs +++ b/src/compaction/leveled/mod.rs @@ -9,6 +9,7 @@ use super::{Choice, CompactionStrategy, Input as CompactionInput}; use crate::{ compaction::state::{hidden_set::HiddenSet, CompactionState}, config::Config, + fs::FileSystem, slice_windows::{GrowingWindowsExt, ShrinkingWindowsExt}, table::{util::aggregate_run_key_range, Table}, version::{Run, Version}, @@ -16,9 +17,9 @@ use crate::{ }; /// Tries to find the most optimal compaction set from one level into the other. -fn pick_minimal_compaction( - curr_run: &Run
, - next_run: Option<&Run
>, +fn pick_minimal_compaction( + curr_run: &Run>, + next_run: Option<&Run>>, hidden_set: &HiddenSet, overshoot: u64, table_base_size: u64, @@ -232,7 +233,7 @@ impl Strategy { } } -impl CompactionStrategy for Strategy { +impl CompactionStrategy for Strategy { fn get_name(&self) -> &'static str { NAME } @@ -274,7 +275,7 @@ impl CompactionStrategy for Strategy { } #[expect(clippy::too_many_lines)] - fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { + fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { assert!(version.level_count() == 7, "should have exactly 7 levels"); // Trivial move into Lmax diff --git a/src/compaction/major.rs b/src/compaction/major.rs index 0e973cc87..50ff0e881 100644 --- a/src/compaction/major.rs +++ b/src/compaction/major.rs @@ -4,7 +4,8 @@ use super::{Choice, CompactionStrategy, Input as CompactionInput}; use crate::{ - compaction::state::CompactionState, config::Config, table::Table, version::Version, HashSet, + compaction::state::CompactionState, config::Config, fs::FileSystem, table::Table, + version::Version, HashSet, }; /// Compacts all tables into the last level @@ -28,12 +29,12 @@ impl Default for Strategy { } } -impl CompactionStrategy for Strategy { +impl CompactionStrategy for Strategy { fn get_name(&self) -> &'static str { "MajorCompaction" } - fn choose(&self, version: &Version, cfg: &Config, state: &CompactionState) -> Choice { + fn choose(&self, version: &Version, cfg: &Config, state: &CompactionState) -> Choice { let table_ids: HashSet<_> = version.iter_tables().map(Table::id).collect(); // NOTE: This should generally not occur because of the diff --git a/src/compaction/mod.rs b/src/compaction/mod.rs index 91c81cde2..6434c83fa 100644 --- a/src/compaction/mod.rs +++ b/src/compaction/mod.rs @@ -33,7 +33,8 @@ pub use movedown::Strategy as MoveDown; pub use pulldown::Strategy as PullDown; use crate::{ - compaction::state::CompactionState, config::Config, version::Version, HashSet, KvPair, TableId, + compaction::state::CompactionState, config::Config, fs::FileSystem, version::Version, HashSet, + KvPair, TableId, }; /// Input for compactor. @@ -82,7 +83,7 @@ pub enum Choice { /// The strategy receives the levels of the LSM-tree as argument /// and emits a choice on what to do. #[expect(clippy::module_name_repetitions)] -pub trait CompactionStrategy { +pub trait CompactionStrategy { /// Gets the compaction strategy name. fn get_name(&self) -> &'static str; @@ -92,5 +93,5 @@ pub trait CompactionStrategy { } /// Decides on what to do based on the current state of the LSM-tree's levels - fn choose(&self, version: &Version, config: &Config, state: &CompactionState) -> Choice; + fn choose(&self, version: &Version, config: &Config, state: &CompactionState) -> Choice; } diff --git a/src/compaction/movedown.rs b/src/compaction/movedown.rs index 2b9902a88..ae39d33dd 100644 --- a/src/compaction/movedown.rs +++ b/src/compaction/movedown.rs @@ -3,17 +3,19 @@ // (found in the LICENSE-* files in the repository) use super::{Choice, CompactionStrategy, Input}; -use crate::{compaction::state::CompactionState, table::Table, version::Version, Config}; +use crate::{ + compaction::state::CompactionState, fs::FileSystem, table::Table, version::Version, Config, +}; /// Moves down a level into the destination level. pub struct Strategy(pub u8, pub u8); -impl CompactionStrategy for Strategy { +impl CompactionStrategy for Strategy { fn get_name(&self) -> &'static str { "MoveDownCompaction" } - fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { + fn choose(&self, version: &Version, _: &Config, state: &CompactionState) -> Choice { if version.level_is_busy(usize::from(self.0), state.hidden_set()) { return Choice::DoNothing; } diff --git a/src/compaction/pulldown.rs b/src/compaction/pulldown.rs index 09a558fd5..f29094f76 100644 --- a/src/compaction/pulldown.rs +++ b/src/compaction/pulldown.rs @@ -5,6 +5,7 @@ use super::{Choice, CompactionStrategy}; use crate::{ compaction::{state::CompactionState, Input}, + fs::FileSystem, version::Version, Config, }; @@ -14,13 +15,13 @@ use crate::{ /// Used for unit tests. pub struct Strategy(pub u8, pub u8); -impl CompactionStrategy for Strategy { +impl CompactionStrategy for Strategy { fn get_name(&self) -> &'static str { "PullDownCompaction" } #[expect(clippy::expect_used)] - fn choose(&self, version: &Version, _: &Config, _: &CompactionState) -> Choice { + fn choose(&self, version: &Version, _: &Config, _: &CompactionState) -> Choice { let level = version .level(usize::from(self.0)) .expect("source level should exist"); diff --git a/src/compaction/worker.rs b/src/compaction/worker.rs index 94951f984..6ca1b975a 100644 --- a/src/compaction/worker.rs +++ b/src/compaction/worker.rs @@ -12,6 +12,7 @@ use crate::{ Choice, }, file::BLOBS_FOLDER, + fs::FileSystem, merge::Merger, run_scanner::RunScanner, stop_signal::StopSignal, @@ -31,7 +32,7 @@ use crate::metrics::Metrics; pub type CompactionReader<'a> = Box> + 'a>; /// Compaction options -pub struct Options { +pub struct Options { pub tree_id: TreeId, pub global_seqno: SequenceNumberCounter, @@ -43,12 +44,12 @@ pub struct Options { pub blob_file_id_generator: SequenceNumberCounter, /// Configuration of tree. - pub config: Arc, + pub config: Arc>, - pub version_history: Arc>, + pub version_history: Arc>>, /// Compaction strategy to use. - pub strategy: Arc, + pub strategy: Arc>, /// Stop signal to interrupt a compaction worker in case /// the tree is dropped. @@ -63,8 +64,8 @@ pub struct Options { pub metrics: Arc, } -impl Options { - pub fn from_tree(tree: &crate::Tree, strategy: Arc) -> Self { +impl Options { + pub fn from_tree(tree: &crate::Tree, strategy: Arc>) -> Self { Self { global_seqno: tree.config.seqno.clone(), visible_seqno: tree.config.visible_seqno.clone(), @@ -88,7 +89,7 @@ impl Options { /// Runs compaction task. /// /// This will block until the compactor is fully finished. -pub fn do_compaction(opts: &Options) -> crate::Result<()> { +pub fn do_compaction(opts: &Options) -> crate::Result<()> { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] let compaction_state = opts.compaction_state.lock().expect("lock is poisoned"); @@ -102,7 +103,7 @@ pub fn do_compaction(opts: &Options) -> crate::Result<()> { ); let choice = opts.strategy.choose( &version_history_lock.latest_version().version, - &opts.config, + opts.config.as_ref(), &compaction_state, ); @@ -133,7 +134,10 @@ pub fn do_compaction(opts: &Options) -> crate::Result<()> { } } -fn pick_run_indexes(run: &Run
, to_compact: &[TableId]) -> Option<(usize, usize)> { +fn pick_run_indexes( + run: &Run>, + to_compact: &[TableId], +) -> Option<(usize, usize)> { let lo = run .iter() .position(|table| to_compact.contains(&table.id()))?; @@ -145,8 +149,8 @@ fn pick_run_indexes(run: &Run
, to_compact: &[TableId]) -> Option<(usize, Some((lo, hi)) } -fn create_compaction_stream<'a>( - version: &Version, +fn create_compaction_stream<'a, F: FileSystem + 'a>( + version: &Version, to_compact: &[TableId], eviction_seqno: SeqNo, ) -> crate::Result>>>> { @@ -159,7 +163,7 @@ fn create_compaction_stream<'a>( continue; }; - readers.push(Box::new(RunScanner::culled( + readers.push(Box::new(RunScanner::::culled( run.clone(), (Some(lo), Some(hi)), )?)); @@ -180,9 +184,9 @@ fn create_compaction_stream<'a>( }) } -fn move_tables( +fn move_tables( compaction_state: &MutexGuard<'_, CompactionState>, - opts: &Options, + opts: &Options, payload: &CompactionPayload, ) -> crate::Result<()> { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] @@ -217,7 +221,8 @@ fn move_tables( &opts.visible_seqno, )?; - if let Err(e) = version_history_lock.maintenance(&opts.config.path, opts.mvcc_gc_watermark) { + if let Err(e) = version_history_lock.maintenance::(&opts.config.path, opts.mvcc_gc_watermark) + { log::error!("Manifest maintenance failed: {e:?}"); return Err(e); } @@ -226,11 +231,11 @@ fn move_tables( } /// Picks blob files to rewrite (defragment) -fn pick_blob_files_to_rewrite( +fn pick_blob_files_to_rewrite( picked_tables: &HashSet, - current_version: &Version, + current_version: &Version, blob_opts: &crate::KvSeparationOptions, -) -> crate::Result> { +) -> crate::Result>> { use crate::Table; // We start off by getting all the blob files that are referenced by the tables @@ -306,9 +311,9 @@ fn pick_blob_files_to_rewrite( Ok(linked_blob_files.into_iter().cloned().collect::>()) } -fn hidden_guard( +fn hidden_guard( payload: &CompactionPayload, - opts: &Options, + opts: &Options, f: impl FnOnce() -> crate::Result<()>, ) -> crate::Result<()> { f().inspect_err(|e| { @@ -325,10 +330,10 @@ fn hidden_guard( } #[expect(clippy::too_many_lines)] -fn merge_tables( +fn merge_tables( mut compaction_state: MutexGuard<'_, CompactionState>, - version_history_lock: RwLockReadGuard<'_, SuperVersions>, - opts: &Options, + version_history_lock: RwLockReadGuard<'_, SuperVersions>, + opts: &Options, payload: &CompactionPayload, ) -> crate::Result<()> { if opts.stop_signal.is_stopped() { @@ -407,7 +412,7 @@ fn merge_tables( log::debug!("No blob relocation needed"); Box::new(StandardCompaction::new(table_writer, tables)) - as Box + as Box> } else { log::debug!( "Relocate blob files: {:?}", @@ -417,14 +422,14 @@ fn merge_tables( .collect::>(), ); - let scanner = BlobFileMergeScanner::new( + let scanner = BlobFileMergeScanner::::new( blob_files_to_rewrite .iter() - .map(|bf| BlobFileScanner::new(&bf.0.path, bf.id())) + .map(|bf| BlobFileScanner::::new_with_fs(&bf.0.path, bf.id())) .collect::>>()?, ); - let writer = BlobFileWriter::new( + let writer = BlobFileWriter::::new( opts.blob_file_id_generator.clone(), opts.config.path.join(BLOBS_FOLDER), opts.tree_id, @@ -507,7 +512,7 @@ fn merge_tables( .show(payload.table_ids.iter().copied()); version_history_lock - .maintenance(&opts.config.path, opts.mvcc_gc_watermark) + .maintenance::(&opts.config.path, opts.mvcc_gc_watermark) .inspect_err(|e| { log::error!("Manifest maintenance failed: {e:?}"); })?; @@ -520,9 +525,9 @@ fn merge_tables( Ok(()) } -fn drop_tables( +fn drop_tables( compaction_state: MutexGuard<'_, CompactionState>, - opts: &Options, + opts: &Options, ids_to_drop: &[TableId], ) -> crate::Result<()> { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] @@ -579,7 +584,8 @@ fn drop_tables( &opts.visible_seqno, )?; - if let Err(e) = version_history_lock.maintenance(&opts.config.path, opts.mvcc_gc_watermark) { + if let Err(e) = version_history_lock.maintenance::(&opts.config.path, opts.mvcc_gc_watermark) + { log::error!("Manifest maintenance failed: {e:?}"); return Err(e); } @@ -610,6 +616,7 @@ mod tests { use crate::{ compaction::{state::CompactionState, Choice, CompactionStrategy, Input}, config::BlockSizePolicy, + fs::FileSystem, version::Version, AbstractTree, Config, KvSeparationOptions, SequenceNumberCounter, TableId, }; @@ -820,12 +827,12 @@ mod tests { fn blob_file_picking_simple() -> crate::Result<()> { struct InPlaceStrategy(Vec); - impl CompactionStrategy for InPlaceStrategy { + impl CompactionStrategy for InPlaceStrategy { fn get_name(&self) -> &'static str { "InPlaceCompaction" } - fn choose(&self, _: &Version, _: &Config, _: &CompactionState) -> Choice { + fn choose(&self, _: &Version, _: &Config, _: &CompactionState) -> Choice { Choice::Merge(Input { table_ids: self.0.iter().copied().collect(), dest_level: 6, diff --git a/src/config/mod.rs b/src/config/mod.rs index ae5e2a4f9..a8dbe0825 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -20,10 +20,13 @@ pub use restart_interval::RestartIntervalPolicy; pub type PartitioningPolicy = PinningPolicy; use crate::{ - path::absolute_path, version::DEFAULT_LEVEL_COUNT, AnyTree, BlobTree, Cache, CompressionType, - DescriptorTable, SequenceNumberCounter, Tree, + fs::{FileSystem, StdFileSystem}, + path::absolute_path, + version::DEFAULT_LEVEL_COUNT, + AnyTree, BlobTree, Cache, CompressionType, DescriptorTable, SequenceNumberCounter, Tree, }; use std::{ + marker::PhantomData, path::{Path, PathBuf}, sync::Arc, }; @@ -159,7 +162,7 @@ impl KvSeparationOptions { } /// Tree configuration builder -pub struct Config { +pub struct Config { /// Folder path #[doc(hidden)] pub path: PathBuf, @@ -170,7 +173,7 @@ pub struct Config { /// Descriptor table to use #[doc(hidden)] - pub descriptor_table: Option>, + pub descriptor_table: Option>>, /// Number of levels of the LSM tree (depth of tree) /// @@ -235,10 +238,13 @@ pub struct Config { pub(crate) seqno: SequenceNumberCounter, pub(crate) visible_seqno: SequenceNumberCounter, + + #[doc(hidden)] + pub(crate) phantom: PhantomData, } // TODO: remove default? -impl Default for Config { +impl Default for Config { fn default() -> Self { Self { path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)), @@ -249,7 +255,7 @@ impl Default for Config { cache: Arc::new(Cache::with_capacity_bytes( /* 16 MiB */ 16 * 1_024 * 1_024, )), - + phantom: PhantomData, data_block_restart_interval_policy: RestartIntervalPolicy::all(16), index_block_restart_interval_policy: RestartIntervalPolicy::all(1), @@ -293,9 +299,9 @@ impl Default for Config { } } -impl Config { - /// Initializes a new config - pub fn new>( +impl Config { + /// Initializes a new config for a specific filesystem implementation. + pub fn new_for_filesystem>( path: P, seqno: SequenceNumberCounter, visible_seqno: SequenceNumberCounter, @@ -307,7 +313,20 @@ impl Config { ..Default::default() } } +} + +impl Config { + /// Initializes a new config using the default [`StdFileSystem`]. + pub fn new>( + path: P, + seqno: SequenceNumberCounter, + visible_seqno: SequenceNumberCounter, + ) -> Self { + Self::new_for_filesystem(path, seqno, visible_seqno) + } +} +impl Config { /// Sets the global cache. /// /// You can create a global [`Cache`] and share it between multiple @@ -324,7 +343,11 @@ impl Config { /// /// Can be shared across trees. #[must_use] - pub fn use_descriptor_table(mut self, descriptor_table: Option>) -> Self { + #[doc(hidden)] + pub fn use_descriptor_table( + mut self, + descriptor_table: Option>>, + ) -> Self { self.descriptor_table = descriptor_table; self } @@ -454,13 +477,15 @@ impl Config { self.kv_separation_opts = opts; self } +} +impl Config { /// Opens a tree using the config. /// /// # Errors /// /// Will return `Err` if an IO error occurs. - pub fn open(self) -> crate::Result { + pub fn open(self) -> crate::Result> { Ok(if self.kv_separation_opts.is_some() { AnyTree::Blob(BlobTree::open(self)?) } else { diff --git a/src/descriptor_table.rs b/src/descriptor_table.rs index 12df8db2b..754147948 100644 --- a/src/descriptor_table.rs +++ b/src/descriptor_table.rs @@ -2,24 +2,24 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::GlobalTableId; +use crate::{fs::FileSystem, GlobalTableId}; use quick_cache::{sync::Cache as QuickCache, UnitWeighter}; -use std::{fs::File, sync::Arc}; +use std::sync::Arc; const TAG_BLOCK: u8 = 0; const TAG_BLOB: u8 = 1; -type Item = Arc; +type Item = Arc<::File>; #[derive(Eq, std::hash::Hash, PartialEq)] struct CacheKey(u8, u64, u64); /// Caches file descriptors to tables and blob files -pub struct DescriptorTable { - inner: QuickCache, +pub struct DescriptorTable { + inner: QuickCache, UnitWeighter, rustc_hash::FxBuildHasher>, } -impl DescriptorTable { +impl DescriptorTable { #[must_use] pub fn new(capacity: usize) -> Self { use quick_cache::sync::DefaultLifecycle; @@ -40,23 +40,23 @@ impl DescriptorTable { } #[must_use] - pub fn access_for_table(&self, id: &GlobalTableId) -> Option> { + pub fn access_for_table(&self, id: &GlobalTableId) -> Option> { let key = CacheKey(TAG_BLOCK, id.tree_id(), id.table_id()); self.inner.get(&key) } - pub fn insert_for_table(&self, id: GlobalTableId, item: Item) { + pub fn insert_for_table(&self, id: GlobalTableId, item: Item) { let key = CacheKey(TAG_BLOCK, id.tree_id(), id.table_id()); self.inner.insert(key, item); } #[must_use] - pub fn access_for_blob_file(&self, id: &GlobalTableId) -> Option> { + pub fn access_for_blob_file(&self, id: &GlobalTableId) -> Option> { let key = CacheKey(TAG_BLOB, id.tree_id(), id.table_id()); self.inner.get(&key) } - pub fn insert_for_blob_file(&self, id: GlobalTableId, item: Item) { + pub fn insert_for_blob_file(&self, id: GlobalTableId, item: Item) { let key = CacheKey(TAG_BLOB, id.tree_id(), id.table_id()); self.inner.insert(key, item); } diff --git a/src/file.rs b/src/file.rs index f958e4972..4d23c65fb 100644 --- a/src/file.rs +++ b/src/file.rs @@ -2,8 +2,11 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::Slice; -use std::{fs::File, io::Write, path::Path}; +use crate::{ + fs::{FileLike, FileSystem}, + Slice, +}; +use std::{io::Write, path::Path}; pub const MAGIC_BYTES: [u8; 4] = [b'L', b'S', b'M', 3]; @@ -12,7 +15,7 @@ pub const BLOBS_FOLDER: &str = "blobs"; pub const CURRENT_VERSION_FILE: &str = "current"; /// Reads bytes from a file using `pread`. -pub fn read_exact(file: &File, offset: u64, size: usize) -> std::io::Result { +pub fn read_exact(file: &impl FileLike, offset: u64, size: usize) -> std::io::Result { // SAFETY: This slice builder starts uninitialized, but we know its length // // We use read_at/seek_read which give us the number of bytes read @@ -25,32 +28,15 @@ pub fn read_exact(file: &File, offset: u64, size: usize) -> std::io::Result std::io::Result std::io::Result<()> { +pub fn rewrite_atomic(path: &Path, content: &[u8]) -> std::io::Result<()> { #[expect( clippy::expect_used, reason = "every file should have a parent directory" @@ -75,7 +61,7 @@ pub fn rewrite_atomic(path: &Path, content: &[u8]) -> std::io::Result<()> { // TODO: not sure why it fails on Windows... #[cfg(not(target_os = "windows"))] { - let file = std::fs::File::open(path)?; + let file = F::open(path)?; file.sync_all()?; #[expect( @@ -83,21 +69,21 @@ pub fn rewrite_atomic(path: &Path, content: &[u8]) -> std::io::Result<()> { reason = "files should always have a parent directory" )] let folder = path.parent().expect("should have parent folder"); - fsync_directory(folder)?; + fsync_directory::(folder)?; } Ok(()) } #[cfg(not(target_os = "windows"))] -pub fn fsync_directory(path: &Path) -> std::io::Result<()> { - let file = std::fs::File::open(path)?; +pub fn fsync_directory(path: &Path) -> std::io::Result<()> { + let file = F::open(path)?; debug_assert!(file.metadata()?.is_dir()); file.sync_all() } #[cfg(target_os = "windows")] -pub fn fsync_directory(path: &Path) -> std::io::Result<()> { +pub fn fsync_directory(_path: &Path) -> std::io::Result<()> { // Cannot fsync directory on Windows Ok(()) } @@ -105,7 +91,7 @@ pub fn fsync_directory(path: &Path) -> std::io::Result<()> { #[cfg(test)] mod tests { use super::*; - use std::fs::File; + use crate::fs::FileSystem; use std::io::Write; use test_log::test; @@ -115,13 +101,13 @@ mod tests { let path = dir.path().join("test.txt"); { - let mut file = File::create(&path)?; + let mut file = ::create(&path)?; write!(file, "asdasdasdasdasd")?; } - rewrite_atomic(&path, b"newcontent")?; + rewrite_atomic::(&path, b"newcontent")?; - let content = std::fs::read_to_string(&path)?; + let content = crate::fs::StdFileSystem::read_to_string(&path)?; assert_eq!("newcontent", content); Ok(()) diff --git a/src/file_accessor.rs b/src/file_accessor.rs index f5e04ff1c..354447082 100644 --- a/src/file_accessor.rs +++ b/src/file_accessor.rs @@ -2,24 +2,23 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::descriptor_table::DescriptorTable; use crate::GlobalTableId; -use std::{fs::File, sync::Arc}; +use crate::{descriptor_table::DescriptorTable, fs::FileSystem}; +use std::sync::Arc; /// Allows accessing a file (either cached or pinned) -#[derive(Clone)] -pub enum FileAccessor { +pub enum FileAccessor { /// Pinned file descriptor /// /// This is used in case file descriptor cache is `None` (to skip cache lookups) - File(Arc), + File(Arc), /// Access to file descriptor cache - DescriptorTable(Arc), + DescriptorTable(Arc>), } -impl FileAccessor { - pub fn as_descriptor_table(&self) -> Option<&DescriptorTable> { +impl FileAccessor { + pub fn as_descriptor_table(&self) -> Option<&DescriptorTable> { match self { Self::DescriptorTable(d) => Some(d), Self::File(_) => None, @@ -27,21 +26,21 @@ impl FileAccessor { } #[must_use] - pub fn access_for_table(&self, table_id: &GlobalTableId) -> Option> { + pub fn access_for_table(&self, table_id: &GlobalTableId) -> Option> { match self { Self::File(fd) => Some(fd.clone()), Self::DescriptorTable(descriptor_table) => descriptor_table.access_for_table(table_id), } } - pub fn insert_for_table(&self, table_id: GlobalTableId, fd: Arc) { + pub fn insert_for_table(&self, table_id: GlobalTableId, fd: Arc) { if let Self::DescriptorTable(descriptor_table) = self { descriptor_table.insert_for_table(table_id, fd); } } #[must_use] - pub fn access_for_blob_file(&self, table_id: &GlobalTableId) -> Option> { + pub fn access_for_blob_file(&self, table_id: &GlobalTableId) -> Option> { match self { Self::File(fd) => Some(fd.clone()), Self::DescriptorTable(descriptor_table) => { @@ -50,14 +49,25 @@ impl FileAccessor { } } - pub fn insert_for_blob_file(&self, table_id: GlobalTableId, fd: Arc) { + pub fn insert_for_blob_file(&self, table_id: GlobalTableId, fd: Arc) { if let Self::DescriptorTable(descriptor_table) = self { descriptor_table.insert_for_blob_file(table_id, fd); } } } -impl std::fmt::Debug for FileAccessor { +impl Clone for FileAccessor { + fn clone(&self) -> Self { + match self { + Self::File(fd) => Self::File(fd.clone()), + Self::DescriptorTable(descriptor_table) => { + Self::DescriptorTable(descriptor_table.clone()) + } + } + } +} + +impl std::fmt::Debug for FileAccessor { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::File(_) => write!(f, "FileAccessor::Pinned"), diff --git a/src/fs.rs b/src/fs.rs new file mode 100644 index 000000000..5e9427810 --- /dev/null +++ b/src/fs.rs @@ -0,0 +1,196 @@ +// Copyright (c) 2025-present, fjall-rs +// This source code is licensed under both the Apache 2.0 and MIT License +// (found in the LICENSE-* files in the repository) + +use std::{ + ffi::{OsStr, OsString}, + io, + io::{Read, Seek, Write}, + path::{Path, PathBuf}, +}; + +/// Minimal metadata needed by the storage layer. +#[derive(Clone, Copy, Debug)] +pub struct Metadata { + len: u64, + is_dir: bool, +} + +impl Metadata { + /// Returns the file length in bytes. + #[must_use] + pub fn len(&self) -> u64 { + self.len + } + + /// Returns whether the file length is zero. + #[must_use] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns whether this entry is a directory. + #[must_use] + pub fn is_dir(&self) -> bool { + self.is_dir + } +} + +/// File abstraction for pluggable storage backends. +pub trait FileLike: Read + Write + Seek + Send + Sync { + /// Reads bytes at a given offset without changing the current cursor. + fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result; + /// Flushes file contents to durable storage. + fn sync_all(&self) -> io::Result<()>; + /// Retrieves minimal file metadata. + fn metadata(&self) -> io::Result; +} + +/// Filesystem abstraction for pluggable storage backends. +pub trait FileSystem: Send + Sync + std::panic::RefUnwindSafe + std::panic::UnwindSafe { + /// File handle type for this filesystem. + type File: FileLike; + + /// Opens an existing file for reading. + fn open(path: &Path) -> io::Result; + /// Creates or truncates a file for writing. + fn create(path: &Path) -> io::Result; + /// Creates a new file, failing if it already exists. + fn create_new(path: &Path) -> io::Result; + /// Reads a file into memory. + fn read(path: &Path) -> io::Result>; + /// Reads a UTF-8 file into a string. + fn read_to_string(path: &Path) -> io::Result; + /// Lists directory entries. + fn read_dir(path: &Path) -> io::Result>; + /// Creates a directory and all missing parents. + fn create_dir_all(path: &Path) -> io::Result<()>; + /// Removes a file. + fn remove_file(path: &Path) -> io::Result<()>; + /// Removes a directory and all its contents. + fn remove_dir_all(path: &Path) -> io::Result<()>; + /// Checks whether a path exists. + fn exists(path: &Path) -> io::Result; +} + +/// Lightweight directory entry used by [`FileSystem`]. +#[derive(Clone, Debug)] +pub struct DirEntry { + path: PathBuf, + file_name: OsString, + is_dir: bool, +} + +impl DirEntry { + /// Returns the full path for this entry. + #[must_use] + pub fn path(&self) -> &Path { + &self.path + } + + /// Returns the file name for this entry. + #[must_use] + pub fn file_name(&self) -> &OsStr { + &self.file_name + } + + /// Returns whether the entry is a directory. + #[must_use] + pub fn is_dir(&self) -> bool { + self.is_dir + } +} + +/// `std::fs`-backed filesystem implementation. +#[derive(Clone, Copy, Debug, Default)] +pub struct StdFileSystem; + +impl FileSystem for StdFileSystem { + type File = std::fs::File; + + fn open(path: &Path) -> io::Result { + Self::File::open(path) + } + + fn create(path: &Path) -> io::Result { + Self::File::create(path) + } + + fn create_new(path: &Path) -> io::Result { + Self::File::create_new(path) + } + + fn read(path: &Path) -> io::Result> { + std::fs::read(path) + } + + fn read_to_string(path: &Path) -> io::Result { + std::fs::read_to_string(path) + } + + fn read_dir(path: &Path) -> io::Result> { + std::fs::read_dir(path)? + .map(|entry| { + entry.and_then(|entry| { + let file_name = entry.file_name(); + let file_type = entry.file_type()?; + Ok(DirEntry { + path: entry.path(), + file_name, + is_dir: file_type.is_dir(), + }) + }) + }) + .collect() + } + + fn create_dir_all(path: &Path) -> io::Result<()> { + std::fs::create_dir_all(path) + } + + fn remove_file(path: &Path) -> io::Result<()> { + std::fs::remove_file(path) + } + + fn remove_dir_all(path: &Path) -> io::Result<()> { + std::fs::remove_dir_all(path) + } + + fn exists(path: &Path) -> io::Result { + path.try_exists() + } +} + +impl FileLike for std::fs::File { + fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + #[cfg(unix)] + { + use std::os::unix::fs::FileExt; + FileExt::read_at(self, buf, offset) + } + + #[cfg(windows)] + { + use std::os::windows::fs::FileExt; + self.seek_read(buf, offset) + } + + #[cfg(not(any(unix, windows)))] + { + compile_error!("unsupported platform"); + unimplemented!(); + } + } + + fn sync_all(&self) -> io::Result<()> { + std::fs::File::sync_all(self) + } + + fn metadata(&self) -> io::Result { + let metadata = std::fs::File::metadata(self)?; + Ok(Metadata { + len: metadata.len(), + is_dir: metadata.is_dir(), + }) + } +} diff --git a/src/ingestion.rs b/src/ingestion.rs index 3e693b70e..d8b53d169 100644 --- a/src/ingestion.rs +++ b/src/ingestion.rs @@ -3,7 +3,8 @@ // (found in the LICENSE-* files in the repository) use crate::{ - blob_tree::ingest::BlobIngestion, tree::ingest::Ingestion, AnyTree, UserKey, UserValue, + blob_tree::ingest::BlobIngestion, fs::FileSystem, tree::ingest::Ingestion, AnyTree, UserKey, + UserValue, }; /// Unified ingestion builder over `AnyTree` @@ -11,15 +12,15 @@ use crate::{ // Ingestion calls use `&mut self` in tight loops; the active variant is stable and branch prediction makes the match cheap. // Allowing this lint preserves hot-path performance at the cost of a larger enum size. #[expect(clippy::large_enum_variant)] -pub enum AnyIngestion<'a> { +pub enum AnyIngestion<'a, F: FileSystem> { /// Ingestion for a standard LSM-tree - Standard(Ingestion<'a>), + Standard(Ingestion<'a, F>), /// Ingestion for a [`BlobTree`] with KV separation - Blob(BlobIngestion<'a>), + Blob(BlobIngestion<'a, F>), } -impl AnyIngestion<'_> { +impl AnyIngestion<'_, F> { /// Writes a key-value pair. /// /// # Errors @@ -88,13 +89,13 @@ impl AnyIngestion<'_> { } } -impl AnyTree { +impl AnyTree { /// Starts an ingestion for any tree type (standard or blob). /// /// # Errors /// /// Will return `Err` if an IO error occurs. - pub fn ingestion(&self) -> crate::Result> { + pub fn ingestion(&self) -> crate::Result> { match self { Self::Standard(t) => Ok(AnyIngestion::Standard(Ingestion::new(t)?)), Self::Blob(b) => Ok(AnyIngestion::Blob(BlobIngestion::new(b)?)), diff --git a/src/iter_guard.rs b/src/iter_guard.rs index 63fb9312e..cd0c0818b 100644 --- a/src/iter_guard.rs +++ b/src/iter_guard.rs @@ -1,5 +1,6 @@ use crate::{ - blob_tree::Guard as BlobGuard, tree::Guard as StandardGuard, KvPair, UserKey, UserValue, + blob_tree::Guard as BlobGuard, fs::FileSystem, tree::Guard as StandardGuard, KvPair, UserKey, + UserValue, }; use enum_dispatch::enum_dispatch; @@ -55,10 +56,10 @@ pub trait IterGuard { /// Generic iterator value #[enum_dispatch(IterGuard)] -pub enum IterGuardImpl { +pub enum IterGuardImpl { /// Iterator value of a standard LSM-tree Standard(StandardGuard), /// Iterator value of a key-value separated tree - Blob(BlobGuard), + Blob(BlobGuard), } diff --git a/src/lib.rs b/src/lib.rs index b2ca250ad..205a4a54b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,6 +101,9 @@ mod error; #[doc(hidden)] pub mod file; +/// Filesystem abstraction +pub mod fs; + mod hash; mod ingestion; mod iter_guard; @@ -181,6 +184,7 @@ pub use { descriptor_table::DescriptorTable, error::{Error, Result}, format_version::FormatVersion, + fs::FileSystem, ingestion::AnyIngestion, iter_guard::IterGuard as Guard, memtable::{Memtable, MemtableId}, diff --git a/src/range.rs b/src/range.rs index c0cd5df94..0b938aa80 100644 --- a/src/range.rs +++ b/src/range.rs @@ -3,6 +3,7 @@ // (found in the LICENSE-* files in the repository) use crate::{ + fs::FileSystem, key::InternalKey, memtable::Memtable, merge::Merger, @@ -12,7 +13,6 @@ use crate::{ version::SuperVersion, BoxedIterator, InternalValue, }; -use self_cell::self_cell; use std::{ ops::{Bound, RangeBounds}, sync::Arc, @@ -65,23 +65,43 @@ pub fn prefix_to_range(prefix: &[u8]) -> (Bound, Bound) { /// The iter state references the memtables used while the range is open /// /// Because of Rust rules, the state is referenced using `self_cell`, see below. -pub struct IterState { - pub(crate) version: SuperVersion, +pub struct IterState { + pub(crate) version: SuperVersion, pub(crate) ephemeral: Option<(Arc, SeqNo)>, } type BoxedMerge<'a> = Box> + Send + 'a>; +type TreeIterJoinedCell<'a, F> = + self_cell::unsafe_self_cell::JoinedCell, BoxedMerge<'a>>; -self_cell!( - pub struct TreeIter { - owner: IterState, +// NOTE: We avoid `self_cell!` here because it doesn't support a generic `F`. +pub struct TreeIter { + unsafe_self_cell: + self_cell::unsafe_self_cell::UnsafeSelfCell, IterState, BoxedMerge<'static>>, +} + +impl TreeIter { + pub fn new( + owner: IterState, + dependent_builder: impl for<'a> FnOnce(&'a IterState) -> BoxedMerge<'a>, + ) -> Self { + // SAFETY: `self_cell` guarantees the dependent doesn't outlive `owner`. + unsafe { + self_cell::_self_cell_new_body!(TreeIterJoinedCell<'_, F>, owner, dependent_builder) + } + } - #[covariant] - dependent: BoxedMerge, + fn with_dependent_mut( + &mut self, + func: impl for<'a> FnOnce(&'a IterState, &'a mut BoxedMerge<'a>) -> Output, + ) -> Output { + // SAFETY: `borrow_mut` enforces exclusive access to owner + dependent. + let (owner, dependent) = unsafe { self.unsafe_self_cell.borrow_mut() }; + func(owner, dependent) } -); +} -impl Iterator for TreeIter { +impl Iterator for TreeIter { type Item = crate::Result; fn next(&mut self) -> Option { @@ -89,15 +109,15 @@ impl Iterator for TreeIter { } } -impl DoubleEndedIterator for TreeIter { +impl DoubleEndedIterator for TreeIter { fn next_back(&mut self) -> Option { self.with_dependent_mut(|_, iter| iter.next_back()) } } -impl TreeIter { +impl TreeIter { pub fn create_range, R: RangeBounds>( - guard: IterState, + guard: IterState, range: R, seqno: SeqNo, ) -> Self { @@ -179,7 +199,7 @@ impl TreeIter { } } _ => { - if let Some(reader) = RunReader::new( + if let Some(reader) = RunReader::::new( run.clone(), ( range.start_bound().map(|x| &x.user_key).cloned(), diff --git a/src/run_reader.rs b/src/run_reader.rs index 96384e66b..61506cfbb 100644 --- a/src/run_reader.rs +++ b/src/run_reader.rs @@ -2,25 +2,25 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::{version::Run, BoxedIterator, InternalValue, Table, UserKey}; +use crate::{fs::FileSystem, version::Run, BoxedIterator, InternalValue, Table, UserKey}; use std::{ ops::{Deref, RangeBounds}, sync::Arc, }; /// Reads through a disjoint run -pub struct RunReader { - run: Arc>, +pub struct RunReader { + run: Arc>>, lo: usize, hi: usize, lo_reader: Option>, hi_reader: Option>, } -impl RunReader { +impl RunReader { #[must_use] pub fn new + Clone + Send + 'static>( - run: Arc>, + run: Arc>>, range: R, ) -> Option { assert!(!run.is_empty(), "level reader cannot read empty level"); @@ -32,7 +32,7 @@ impl RunReader { #[must_use] pub fn culled + Clone + Send + 'static>( - run: Arc>, + run: Arc>>, range: R, (lo, hi): (Option, Option), ) -> Self { @@ -69,7 +69,7 @@ impl RunReader { } } -impl Iterator for RunReader { +impl Iterator for RunReader { type Item = crate::Result; fn next(&mut self) -> Option { @@ -104,7 +104,7 @@ impl Iterator for RunReader { } } -impl DoubleEndedIterator for RunReader { +impl DoubleEndedIterator for RunReader { fn next_back(&mut self) -> Option { loop { if let Some(hi_reader) = &mut self.hi_reader { diff --git a/src/run_scanner.rs b/src/run_scanner.rs index 90d2ed702..df9535cc7 100644 --- a/src/run_scanner.rs +++ b/src/run_scanner.rs @@ -2,22 +2,22 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::{table::Scanner, version::Run, InternalValue, Table}; +use crate::{fs::FileSystem, table::Scanner, version::Run, InternalValue, Table}; use std::sync::Arc; /// Scans through a disjoint run /// /// Optimized for compaction, by using a `TableScanner` instead of `TableReader`. -pub struct RunScanner { - tables: Arc>, +pub struct RunScanner { + tables: Arc>>, lo: usize, hi: usize, - lo_reader: Option, + lo_reader: Option>, } -impl RunScanner { +impl RunScanner { pub fn culled( - run: Arc>, + run: Arc>>, (lo, hi): (Option, Option), ) -> crate::Result { let lo = lo.unwrap_or_default(); @@ -40,7 +40,7 @@ impl RunScanner { } } -impl Iterator for RunScanner { +impl Iterator for RunScanner { type Item = crate::Result; fn next(&mut self) -> Option { diff --git a/src/table/block/mod.rs b/src/table/block/mod.rs index c7f8e3c66..21c282f43 100644 --- a/src/table/block/mod.rs +++ b/src/table/block/mod.rs @@ -18,12 +18,12 @@ pub use offset::BlockOffset; pub use r#type::BlockType; pub(crate) use trailer::{Trailer, TRAILER_START_MARKER}; +use crate::fs::FileLike; use crate::{ coding::{Decode, Encode}, table::BlockHandle, Checksum, CompressionType, Slice, }; -use std::fs::File; /// A block on disk /// @@ -129,7 +129,7 @@ impl Block { /// Reads a block from a file. pub fn from_file( - file: &File, + file: &impl FileLike, handle: BlockHandle, compression: CompressionType, ) -> crate::Result { diff --git a/src/table/block_index/mod.rs b/src/table/block_index/mod.rs index 4eb54b417..272d4fb63 100644 --- a/src/table/block_index/mod.rs +++ b/src/table/block_index/mod.rs @@ -12,11 +12,11 @@ pub use two_level::TwoLevelBlockIndex; pub use volatile::VolatileBlockIndex; use super::KeyedBlockHandle; -use crate::SeqNo; +use crate::{fs::FileSystem, SeqNo}; -pub trait BlockIndex { - fn forward_reader(&self, needle: &[u8], seqno: SeqNo) -> Option; - fn iter(&self) -> BlockIndexIterImpl; +pub trait BlockIndex { + fn forward_reader(&self, needle: &[u8], seqno: SeqNo) -> Option>; + fn iter(&self) -> BlockIndexIterImpl; } pub trait BlockIndexIter: DoubleEndedIterator> { @@ -24,13 +24,13 @@ pub trait BlockIndexIter: DoubleEndedIterator bool; } -pub enum BlockIndexIterImpl { +pub enum BlockIndexIterImpl { Full(self::full::Iter), - Volatile(self::volatile::Iter), - TwoLevel(self::two_level::Iter), + Volatile(self::volatile::Iter), + TwoLevel(self::two_level::Iter), } -impl BlockIndexIter for BlockIndexIterImpl { +impl BlockIndexIter for BlockIndexIterImpl { fn seek_lower(&mut self, key: &[u8], seqno: SeqNo) -> bool { match self { Self::Full(i) => i.seek_lower(key, seqno), @@ -48,7 +48,7 @@ impl BlockIndexIter for BlockIndexIterImpl { } } -impl Iterator for BlockIndexIterImpl { +impl Iterator for BlockIndexIterImpl { type Item = crate::Result; fn next(&mut self) -> Option { @@ -60,7 +60,7 @@ impl Iterator for BlockIndexIterImpl { } } -impl DoubleEndedIterator for BlockIndexIterImpl { +impl DoubleEndedIterator for BlockIndexIterImpl { fn next_back(&mut self) -> Option<::Item> { match self { Self::Full(i) => i.next_back(), @@ -88,14 +88,14 @@ impl DoubleEndedIterator for BlockIndexIterImpl { /// found by finding the highest block that has a lower or equal end key than the searched key (by performing in-memory binary search). /// In the diagram above, searching for 'J' yields the block starting with 'G'. /// 'J' must be in that block, because the next block starts with 'M'). -pub enum BlockIndexImpl { +pub enum BlockIndexImpl { Full(FullBlockIndex), - VolatileFull(VolatileBlockIndex), - TwoLevel(TwoLevelBlockIndex), + VolatileFull(VolatileBlockIndex), + TwoLevel(TwoLevelBlockIndex), } -impl BlockIndex for BlockIndexImpl { - fn forward_reader(&self, needle: &[u8], seqno: SeqNo) -> Option { +impl BlockIndex for BlockIndexImpl { + fn forward_reader(&self, needle: &[u8], seqno: SeqNo) -> Option> { match self { Self::Full(index) => index .forward_reader(needle, seqno) @@ -121,7 +121,7 @@ impl BlockIndex for BlockIndexImpl { } } - fn iter(&self) -> BlockIndexIterImpl { + fn iter(&self) -> BlockIndexIterImpl { match self { Self::Full(index) => BlockIndexIterImpl::Full(index.iter()), Self::VolatileFull(index) => BlockIndexIterImpl::Volatile(index.iter()), diff --git a/src/table/block_index/two_level.rs b/src/table/block_index/two_level.rs index ff13ecb4a..4213f3e37 100644 --- a/src/table/block_index/two_level.rs +++ b/src/table/block_index/two_level.rs @@ -4,7 +4,7 @@ use crate::file_accessor::FileAccessor; use crate::table::{IndexBlock, KeyedBlockHandle}; -use crate::SeqNo; +use crate::{fs::FileSystem, SeqNo}; use crate::{ table::{ block::BlockType, @@ -13,7 +13,7 @@ use crate::{ }, Cache, CompressionType, GlobalTableId, UserKey, }; -use std::{path::PathBuf, sync::Arc}; +use std::{marker::PhantomData, path::PathBuf, sync::Arc}; #[cfg(feature = "metrics")] use crate::Metrics; @@ -21,20 +21,21 @@ use crate::Metrics; /// Index that translates item keys to data block handles /// /// Only the top-level index is loaded into memory. -pub struct TwoLevelBlockIndex { +pub struct TwoLevelBlockIndex { pub(crate) top_level_index: IndexBlock, pub(crate) table_id: GlobalTableId, pub(crate) path: Arc, - pub(crate) file_accessor: FileAccessor, + pub(crate) file_accessor: FileAccessor, pub(crate) cache: Arc, pub(crate) compression: CompressionType, + pub(crate) phantom: PhantomData, #[cfg(feature = "metrics")] pub(crate) metrics: Arc, } -impl TwoLevelBlockIndex { - pub fn iter(&self) -> Iter { +impl TwoLevelBlockIndex { + pub fn iter(&self) -> Iter { Iter { tli_block: self.top_level_index.clone(), tli: None, @@ -47,6 +48,7 @@ impl TwoLevelBlockIndex { file_accessor: self.file_accessor.clone(), cache: self.cache.clone(), compression: self.compression, + phantom: PhantomData, #[cfg(feature = "metrics")] metrics: self.metrics.clone(), @@ -54,7 +56,7 @@ impl TwoLevelBlockIndex { } } -pub struct Iter { +pub struct Iter { tli_block: IndexBlock, tli: Option, @@ -66,15 +68,16 @@ pub struct Iter { table_id: GlobalTableId, path: Arc, - file_accessor: FileAccessor, + file_accessor: FileAccessor, cache: Arc, compression: CompressionType, + phantom: PhantomData, #[cfg(feature = "metrics")] metrics: Arc, } -impl Iter { +impl Iter { fn init_tli(&mut self) -> bool { let mut iter = OwnedIndexBlockIter::new(self.tli_block.clone(), IndexBlock::iter); @@ -95,7 +98,7 @@ impl Iter { } } -impl BlockIndexIter for Iter { +impl BlockIndexIter for Iter { fn seek_lower(&mut self, key: &[u8], seqno: SeqNo) -> bool { self.lo = Some((key.into(), seqno)); true @@ -107,7 +110,7 @@ impl BlockIndexIter for Iter { } } -impl Iterator for Iter { +impl Iterator for Iter { type Item = crate::Result; fn next(&mut self) -> Option { @@ -125,7 +128,7 @@ impl Iterator for Iter { let next_lowest_block = tli.next(); if let Some(handle) = next_lowest_block { - let block = fail_iter!(load_block( + let block = fail_iter!(load_block::( self.table_id, &self.path, &self.file_accessor, @@ -172,7 +175,7 @@ impl Iterator for Iter { } } -impl DoubleEndedIterator for Iter { +impl DoubleEndedIterator for Iter { fn next_back(&mut self) -> Option { if let Some(hi_block) = &mut self.hi_consumer { if let Some(item) = hi_block.next_back() { @@ -188,7 +191,7 @@ impl DoubleEndedIterator for Iter { let next_highest_block = tli.next_back(); if let Some(handle) = next_highest_block { - let block = fail_iter!(load_block( + let block = fail_iter!(load_block::( self.table_id, &self.path, &self.file_accessor, diff --git a/src/table/block_index/volatile.rs b/src/table/block_index/volatile.rs index db731c9bd..1036feb8f 100644 --- a/src/table/block_index/volatile.rs +++ b/src/table/block_index/volatile.rs @@ -5,6 +5,7 @@ use super::KeyedBlockHandle; use crate::{ file_accessor::FileAccessor, + fs::FileSystem, table::{ block::BlockType, block_index::{iter::OwnedIndexBlockIter, BlockIndexIter}, @@ -13,7 +14,7 @@ use crate::{ }, Cache, CompressionType, GlobalTableId, SeqNo, UserKey, }; -use std::{path::PathBuf, sync::Arc}; +use std::{marker::PhantomData, path::PathBuf, sync::Arc}; #[cfg(feature = "metrics")] use crate::Metrics; @@ -21,38 +22,40 @@ use crate::Metrics; /// Index that translates item keys to data block handles /// /// The index is loaded on demand. -pub struct VolatileBlockIndex { +pub struct VolatileBlockIndex { pub(crate) table_id: GlobalTableId, pub(crate) path: Arc, - pub(crate) file_accessor: FileAccessor, + pub(crate) file_accessor: FileAccessor, pub(crate) cache: Arc, pub(crate) handle: BlockHandle, pub(crate) compression: CompressionType, + pub(crate) phantom: PhantomData, #[cfg(feature = "metrics")] pub(crate) metrics: Arc, } -impl VolatileBlockIndex { - pub fn forward_reader(&self, needle: &[u8], seqno: SeqNo) -> Iter { - let mut iter = Iter::new(self); +impl VolatileBlockIndex { + pub fn forward_reader(&self, needle: &[u8], seqno: SeqNo) -> Iter { + let mut iter = Iter::::new(self); iter.seek_lower(needle, seqno); iter } - pub fn iter(&self) -> Iter { + pub fn iter(&self) -> Iter { Iter::new(self) } } -pub struct Iter { +pub struct Iter { inner: Option, table_id: GlobalTableId, path: Arc, - file_accessor: FileAccessor, + file_accessor: FileAccessor, cache: Arc, handle: BlockHandle, compression: CompressionType, + phantom: PhantomData, lo: Option<(UserKey, SeqNo)>, hi: Option<(UserKey, SeqNo)>, @@ -61,8 +64,8 @@ pub struct Iter { pub(crate) metrics: Arc, } -impl Iter { - fn new(index: &VolatileBlockIndex) -> Self { +impl Iter { + fn new(index: &VolatileBlockIndex) -> Self { Self { inner: None, table_id: index.table_id, @@ -71,6 +74,7 @@ impl Iter { cache: index.cache.clone(), handle: index.handle, compression: index.compression, + phantom: PhantomData, lo: None, hi: None, @@ -81,7 +85,7 @@ impl Iter { } } -impl BlockIndexIter for Iter { +impl BlockIndexIter for Iter { fn seek_lower(&mut self, key: &[u8], seqno: SeqNo) -> bool { self.lo = Some((key.into(), seqno)); true @@ -93,14 +97,14 @@ impl BlockIndexIter for Iter { } } -impl Iterator for Iter { +impl Iterator for Iter { type Item = crate::Result; fn next(&mut self) -> Option { if let Some(inner) = &mut self.inner { inner.next().map(Ok) } else { - let block = fail_iter!(load_block( + let block = fail_iter!(load_block::( self.table_id, &self.path, &self.file_accessor, @@ -135,12 +139,12 @@ impl Iterator for Iter { } } -impl DoubleEndedIterator for Iter { +impl DoubleEndedIterator for Iter { fn next_back(&mut self) -> Option { if let Some(inner) = &mut self.inner { inner.next_back().map(Ok) } else { - let block = fail_iter!(load_block( + let block = fail_iter!(load_block::( self.table_id, &self.path, &self.file_accessor, diff --git a/src/table/inner.rs b/src/table/inner.rs index 5611ca135..64a395583 100644 --- a/src/table/inner.rs +++ b/src/table/inner.rs @@ -9,6 +9,7 @@ use super::{block_index::BlockIndexImpl, meta::ParsedMeta, regions::ParsedRegion use crate::{ cache::Cache, file_accessor::FileAccessor, + fs::FileSystem, table::{filter::block::FilterBlock, IndexBlock}, tree::inner::TreeId, Checksum, GlobalTableId, SeqNo, @@ -18,13 +19,13 @@ use std::{ sync::{atomic::AtomicBool, Arc, OnceLock}, }; -pub struct Inner { +pub struct Inner { pub path: Arc, pub(crate) tree_id: TreeId, #[doc(hidden)] - pub(crate) file_accessor: FileAccessor, + pub(crate) file_accessor: FileAccessor, /// Parsed metadata #[doc(hidden)] @@ -36,7 +37,7 @@ pub struct Inner { /// Translates key (first item of a block) to block offset (address inside file) and (compressed) size #[doc(hidden)] - pub block_index: Arc, + pub block_index: Arc>, /// Block cache /// @@ -67,7 +68,7 @@ pub struct Inner { pub(crate) cached_blob_bytes: OnceLock, } -impl Inner { +impl Inner { /// Gets the global table ID. #[must_use] pub(super) fn global_id(&self) -> GlobalTableId { @@ -75,14 +76,14 @@ impl Inner { } } -impl Drop for Inner { +impl Drop for Inner { fn drop(&mut self) { let global_id = self.global_id(); if self.is_deleted.load(std::sync::atomic::Ordering::Acquire) { log::trace!("Cleanup deleted table {global_id:?} at {:?}", self.path); - if let Err(e) = std::fs::remove_file(&*self.path) { + if let Err(e) = F::remove_file(&self.path) { log::warn!( "Failed to cleanup deleted table {global_id:?} at {:?}: {e:?}", self.path, diff --git a/src/table/iter.rs b/src/table/iter.rs index b03b69da1..8cb32947b 100644 --- a/src/table/iter.rs +++ b/src/table/iter.rs @@ -5,6 +5,7 @@ use super::{data_block::Iter as DataBlockIter, BlockOffset, DataBlock, GlobalTableId}; use crate::{ file_accessor::FileAccessor, + fs::FileSystem, table::{ block::ParsedItem, block_index::{BlockIndexIter, BlockIndexIterImpl}, @@ -91,16 +92,16 @@ fn create_data_block_reader(block: DataBlock) -> OwnedDataBlockIter { OwnedDataBlockIter::new(block, super::data_block::DataBlock::iter) } -pub struct Iter { +pub struct Iter { table_id: GlobalTableId, path: Arc, global_seqno: SeqNo, #[expect(clippy::struct_field_names)] - index_iter: BlockIndexIterImpl, + index_iter: BlockIndexIterImpl, - file_accessor: FileAccessor, + file_accessor: FileAccessor, cache: Arc, compression: CompressionType, @@ -118,13 +119,13 @@ pub struct Iter { metrics: Arc, } -impl Iter { +impl Iter { pub fn new( table_id: GlobalTableId, global_seqno: SeqNo, path: Arc, - index_iter: BlockIndexIterImpl, - file_accessor: FileAccessor, + index_iter: BlockIndexIterImpl, + file_accessor: FileAccessor, cache: Arc, compression: CompressionType, #[cfg(feature = "metrics")] metrics: Arc, @@ -164,7 +165,7 @@ impl Iter { } } -impl Iterator for Iter { +impl Iterator for Iter { type Item = crate::Result; fn next(&mut self) -> Option { @@ -249,7 +250,7 @@ impl Iterator for Iter { let block = match self.cache.get_block(self.table_id, handle.offset()) { Some(block) => block, None => { - fail_iter!(load_block( + fail_iter!(load_block::( self.table_id, &self.path, &self.file_accessor, @@ -292,7 +293,7 @@ impl Iterator for Iter { } } -impl DoubleEndedIterator for Iter { +impl DoubleEndedIterator for Iter { fn next_back(&mut self) -> Option { // Mirror the forward iterator: prefer consuming buffered items from the high data block to // avoid touching the index once a block has been materialized. @@ -370,7 +371,7 @@ impl DoubleEndedIterator for Iter { let block = match self.cache.get_block(self.table_id, handle.offset()) { Some(block) => block, None => { - fail_iter!(load_block( + fail_iter!(load_block::( self.table_id, &self.path, &self.file_accessor, diff --git a/src/table/meta.rs b/src/table/meta.rs index ff1810f10..b50c18fff 100644 --- a/src/table/meta.rs +++ b/src/table/meta.rs @@ -3,12 +3,13 @@ // (found in the LICENSE-* files in the repository) use super::{Block, BlockHandle, DataBlock}; +use crate::fs::FileLike; use crate::{ checksum::ChecksumType, coding::Decode, table::block::BlockType, CompressionType, KeyRange, SeqNo, TableId, }; use byteorder::{LittleEndian, ReadBytesExt}; -use std::{fs::File, ops::Deref}; +use std::ops::Deref; /// Nanosecond timestamp. #[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)] @@ -76,7 +77,7 @@ macro_rules! read_u64 { impl ParsedMeta { #[expect(clippy::expect_used, clippy::too_many_lines)] - pub fn load_with_handle(file: &File, handle: &BlockHandle) -> crate::Result { + pub fn load_with_handle(file: &impl FileLike, handle: &BlockHandle) -> crate::Result { let block = Block::from_file(file, *handle, CompressionType::None)?; if block.header.block_type != BlockType::Meta { diff --git a/src/table/mod.rs b/src/table/mod.rs index 07c45d8a8..97122ed1c 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -31,6 +31,7 @@ use crate::{ cache::Cache, descriptor_table::DescriptorTable, file_accessor::FileAccessor, + fs::{FileLike, FileSystem}, table::{ block::{BlockType, ParsedItem}, block_index::{BlockIndex, FullBlockIndex, TwoLevelBlockIndex, VolatileBlockIndex}, @@ -45,7 +46,6 @@ use inner::Inner; use iter::Iter; use std::{ borrow::Cow, - fs::File, ops::{Bound, RangeBounds}, path::PathBuf, sync::Arc, @@ -55,7 +55,7 @@ use util::load_block; #[cfg(feature = "metrics")] use crate::metrics::Metrics; -pub type TableInner = Inner; +pub type TableInner = Inner; /// A disk segment (a.k.a. `Table`, `SSTable`, `SST`, `sorted string table`) that is located on disk /// @@ -66,11 +66,16 @@ pub type TableInner = Inner; /// /// Tables can be merged together to improve read performance and free unneeded disk space by removing outdated item versions. #[doc(alias("sstable", "sst", "sorted string table"))] -#[derive(Clone)] -pub struct Table(Arc); +pub struct Table(Arc>); -impl std::ops::Deref for Table { - type Target = Inner; +impl Clone for Table { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl std::ops::Deref for Table { + type Target = Inner; fn deref(&self) -> &Self::Target { &self.0 @@ -78,13 +83,13 @@ impl std::ops::Deref for Table { } #[cfg_attr(coverage_nightly, coverage(off))] -impl std::fmt::Debug for Table { +impl std::fmt::Debug for Table { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Table:{}({:?})", self.id(), self.metadata.key_range) } } -impl Table { +impl Table { #[must_use] pub fn global_seqno(&self) -> SeqNo { self.0.global_seqno @@ -114,11 +119,12 @@ impl Table { if let Some(fd) = self.file_accessor.access_for_table(&table_id) { (fd, false) } else { - (Arc::new(File::open(&*self.path)?), true) + (Arc::new(F::open(&self.path)?), true) }; // Read the exact region using pread-style helper - let buf = crate::file::read_exact(&fd, *handle.offset(), handle.size() as usize)?; + let buf = + crate::file::read_exact(fd.as_ref(), *handle.offset(), handle.size() as usize)?; // If we opened the file here, cache the FD for future accesses if fd_cache_miss { @@ -199,7 +205,7 @@ impl Table { block_type: BlockType, compression: CompressionType, ) -> crate::Result { - load_block( + load_block::( self.global_id(), &self.path, &self.file_accessor, @@ -332,7 +338,7 @@ impl Table { /// /// Will return `Err` if an IO error occurs. #[doc(hidden)] - pub fn scan(&self) -> crate::Result { + pub fn scan(&self) -> crate::Result> { #[expect( clippy::expect_used, reason = "there shouldn't be 4 billion data blocks in a single table" @@ -343,7 +349,7 @@ impl Table { .try_into() .expect("data block count should fit"); - Scanner::new( + Scanner::::new_with_fs( &self.path, block_count, self.metadata.data_block_compression, @@ -404,7 +410,7 @@ impl Table { fn read_tli( regions: &ParsedRegions, - file: &File, + file: &impl FileLike, compression: CompressionType, ) -> crate::Result { log::trace!("Reading TLI block, with tli_ptr={:?}", regions.tli); @@ -433,7 +439,7 @@ impl Table { global_seqno: SeqNo, tree_id: TreeId, cache: Arc, - descriptor_table: Option>, + descriptor_table: Option>>, pin_filter: bool, pin_index: bool, #[cfg(feature = "metrics")] metrics: Arc, @@ -443,7 +449,7 @@ impl Table { use std::sync::atomic::AtomicBool; log::debug!("Recovering table from file {}", file_path.display()); - let mut file = std::fs::File::open(&file_path)?; + let mut file = F::open(&file_path)?; let file_path = Arc::new(file_path); #[cfg(feature = "metrics")] @@ -457,12 +463,10 @@ impl Table { log::trace!("Reading meta block, with meta_ptr={:?}", regions.metadata); let metadata = ParsedMeta::load_with_handle(&file, ®ions.metadata)?; - let file = Arc::new(file); - let file_accessor = if let Some(dt) = descriptor_table { FileAccessor::DescriptorTable(dt) } else { - FileAccessor::File(file.clone()) + FileAccessor::File(Arc::new(F::open(&file_path)?)) }; let block_index = if regions.index.is_some() { @@ -480,6 +484,7 @@ impl Table { path: Arc::clone(&file_path), file_accessor: file_accessor.clone(), table_id: (tree_id, metadata.id).into(), + phantom: std::marker::PhantomData, #[cfg(feature = "metrics")] metrics: metrics.clone(), @@ -502,6 +507,7 @@ impl Table { handle: regions.tli, path: Arc::clone(&file_path), table_id: (tree_id, metadata.id).into(), + phantom: std::marker::PhantomData, #[cfg(feature = "metrics")] metrics: metrics.clone(), diff --git a/src/table/multi_writer.rs b/src/table/multi_writer.rs index fefed7ddf..691cb4804 100644 --- a/src/table/multi_writer.rs +++ b/src/table/multi_writer.rs @@ -4,15 +4,16 @@ use super::{filter::BloomConstructionPolicy, writer::Writer}; use crate::{ - blob_tree::handle::BlobIndirection, table::writer::LinkedFile, value::InternalValue, - vlog::BlobFileId, Checksum, CompressionType, HashMap, SequenceNumberCounter, TableId, UserKey, + blob_tree::handle::BlobIndirection, fs::FileSystem, table::writer::LinkedFile, + value::InternalValue, vlog::BlobFileId, Checksum, CompressionType, HashMap, + SequenceNumberCounter, TableId, UserKey, }; use std::path::PathBuf; /// Like `Writer` but will rotate to a new table, once a table grows larger than `target_size` /// /// This results in a sorted "run" of tables -pub struct MultiWriter { +pub struct MultiWriter { pub(crate) base_path: PathBuf, data_block_hash_ratio: f32, @@ -35,7 +36,7 @@ pub struct MultiWriter { table_id_generator: SequenceNumberCounter, - pub writer: Writer, + pub writer: Writer, pub data_block_compression: CompressionType, pub index_block_compression: CompressionType, @@ -50,7 +51,7 @@ pub struct MultiWriter { initial_level: u8, } -impl MultiWriter { +impl MultiWriter { /// Sets up a new `MultiWriter` at the given tables folder pub fn new( base_path: PathBuf, @@ -61,7 +62,7 @@ impl MultiWriter { let current_table_id = table_id_generator.next(); let path = base_path.join(current_table_id.to_string()); - let writer = Writer::new(path, current_table_id, initial_level)?; + let writer = Writer::::new(path, current_table_id, initial_level)?; Ok(Self { initial_level, @@ -184,7 +185,7 @@ impl MultiWriter { let new_table_id = self.table_id_generator.next(); let path = self.base_path.join(new_table_id.to_string()); - let mut new_writer = Writer::new(path, new_table_id, self.initial_level)? + let mut new_writer = Writer::::new(path, new_table_id, self.initial_level)? .use_data_block_compression(self.data_block_compression) .use_index_block_compression(self.index_block_compression) .use_data_block_size(self.data_block_size) diff --git a/src/table/scanner.rs b/src/table/scanner.rs index dc46858b4..abbcec697 100644 --- a/src/table/scanner.rs +++ b/src/table/scanner.rs @@ -4,14 +4,15 @@ use super::{Block, DataBlock}; use crate::{ + fs::FileSystem, table::{block::BlockType, iter::OwnedDataBlockIter}, CompressionType, InternalValue, SeqNo, }; -use std::{fs::File, io::BufReader, path::Path}; +use std::{io::BufReader, path::Path}; /// Table reader that is optimized for consuming an entire table -pub struct Scanner { - reader: BufReader, +pub struct Scanner { + reader: BufReader, iter: OwnedDataBlockIter, compression: CompressionType, @@ -21,15 +22,26 @@ pub struct Scanner { global_seqno: SeqNo, } -impl Scanner { +impl Scanner { pub fn new( path: &Path, block_count: usize, compression: CompressionType, global_seqno: SeqNo, + ) -> crate::Result { + Self::new_with_fs(path, block_count, compression, global_seqno) + } +} + +impl Scanner { + pub fn new_with_fs( + path: &Path, + block_count: usize, + compression: CompressionType, + global_seqno: SeqNo, ) -> crate::Result { // TODO: a larger buffer size may be better for HDD, maybe make this configurable - let mut reader = BufReader::with_capacity(8 * 4_096, File::open(path)?); + let mut reader = BufReader::with_capacity(8 * 4_096, F::open(path)?); let block = Self::fetch_next_block(&mut reader, compression)?; let iter = OwnedDataBlockIter::new(block, DataBlock::iter); @@ -47,7 +59,7 @@ impl Scanner { } fn fetch_next_block( - reader: &mut BufReader, + reader: &mut BufReader, compression: CompressionType, ) -> crate::Result { let block = Block::from_reader(reader, compression); @@ -68,7 +80,7 @@ impl Scanner { } } -impl Iterator for Scanner { +impl Iterator for Scanner { type Item = crate::Result; fn next(&mut self) -> Option { diff --git a/src/table/tests.rs b/src/table/tests.rs index 6df174d0f..cb380b86e 100644 --- a/src/table/tests.rs +++ b/src/table/tests.rs @@ -6,6 +6,7 @@ use super::*; use crate::{ config::BloomConstructionPolicy, table::filter::standard_bloom::Builder as BloomBuilder, }; +use std::sync::Arc; use tempfile::tempdir; use test_log::test; @@ -17,15 +18,16 @@ use test_log::test; )] fn test_with_table( items: &[InternalValue], - f: impl Fn(Table) -> crate::Result<()>, + f: impl Fn(Table) -> crate::Result<()>, rotate_every: Option, - config_writer: Option Writer>, + config_writer: Option< + impl Fn(Writer) -> Writer, + >, ) -> crate::Result<()> { let dir = tempdir()?; let file = dir.path().join("table"); - { - let mut writer = Writer::new(file.clone(), 0, 0)?; + let mut writer = Writer::::new(file.clone(), 0, 0)?; if let Some(f) = &config_writer { writer = f(writer); @@ -45,7 +47,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -75,7 +77,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -105,7 +107,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -135,7 +137,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -165,7 +167,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -189,11 +191,12 @@ fn test_with_table( } } - std::fs::remove_file(&file)?; + crate::fs::StdFileSystem::remove_file(&file)?; // Test with partitioned indexes { - let mut writer = Writer::new(file.clone(), 0, 0)?.use_partitioned_index(); + let mut writer = + Writer::::new(file.clone(), 0, 0)?.use_partitioned_index(); if let Some(f) = config_writer { writer = f(writer); @@ -213,7 +216,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -242,7 +245,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -271,7 +274,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -301,7 +304,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file.clone(), checksum, 0, @@ -331,7 +334,7 @@ fn test_with_table( #[cfg(feature = "metrics")] let metrics = Arc::new(Metrics::default()); - let table = Table::recover( + let table = Table::::recover( file, checksum, 0, @@ -468,7 +471,7 @@ fn table_range_exclusive_bounds() -> crate::Result<()> { Ok(()) }, None, - Some(|x: Writer| x.use_data_block_size(1)), + Some(|x: Writer| x.use_data_block_size(1)), ) } @@ -670,7 +673,7 @@ fn table_range_multiple_data_blocks() -> crate::Result<()> { Ok(()) }, None, - Some(|x: Writer| x.use_data_block_size(1)), + Some(|x: Writer| x.use_data_block_size(1)), ) } @@ -706,7 +709,7 @@ fn table_point_read_partitioned_filter_smoke_test() -> crate::Result<()> { Ok(()) }, None, - Some(|x: Writer| x.use_partitioned_filter()), + Some(|x: Writer| x.use_partitioned_filter()), ) } @@ -794,7 +797,9 @@ fn table_partitioned_filter() -> crate::Result<()> { Ok(()) }, None, - Some(|x: Writer| x.use_partitioned_filter().use_meta_partition_size(3)), + Some(|x: Writer| { + x.use_partitioned_filter().use_meta_partition_size(3) + }), ) } @@ -839,7 +844,9 @@ fn table_zero_bpk() -> crate::Result<()> { Ok(()) }, None, - Some(|x: Writer| x.use_bloom_policy(BloomConstructionPolicy::BitsPerKey(0.0))), + Some(|x: Writer| { + x.use_bloom_policy(BloomConstructionPolicy::BitsPerKey(0.0)) + }), ) } @@ -1206,7 +1213,7 @@ fn table_read_fuzz_1() -> crate::Result<()> { let data_block_size = 97; - let mut writer = crate::table::Writer::new(file.clone(), 0, 0) + let mut writer = crate::table::Writer::::new(file.clone(), 0, 0) .unwrap() .use_data_block_size(data_block_size); @@ -1216,7 +1223,7 @@ fn table_read_fuzz_1() -> crate::Result<()> { let _trailer = writer.finish().unwrap(); - let table = crate::Table::recover( + let table = crate::Table::::recover( file, crate::Checksum::from_raw(0), 0, @@ -1278,7 +1285,7 @@ fn table_partitioned_index() -> crate::Result<()> { let dir = tempfile::tempdir()?; let file = dir.path().join("table_fuzz"); - let mut writer = crate::table::Writer::new(file.clone(), 0, 0) + let mut writer = crate::table::Writer::::new(file.clone(), 0, 0) .unwrap() .use_partitioned_index() .use_data_block_size(5) @@ -1290,7 +1297,7 @@ fn table_partitioned_index() -> crate::Result<()> { let _trailer = writer.finish().unwrap(); - let table = crate::Table::recover( + let table = crate::Table::::recover( file, crate::Checksum::from_raw(0), 0, @@ -1388,7 +1395,7 @@ fn table_global_seqno() -> crate::Result<()> { let dir = tempfile::tempdir()?; let file = dir.path().join("table_fuzz"); - let mut writer = crate::table::Writer::new(file.clone(), 0, 0) + let mut writer = crate::table::Writer::::new(file.clone(), 0, 0) .unwrap() .use_partitioned_filter() .use_data_block_size(1) @@ -1400,7 +1407,7 @@ fn table_global_seqno() -> crate::Result<()> { let _trailer = writer.finish().unwrap(); - let table = crate::Table::recover( + let table = crate::Table::::recover( file, crate::Checksum::from_raw(0), 7, diff --git a/src/table/util.rs b/src/table/util.rs index 45862d152..7012aa256 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -4,8 +4,8 @@ use super::{Block, BlockHandle, GlobalTableId}; use crate::{ - file_accessor::FileAccessor, table::block::BlockType, version::run::Ranged, Cache, - CompressionType, KeyRange, Table, + file_accessor::FileAccessor, fs::FileSystem, table::block::BlockType, version::run::Ranged, + Cache, CompressionType, KeyRange, Table, }; use std::{path::Path, sync::Arc}; @@ -13,7 +13,7 @@ use std::{path::Path, sync::Arc}; use crate::metrics::Metrics; #[must_use] -pub fn aggregate_run_key_range(tables: &[Table]) -> KeyRange { +pub fn aggregate_run_key_range(tables: &[Table]) -> KeyRange { #[expect(clippy::expect_used, reason = "runs are never empty by definition")] let lo = tables.first().expect("run should never be empty"); #[expect(clippy::expect_used, reason = "runs are never empty by definition")] @@ -29,10 +29,10 @@ pub struct SliceIndexes(pub usize, pub usize); /// /// Also handles file descriptor opening and caching. #[warn(clippy::too_many_arguments)] -pub fn load_block( +pub fn load_block( table_id: GlobalTableId, path: &Path, - file_accessor: &FileAccessor, + file_accessor: &FileAccessor, cache: &Cache, handle: &BlockHandle, block_type: BlockType, @@ -68,7 +68,7 @@ pub fn load_block( (cached_fd, false) } else { - let fd = std::fs::File::open(path)?; + let fd = F::open(path)?; #[cfg(feature = "metrics")] metrics.table_file_opened_uncached.fetch_add(1, Relaxed); @@ -76,7 +76,7 @@ pub fn load_block( (Arc::new(fd), true) }; - let block = Block::from_file(&fd, *handle, compression)?; + let block = Block::from_file(fd.as_ref(), *handle, compression)?; if block.header.block_type != block_type { return Err(crate::Error::InvalidTag(( diff --git a/src/table/writer/filter/full.rs b/src/table/writer/filter/full.rs index 66e7c2aa9..338dabd4f 100644 --- a/src/table/writer/filter/full.rs +++ b/src/table/writer/filter/full.rs @@ -9,7 +9,7 @@ use crate::{ table::{filter::standard_bloom::Builder, Block}, CompressionType, UserKey, }; -use std::{fs::File, io::BufWriter}; +use std::io::BufWriter; pub struct FullFilterWriter { /// Key hashes for AMQ filter @@ -51,7 +51,7 @@ impl FilterWriter for FullFilterWriter { fn finish( self: Box, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, ) -> crate::Result { if self.bloom_hash_buffer.is_empty() { log::trace!("Filter writer has no buffered hashes - not building filter"); diff --git a/src/table/writer/filter/mod.rs b/src/table/writer/filter/mod.rs index 892027542..a7ccb742d 100644 --- a/src/table/writer/filter/mod.rs +++ b/src/table/writer/filter/mod.rs @@ -11,9 +11,9 @@ pub use partitioned::PartitionedFilterWriter; use crate::{ checksum::ChecksummedWriter, config::BloomConstructionPolicy, CompressionType, UserKey, }; -use std::{fs::File, io::BufWriter}; +use std::io::BufWriter; -pub trait FilterWriter { +pub trait FilterWriter { // NOTE: We purposefully use a UserKey instead of &[u8] // so we can clone it without heap allocation, if needed /// Registers a key in the block index. @@ -24,7 +24,7 @@ pub trait FilterWriter { /// Returns the number of filter blocks written (always 1 in case of full filter block). fn finish( self: Box, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, ) -> crate::Result; fn set_filter_policy( diff --git a/src/table/writer/filter/partitioned.rs b/src/table/writer/filter/partitioned.rs index 1cd8068ff..b1154d78f 100644 --- a/src/table/writer/filter/partitioned.rs +++ b/src/table/writer/filter/partitioned.rs @@ -12,10 +12,7 @@ use crate::{ }, CompressionType, UserKey, }; -use std::{ - fs::File, - io::{BufWriter, Seek, Write}, -}; +use std::io::{BufWriter, Seek, Write}; pub struct PartitionedFilterWriter { final_filter_buffer: Vec, @@ -100,9 +97,9 @@ impl PartitionedFilterWriter { Ok(()) } - fn write_top_level_index( + fn write_top_level_index( &mut self, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, index_base_offset: BlockOffset, ) -> crate::Result<()> { file_writer.start("filter_tli")?; @@ -178,7 +175,7 @@ impl FilterWriter for PartitionedFilterWri fn finish( mut self: Box, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, ) -> crate::Result { if self.last_key.is_none() { log::trace!("Filter writer has not seen any writes - not building filter"); diff --git a/src/table/writer/index/full.rs b/src/table/writer/index/full.rs index 53baab272..21a010ed0 100644 --- a/src/table/writer/index/full.rs +++ b/src/table/writer/index/full.rs @@ -10,7 +10,7 @@ use crate::{ }, CompressionType, }; -use std::{fs::File, io::BufWriter}; +use std::io::BufWriter; pub struct FullIndexWriter { compression: CompressionType, @@ -54,7 +54,7 @@ impl BlockIndexWriter for FullIndexWriter fn finish( self: Box, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, ) -> crate::Result { file_writer.start("tli")?; diff --git a/src/table/writer/index/mod.rs b/src/table/writer/index/mod.rs index 0da86b0fe..1c51eb424 100644 --- a/src/table/writer/index/mod.rs +++ b/src/table/writer/index/mod.rs @@ -9,9 +9,9 @@ pub use full::FullIndexWriter; pub use partitioned::PartitionedIndexWriter; use crate::{checksum::ChecksummedWriter, table::index_block::KeyedBlockHandle, CompressionType}; -use std::{fs::File, io::BufWriter}; +use std::io::BufWriter; -pub trait BlockIndexWriter { +pub trait BlockIndexWriter { /// Registers a data block in the block index. fn register_data_block(&mut self, block_handle: KeyedBlockHandle) -> crate::Result<()>; @@ -20,7 +20,7 @@ pub trait BlockIndexWriter { /// Returns the number of index blocks written. fn finish( self: Box, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, ) -> crate::Result; fn use_compression( diff --git a/src/table/writer/index/partitioned.rs b/src/table/writer/index/partitioned.rs index ccb95521c..e80cab9b6 100644 --- a/src/table/writer/index/partitioned.rs +++ b/src/table/writer/index/partitioned.rs @@ -10,10 +10,7 @@ use crate::{ }, CompressionType, }; -use std::{ - fs::File, - io::{BufWriter, Seek, Write}, -}; +use std::io::{BufWriter, Seek, Write}; pub struct PartitionedIndexWriter { relative_file_pos: u64, @@ -103,9 +100,9 @@ impl PartitionedIndexWriter { Ok(()) } - fn write_top_level_index( + fn write_top_level_index( &mut self, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, index_base_offset: BlockOffset, ) -> crate::Result<()> { file_writer.start("tli")?; @@ -183,7 +180,7 @@ impl BlockIndexWriter for PartitionedIndex fn finish( mut self: Box, - file_writer: &mut sfa::Writer>>, + file_writer: &mut sfa::Writer>>, ) -> crate::Result { if self.buffer_size > 0 { self.cut_index_block()?; diff --git a/src/table/writer/mod.rs b/src/table/writer/mod.rs index f85d8c845..eb32108e6 100644 --- a/src/table/writer/mod.rs +++ b/src/table/writer/mod.rs @@ -14,6 +14,7 @@ use crate::{ checksum::{ChecksumType, ChecksummedWriter}, coding::Encode, file::fsync_directory, + fs::{FileLike, FileSystem}, table::{ writer::{ filter::{FilterWriter, FullFilterWriter}, @@ -26,7 +27,7 @@ use crate::{ Checksum, CompressionType, InternalValue, TableId, UserKey, ValueType, }; use index::BlockIndexWriter; -use std::{fs::File, io::BufWriter, path::PathBuf}; +use std::{io::BufWriter, marker::PhantomData, path::PathBuf}; #[derive(Copy, Clone, PartialEq, Eq, Debug, std::hash::Hash)] pub struct LinkedFile { @@ -37,11 +38,12 @@ pub struct LinkedFile { } /// Serializes and compresses values into blocks and writes them to disk as a table -pub struct Writer { +pub struct Writer { /// Table file path pub(crate) path: PathBuf, table_id: TableId, + phantom: PhantomData, data_block_restart_interval: u8, index_block_restart_interval: u8, @@ -63,15 +65,15 @@ pub struct Writer { /// File writer #[expect(clippy::struct_field_names)] - file_writer: sfa::Writer>>, + file_writer: sfa::Writer>>, /// Writer of index blocks #[expect(clippy::struct_field_names)] - index_writer: Box>>, + index_writer: Box>, /// Writer of filter #[expect(clippy::struct_field_names)] - filter_writer: Box>>, + filter_writer: Box>, /// Buffer of KVs chunk: Vec, @@ -94,9 +96,9 @@ pub struct Writer { initial_level: u8, } -impl Writer { +impl Writer { pub fn new(path: PathBuf, table_id: TableId, initial_level: u8) -> crate::Result { - let writer = BufWriter::with_capacity(u16::MAX.into(), File::create_new(&path)?); + let writer = BufWriter::with_capacity(u16::MAX.into(), F::create_new(&path)?); let writer = ChecksummedWriter::new(writer); let mut writer = sfa::Writer::from_writer(writer); writer.start("data")?; @@ -107,6 +109,7 @@ impl Writer { meta: meta::Metadata::default(), table_id, + phantom: PhantomData, data_block_restart_interval: 16, index_block_restart_interval: 1, @@ -375,7 +378,7 @@ impl Writer { // No items written! Just delete table file and return nothing if self.meta.item_count == 0 { - std::fs::remove_file(&self.path)?; + F::remove_file(&self.path)?; return Ok(None); } @@ -525,7 +528,7 @@ impl Writer { clippy::expect_used, reason = "if there's no parent folder, something has gone horribly wrong" )] - fsync_directory(self.path.parent().expect("should have folder"))?; + fsync_directory::(self.path.parent().expect("should have folder"))?; log::debug!( "Written {} items in {} blocks into new table file #{}, written {} MiB", @@ -548,7 +551,7 @@ mod tests { fn table_writer_count() -> crate::Result<()> { let dir = tempfile::tempdir()?; let path = dir.path().join("1"); - let mut writer = Writer::new(path, 1, 0)?; + let mut writer = Writer::::new(path, 1, 0)?; assert_eq!(0, writer.meta.key_count); assert_eq!(0, writer.chunk_size); diff --git a/src/tree/ingest.rs b/src/tree/ingest.rs index 0a588ab6d..79c01ad83 100644 --- a/src/tree/ingest.rs +++ b/src/tree/ingest.rs @@ -4,8 +4,8 @@ use super::Tree; use crate::{ - config::FilterPolicyEntry, table::multi_writer::MultiWriter, BlobIndirection, SeqNo, UserKey, - UserValue, + config::FilterPolicyEntry, fs::FileSystem, table::multi_writer::MultiWriter, BlobIndirection, + SeqNo, UserKey, UserValue, }; use std::path::PathBuf; @@ -17,21 +17,21 @@ pub const INITIAL_CANONICAL_LEVEL: usize = 1; /// /// Ingested data bypasses memtables and is written directly into new tables, /// using the same table writer configuration that is used for flush and compaction. -pub struct Ingestion<'a> { +pub struct Ingestion<'a, F: FileSystem> { folder: PathBuf, - tree: &'a Tree, - pub(crate) writer: MultiWriter, + tree: &'a Tree, + pub(crate) writer: MultiWriter, seqno: SeqNo, last_key: Option, } -impl<'a> Ingestion<'a> { +impl<'a, F: FileSystem + 'static> Ingestion<'a, F> { /// Creates a new ingestion. /// /// # Errors /// /// Will return `Err` if an IO error occurs. - pub fn new(tree: &'a Tree) -> crate::Result { + pub fn new(tree: &'a Tree) -> crate::Result { let folder = tree.config.path.join(crate::file::TABLES_FOLDER); log::debug!("Ingesting into tables in {}", folder.display()); @@ -46,7 +46,7 @@ impl<'a> Ingestion<'a> { .get(INITIAL_CANONICAL_LEVEL); // TODO: maybe create a PrepareMultiWriter that can be used by flush, ingest and compaction worker - let mut writer = MultiWriter::new( + let mut writer = MultiWriter::::new( folder.clone(), tree.table_id_counter.clone(), 64 * 1_024 * 1_024, @@ -290,8 +290,8 @@ impl<'a> Ingestion<'a> { // pressure unnecessarily. let created_tables = results .into_iter() - .map(|(table_id, checksum)| -> crate::Result
{ - Table::recover( + .map(|(table_id, checksum)| -> crate::Result> { + Table::::recover( self.folder.join(table_id.to_string()), checksum, global_seqno, @@ -326,7 +326,7 @@ impl<'a> Ingestion<'a> { // Perform maintenance on the version history (e.g., clean up old versions). // We use gc_watermark=0 since ingestion doesn't affect sealed memtables. - if let Err(e) = version_lock.maintenance(&self.tree.config.path, 0) { + if let Err(e) = version_lock.maintenance::(&self.tree.config.path, 0) { log::warn!("Version GC failed: {e:?}"); } diff --git a/src/tree/inner.rs b/src/tree/inner.rs index b186dd0fb..c146af113 100644 --- a/src/tree/inner.rs +++ b/src/tree/inner.rs @@ -5,6 +5,7 @@ use crate::{ compaction::state::CompactionState, config::Config, + fs::FileSystem, stop_signal::StopSignal, version::{persist_version, SuperVersions, Version}, SequenceNumberCounter, TableId, @@ -30,7 +31,7 @@ pub fn get_next_tree_id() -> TreeId { TREE_ID_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed) } -pub struct TreeInner { +pub struct TreeInner { /// Unique tree ID pub id: TreeId, @@ -46,12 +47,12 @@ pub struct TreeInner { /// Hands out a unique (monotonically increasing) blob file ID pub(crate) blob_file_id_counter: SequenceNumberCounter, - pub(crate) version_history: Arc>, + pub(crate) version_history: Arc>>, pub(crate) compaction_state: Arc>, /// Tree configuration - pub config: Arc, + pub config: Arc>, /// Compaction may take a while; setting the signal to `true` /// will interrupt the compaction and kill the worker. @@ -70,9 +71,9 @@ pub struct TreeInner { pub metrics: Arc, } -impl TreeInner { - pub(crate) fn create_new(config: Config) -> crate::Result { - let version = Version::new( +impl TreeInner { + pub(crate) fn create_new(config: Config) -> crate::Result { + let version = Version::::new( 0, if config.kv_separation_opts.is_some() { crate::TreeType::Blob @@ -80,7 +81,7 @@ impl TreeInner { crate::TreeType::Standard }, ); - persist_version(&config.path, &version)?; + persist_version::(&config.path, &version)?; Ok(Self { id: get_next_tree_id(), @@ -104,7 +105,7 @@ impl TreeInner { } } -impl Drop for TreeInner { +impl Drop for TreeInner { fn drop(&mut self) { log::debug!("Dropping TreeInner"); diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 57070f2b3..ea48977cf 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -11,6 +11,7 @@ use crate::{ config::Config, file::CURRENT_VERSION_FILE, format_version::FormatVersion, + fs::FileSystem, iter_guard::{IterGuard, IterGuardImpl}, key::InternalKey, manifest::Manifest, @@ -73,18 +74,23 @@ fn ignore_tombstone_value(item: InternalValue) -> Option { } /// A log-structured merge tree (LSM-tree/LSMT) -#[derive(Clone)] -pub struct Tree(#[doc(hidden)] pub Arc); +pub struct Tree(#[doc(hidden)] pub Arc>); -impl std::ops::Deref for Tree { - type Target = TreeInner; +impl Clone for Tree { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl std::ops::Deref for Tree { + type Target = TreeInner; fn deref(&self) -> &Self::Target { &self.0 } } -impl AbstractTree for Tree { +impl AbstractTree for Tree { fn table_file_cache_size(&self) -> usize { self.config .descriptor_table @@ -94,7 +100,7 @@ impl AbstractTree for Tree { fn get_version_history_lock( &self, - ) -> std::sync::RwLockWriteGuard<'_, crate::version::SuperVersions> { + ) -> std::sync::RwLockWriteGuard<'_, crate::version::SuperVersions> { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] self.version_history.write().expect("lock is poisoned") } @@ -165,7 +171,7 @@ impl AbstractTree for Tree { Self::get_internal_entry_from_version(&super_version, key, seqno) } - fn current_version(&self) -> Version { + fn current_version(&self) -> Version { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] self.version_history .read() @@ -197,10 +203,10 @@ impl AbstractTree for Tree { prefix: K, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static> { + ) -> Box> + Send + 'static> { Box::new( self.create_prefix(&prefix, seqno, index) - .map(|kv| IterGuardImpl::Standard(Guard(kv))), + .map(|kv| IterGuardImpl::::Standard(Guard(kv))), ) } @@ -209,10 +215,10 @@ impl AbstractTree for Tree { range: R, seqno: SeqNo, index: Option<(Arc, SeqNo)>, - ) -> Box + Send + 'static> { + ) -> Box> + Send + 'static> { Box::new( self.create_range(&range, seqno, index) - .map(|kv| IterGuardImpl::Standard(Guard(kv))), + .map(|kv| IterGuardImpl::::Standard(Guard(kv))), ) } @@ -341,7 +347,7 @@ impl AbstractTree for Tree { fn flush_to_tables( &self, stream: impl Iterator>, - ) -> crate::Result, Option>)>> { + ) -> crate::Result>, Option>>)>> { use crate::{file::TABLES_FOLDER, table::multi_writer::MultiWriter}; use std::time::Instant; @@ -367,7 +373,7 @@ impl AbstractTree for Tree { folder.display(), ); - let mut table_writer = MultiWriter::new( + let mut table_writer = MultiWriter::::new( folder.clone(), self.table_id_counter.clone(), 64 * 1_024 * 1_024, @@ -410,8 +416,8 @@ impl AbstractTree for Tree { // Load tables let tables = result .into_iter() - .map(|(table_id, checksum)| -> crate::Result
{ - Table::recover( + .map(|(table_id, checksum)| -> crate::Result> { + Table::::recover( folder.join(table_id.to_string()), checksum, 0, @@ -432,8 +438,8 @@ impl AbstractTree for Tree { #[expect(clippy::significant_drop_tightening)] fn register_tables( &self, - tables: &[Table], - blob_files: Option<&[BlobFile]>, + tables: &[Table], + blob_files: Option<&[BlobFile]>, frag_map: Option, sealed_memtables_to_delete: &[crate::tree::inner::MemtableId], gc_watermark: SeqNo, @@ -441,7 +447,7 @@ impl AbstractTree for Tree { log::trace!( "Registering {} tables, {} blob files", tables.len(), - blob_files.map(<[BlobFile]>::len).unwrap_or_default(), + blob_files.map(<[BlobFile]>::len).unwrap_or_default(), ); #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] @@ -471,7 +477,7 @@ impl AbstractTree for Tree { &self.config.visible_seqno, )?; - if let Err(e) = version_lock.maintenance(&self.config.path, gc_watermark) { + if let Err(e) = version_lock.maintenance::(&self.config.path, gc_watermark) { log::warn!("Version GC failed: {e:?}"); } @@ -503,7 +509,7 @@ impl AbstractTree for Tree { fn compact( &self, - strategy: Arc, + strategy: Arc>, seqno_threshold: SeqNo, ) -> crate::Result<()> { // NOTE: Read lock major compaction lock @@ -523,7 +529,7 @@ impl AbstractTree for Tree { self.0.get_next_table_id() } - fn tree_config(&self) -> &Config { + fn tree_config(&self) -> &Config { &self.config } @@ -666,14 +672,17 @@ impl AbstractTree for Tree { } } -impl Tree { +impl Tree { #[doc(hidden)] pub fn create_internal_range<'a, K: AsRef<[u8]> + 'a, R: RangeBounds + 'a>( - version: SuperVersion, + version: SuperVersion, range: &'a R, seqno: SeqNo, ephemeral: Option<(Arc, SeqNo)>, - ) -> impl DoubleEndedIterator> + 'static { + ) -> impl DoubleEndedIterator> + 'static + where + F: 'static, + { use crate::range::{IterState, TreeIter}; use std::ops::Bound::{self, Excluded, Included, Unbounded}; @@ -697,7 +706,7 @@ impl Tree { } pub(crate) fn get_internal_entry_from_version( - super_version: &SuperVersion, + super_version: &SuperVersion, key: &[u8], seqno: SeqNo, ) -> crate::Result> { @@ -717,7 +726,7 @@ impl Tree { } fn get_internal_entry_from_tables( - version: &Version, + version: &Version, key: &[u8], seqno: SeqNo, ) -> crate::Result> { @@ -739,7 +748,7 @@ impl Tree { } fn get_internal_entry_from_sealed_memtables( - super_version: &SuperVersion, + super_version: &SuperVersion, key: &[u8], seqno: SeqNo, ) -> Option { @@ -752,7 +761,7 @@ impl Tree { None } - pub(crate) fn get_version_for_snapshot(&self, seqno: SeqNo) -> SuperVersion { + pub(crate) fn get_version_for_snapshot(&self, seqno: SeqNo) -> SuperVersion { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] self.version_history .read() @@ -809,7 +818,7 @@ impl Tree { /// # Errors /// /// Returns error, if an IO error occurred. - pub(crate) fn open(config: Config) -> crate::Result { + pub(crate) fn open(config: Config) -> crate::Result { log::debug!("Opening LSM-tree at {}", config.path.display()); // Check for old version @@ -842,7 +851,7 @@ impl Tree { fn inner_compact( &self, - strategy: Arc, + strategy: Arc>, mvcc_gc_watermark: SeqNo, ) -> crate::Result<()> { use crate::compaction::worker::{do_compaction, Options}; @@ -863,7 +872,10 @@ impl Tree { &self, seqno: SeqNo, ephemeral: Option<(Arc, SeqNo)>, - ) -> impl DoubleEndedIterator> + 'static { + ) -> impl DoubleEndedIterator> + 'static + where + F: 'static, + { self.create_range::(&.., seqno, ephemeral) } @@ -873,7 +885,10 @@ impl Tree { range: &'a R, seqno: SeqNo, ephemeral: Option<(Arc, SeqNo)>, - ) -> impl DoubleEndedIterator> + 'static { + ) -> impl DoubleEndedIterator> + 'static + where + F: 'static, + { #[expect(clippy::expect_used, reason = "lock is expected to not be poisoned")] let super_version = self .version_history @@ -893,7 +908,10 @@ impl Tree { prefix: K, seqno: SeqNo, ephemeral: Option<(Arc, SeqNo)>, - ) -> impl DoubleEndedIterator> + 'static { + ) -> impl DoubleEndedIterator> + 'static + where + F: 'static, + { use crate::range::prefix_to_range; let range = prefix_to_range(prefix.as_ref()); @@ -920,7 +938,7 @@ impl Tree { /// # Errors /// /// Returns error, if an IO error occurred. - fn recover(mut config: Config) -> crate::Result { + fn recover(mut config: Config) -> crate::Result { use crate::stop_signal::StopSignal; use inner::get_next_tree_id; @@ -1001,21 +1019,20 @@ impl Tree { } /// Creates a new LSM-tree in a directory. - fn create_new(config: Config) -> crate::Result { + fn create_new(config: Config) -> crate::Result { use crate::file::{fsync_directory, TABLES_FOLDER}; - use std::fs::create_dir_all; let path = config.path.clone(); log::trace!("Creating LSM-tree at {}", path.display()); - create_dir_all(&path)?; + F::create_dir_all(&path)?; let table_folder_path = path.join(TABLES_FOLDER); - create_dir_all(&table_folder_path)?; + F::create_dir_all(&table_folder_path)?; // IMPORTANT: fsync folders on Unix - fsync_directory(&table_folder_path)?; - fsync_directory(&path)?; + fsync_directory::(&table_folder_path)?; + fsync_directory::(&path)?; let inner = TreeInner::create_new(config)?; Ok(Self(Arc::new(inner))) @@ -1025,14 +1042,14 @@ impl Tree { fn recover_levels>( tree_path: P, tree_id: TreeId, - config: &Config, + config: &Config, #[cfg(feature = "metrics")] metrics: &Arc, - ) -> crate::Result { + ) -> crate::Result> { use crate::{file::fsync_directory, file::TABLES_FOLDER, TableId}; let tree_path = tree_path.as_ref(); - let recovery = recover(tree_path)?; + let recovery = recover::(tree_path)?; let table_map = { let mut result: crate::HashMap = @@ -1076,15 +1093,14 @@ impl Tree { let table_base_folder = tree_path.join(TABLES_FOLDER); - if !table_base_folder.try_exists()? { - std::fs::create_dir_all(&table_base_folder)?; - fsync_directory(&table_base_folder)?; + if !F::exists(&table_base_folder)? { + F::create_dir_all(&table_base_folder)?; + fsync_directory::(&table_base_folder)?; } let mut orphaned_tables = vec![]; - for (idx, dirent) in std::fs::read_dir(&table_base_folder)?.enumerate() { - let dirent = dirent?; + for (idx, dirent) in F::read_dir(&table_base_folder)?.into_iter().enumerate() { let file_name = dirent.file_name(); // https://en.wikipedia.org/wiki/.DS_Store @@ -1098,12 +1114,12 @@ impl Tree { } let table_file_name = file_name.to_str().ok_or_else(|| { - log::error!("invalid table file name {}", file_name.display()); + log::error!("invalid table file name {}", file_name.to_string_lossy()); crate::Error::Unrecoverable })?; - let table_file_path = dirent.path(); - assert!(!table_file_path.is_dir()); + let table_file_path = dirent.path().to_path_buf(); + assert!(!dirent.is_dir()); let table_id = table_file_name.parse::().map_err(|e| { log::error!("invalid table file name {table_file_name:?}: {e:?}"); @@ -1114,7 +1130,7 @@ impl Tree { let pin_filter = config.filter_block_pinning_policy.get(level_idx.into()); let pin_index = config.index_block_pinning_policy.get(level_idx.into()); - let table = Table::recover( + let table = Table::::recover( table_file_path, checksum, global_seqno, @@ -1147,50 +1163,48 @@ impl Tree { log::debug!("Successfully recovered {} tables", tables.len()); - let (blob_files, orphaned_blob_files) = crate::vlog::recover_blob_files( + let (blob_files, orphaned_blob_files) = crate::vlog::recover_blob_files::( &tree_path.join(crate::file::BLOBS_FOLDER), &recovery.blob_file_ids, tree_id, config.descriptor_table.as_ref(), )?; - let version = Version::from_recovery(recovery, &tables, &blob_files)?; + let version = Version::::from_recovery(recovery, &tables, &blob_files)?; // NOTE: Cleanup old versions // But only after we definitely recovered the latest version - Self::cleanup_orphaned_version(tree_path, version.id())?; + Self::cleanup_orphaned_version::(tree_path, version.id())?; for table_path in orphaned_tables { log::debug!("Deleting orphaned table {}", table_path.display()); - std::fs::remove_file(&table_path)?; + F::remove_file(&table_path)?; } for blob_file_path in orphaned_blob_files { log::debug!("Deleting orphaned blob file {}", blob_file_path.display()); - std::fs::remove_file(&blob_file_path)?; + F::remove_file(&blob_file_path)?; } Ok(version) } - fn cleanup_orphaned_version( + fn cleanup_orphaned_version( path: &Path, latest_version_id: crate::version::VersionId, ) -> crate::Result<()> { let version_str = format!("v{latest_version_id}"); - for file in std::fs::read_dir(path)? { - let dirent = file?; - - if dirent.file_type()?.is_dir() { + for dirent in Fs::read_dir(path)? { + if dirent.is_dir() { continue; } let name = dirent.file_name(); if name.to_string_lossy().starts_with('v') && *name != *version_str { - log::trace!("Cleanup orphaned version {}", name.display()); - std::fs::remove_file(dirent.path())?; + log::trace!("Cleanup orphaned version {}", name.to_string_lossy()); + Fs::remove_file(dirent.path())?; } } diff --git a/src/version/blob_file_list.rs b/src/version/blob_file_list.rs index e99d8b15f..bfd44da16 100644 --- a/src/version/blob_file_list.rs +++ b/src/version/blob_file_list.rs @@ -1,14 +1,31 @@ use crate::{ blob_tree::FragmentationMap, + fs::FileSystem, vlog::{BlobFile, BlobFileId}, }; use std::collections::BTreeMap; -#[derive(Clone, Default)] -pub struct BlobFileList(BTreeMap); +pub struct BlobFileList(BTreeMap>); -impl BlobFileList { - pub fn new(blob_files: BTreeMap) -> Self { +impl Clone for BlobFileList { + fn clone(&self) -> Self { + let blob_files = self + .0 + .iter() + .map(|(id, blob_file)| (*id, blob_file.clone())) + .collect(); + Self(blob_files) + } +} + +impl Default for BlobFileList { + fn default() -> Self { + Self(BTreeMap::default()) + } +} + +impl BlobFileList { + pub fn new(blob_files: BTreeMap>) -> Self { Self(blob_files) } @@ -21,7 +38,7 @@ impl BlobFileList { self.0.len() } - pub fn extend>(&mut self, iter: I) { + pub fn extend)>>(&mut self, iter: I) { self.0.extend(iter); } @@ -29,22 +46,22 @@ impl BlobFileList { self.0.contains_key(&key) } - pub fn prune_dead(&mut self, gc_stats: &FragmentationMap) -> Vec { + pub fn prune_dead(&mut self, gc_stats: &FragmentationMap) -> Vec> { self.0 .extract_if(.., |_, blob_file| blob_file.is_dead(gc_stats)) .map(|(_, v)| v) .collect() } - pub fn insert(&mut self, key: BlobFileId, value: BlobFile) { + pub fn insert(&mut self, key: BlobFileId, value: BlobFile) { self.0.insert(key, value); } - pub fn remove(&mut self, key: BlobFileId) -> Option { + pub fn remove(&mut self, key: BlobFileId) -> Option> { self.0.remove(&key) } - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> impl Iterator> { self.0.values() } @@ -52,7 +69,7 @@ impl BlobFileList { self.0.keys() } - pub fn get(&self, key: BlobFileId) -> Option<&BlobFile> { + pub fn get(&self, key: BlobFileId) -> Option<&BlobFile> { self.0.get(&key) } } diff --git a/src/version/mod.rs b/src/version/mod.rs index 1d6510ded..6b9d18c4b 100644 --- a/src/version/mod.rs +++ b/src/version/mod.rs @@ -18,6 +18,7 @@ use crate::blob_tree::{FragmentationEntry, FragmentationMap}; use crate::checksum::ChecksumType; use crate::coding::Encode; use crate::compaction::state::hidden_set::HiddenSet; +use crate::fs::FileSystem; use crate::version::recovery::Recovery; use crate::TreeType; use crate::{ @@ -33,7 +34,7 @@ pub const DEFAULT_LEVEL_COUNT: u8 = 7; /// Monotonically increasing ID of a version. pub type VersionId = u64; -impl Ranged for Table { +impl Ranged for Table { fn key_range(&self) -> &KeyRange { &self.metadata.key_range } @@ -77,23 +78,28 @@ impl GenericLevel { } } -#[derive(Clone)] -pub struct Level(Arc>); +pub struct Level(Arc>>); -impl std::ops::Deref for Level { - type Target = GenericLevel
; +impl Clone for Level { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl std::ops::Deref for Level { + type Target = GenericLevel>; fn deref(&self) -> &Self::Target { &self.0 } } -impl Level { +impl Level { pub fn empty() -> Self { Self::from_runs(vec![]) } - pub fn from_runs(runs: Vec>>) -> Self { + pub fn from_runs(runs: Vec>>>) -> Self { Self(Arc::new(GenericLevel { runs })) } @@ -104,7 +110,7 @@ impl Level { .collect() } - pub fn first_run(&self) -> Option<&Arc>> { + pub fn first_run(&self) -> Option<&Arc>>> { self.runs.first() } @@ -138,14 +144,14 @@ impl Level { } } -pub struct VersionInner { +pub struct VersionInner { /// The version's ID id: VersionId, tree_type: TreeType, /// The individual LSM-tree levels which consist of runs of tables - levels: Vec, + levels: Vec>, // NOTE: We purposefully use Arc<_> to avoid deep cloning the blob files again and again // @@ -154,7 +160,7 @@ pub struct VersionInner { // /// Blob files for large values (value log) #[doc(hidden)] - pub blob_files: Arc, + pub blob_files: Arc>, /// Blob file fragmentation gc_stats: Arc, @@ -163,13 +169,20 @@ pub struct VersionInner { /// A version is an immutable, point-in-time view of a tree's structure /// /// Any time a table is created or deleted, a new version is created. -#[derive(Clone)] -pub struct Version { - inner: Arc, +pub struct Version { + inner: Arc>, } -impl std::ops::Deref for Version { - type Target = VersionInner; +impl Clone for Version { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } +} + +impl std::ops::Deref for Version { + type Target = VersionInner; fn deref(&self) -> &Self::Target { &self.inner @@ -177,7 +190,7 @@ impl std::ops::Deref for Version { } // TODO: impl using generics so we can easily unit test Version transformation functions -impl Version { +impl Version { /// Returns the initial tree type. pub fn tree_type(&self) -> TreeType { self.tree_type @@ -192,7 +205,7 @@ impl Version { &self.gc_stats } - pub fn l0(&self) -> &Level { + pub fn l0(&self) -> &Level { #[expect(clippy::expect_used)] self.levels.first().expect("L0 should exist") } @@ -224,8 +237,8 @@ impl Version { pub(crate) fn from_recovery( recovery: Recovery, - tables: &[Table], - blob_files: &[BlobFile], + tables: &[Table], + blob_files: &[BlobFile], ) -> crate::Result { let version_levels = recovery .table_ids @@ -272,8 +285,8 @@ impl Version { pub fn from_levels( id: VersionId, tree_type: TreeType, - levels: Vec, - blob_files: BlobFileList, + levels: Vec>, + blob_files: BlobFileList, gc_stats: FragmentationMap, ) -> Self { Self { @@ -293,7 +306,7 @@ impl Version { } /// Returns an iterator through all levels. - pub fn iter_levels(&self) -> impl Iterator { + pub fn iter_levels(&self) -> impl Iterator> { self.levels.iter() } @@ -307,27 +320,27 @@ impl Version { } /// Returns an iterator over all tables. - pub fn iter_tables(&self) -> impl Iterator { + pub fn iter_tables(&self) -> impl Iterator> { self.levels .iter() .flat_map(|x| x.iter()) .flat_map(|x| x.iter()) } - pub(crate) fn get_table(&self, id: TableId) -> Option<&Table> { + pub(crate) fn get_table(&self, id: TableId) -> Option<&Table> { self.iter_tables().find(|x| x.metadata.id == id) } /// Gets the n-th level. - pub fn level(&self, n: usize) -> Option<&Level> { + pub fn level(&self, n: usize) -> Option<&Level> { self.levels.get(n) } /// Creates a new version with the additional run added to the "top" of L0. pub fn with_new_l0_run( &self, - run: &[Table], - blob_files: Option<&[BlobFile]>, + run: &[Table], + blob_files: Option<&[BlobFile]>, diff: Option, ) -> Self { let id = self.id + 1; @@ -368,9 +381,9 @@ impl Version { // Value log let value_log = if let Some(blob_files) = blob_files { - let mut copy = self.blob_files.deref().clone(); + let mut copy: BlobFileList = self.blob_files.as_ref().clone(); copy.extend(blob_files.iter().cloned().map(|bf| (bf.id(), bf))); - copy.into() + Arc::new(copy) } else { self.blob_files.clone() }; @@ -378,7 +391,7 @@ impl Version { let gc_stats = if let Some(diff) = diff { let mut copy = self.gc_stats.deref().clone(); diff.merge_into(&mut copy); - copy.prune(&value_log); + copy.prune(value_log.as_ref()); Arc::new(copy) } else { self.gc_stats.clone() @@ -401,13 +414,13 @@ impl Version { pub fn with_dropped( &self, ids: &[TableId], - dropped_blob_files: &mut Vec, + dropped_blob_files: &mut Vec>, ) -> crate::Result { let id = self.id + 1; let mut levels = vec![]; - let mut dropped_tables: Vec
= vec![]; + let mut dropped_tables: Vec> = vec![]; for level in &self.levels { let runs = level @@ -463,7 +476,7 @@ impl Version { let value_log = if dropped_tables.is_empty() { self.blob_files.clone() } else { - let mut copy = self.blob_files.deref().clone(); + let mut copy: BlobFileList = self.blob_files.as_ref().clone(); dropped_blob_files.extend(copy.prune_dead(&gc_stats)); Arc::new(copy) }; @@ -482,10 +495,10 @@ impl Version { pub fn with_merge( &self, old_ids: &[TableId], - new_tables: &[Table], + new_tables: &[Table], dest_level: usize, diff: Option, - new_blob_files: Vec, + new_blob_files: Vec>, blob_files_to_drop: &HashSet, ) -> Self { let id = self.id + 1; @@ -520,7 +533,7 @@ impl Version { let value_log = if has_diff || !new_blob_files.is_empty() || !blob_files_to_drop.is_empty() { - let mut copy = self.blob_files.deref().clone(); + let mut copy: BlobFileList = self.blob_files.as_ref().clone(); for blob_file in new_blob_files { copy.insert(blob_file.id(), blob_file); @@ -542,7 +555,7 @@ impl Version { diff.merge_into(&mut copy); } - copy.prune(&value_log); + copy.prune(value_log.as_ref()); Arc::new(copy) } else { @@ -609,7 +622,7 @@ impl Version { } } -impl Version { +impl Version { pub(crate) fn encode_into( &self, writer: &mut sfa::Writer, diff --git a/src/version/persist.rs b/src/version/persist.rs index ad42fcf6c..8db86c839 100644 --- a/src/version/persist.rs +++ b/src/version/persist.rs @@ -1,12 +1,13 @@ use crate::{ checksum::ChecksummedWriter, file::{fsync_directory, rewrite_atomic, CURRENT_VERSION_FILE}, + fs::FileSystem, version::Version, }; use byteorder::{LittleEndian, WriteBytesExt}; use std::{io::BufWriter, path::Path}; -pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { +pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { log::trace!( "Persisting version {} in {}", version.id(), @@ -14,7 +15,7 @@ pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { ); let path = folder.join(format!("v{}", version.id())); - let file = std::fs::File::create_new(path)?; + let file = F::create_new(&path)?; let writer = BufWriter::new(file); let mut writer = ChecksummedWriter::new(writer); @@ -29,7 +30,7 @@ pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { })?; // IMPORTANT: fsync folder on Unix - fsync_directory(folder)?; + fsync_directory::(folder)?; } let checksum = writer.checksum(); @@ -39,7 +40,7 @@ pub fn persist_version(folder: &Path, version: &Version) -> crate::Result<()> { current_file_content.write_u128::(checksum.into_u128())?; current_file_content.write_u8(0)?; // 0 = xxh3 - rewrite_atomic(&folder.join(CURRENT_VERSION_FILE), ¤t_file_content)?; + rewrite_atomic::(&folder.join(CURRENT_VERSION_FILE), ¤t_file_content)?; Ok(()) } diff --git a/src/version/recovery.rs b/src/version/recovery.rs index 6bbff7bd9..63583cab6 100644 --- a/src/version/recovery.rs +++ b/src/version/recovery.rs @@ -3,16 +3,16 @@ // (found in the LICENSE-* files in the repository) use crate::{ - coding::Decode, file::CURRENT_VERSION_FILE, version::VersionId, vlog::BlobFileId, Checksum, - SeqNo, TableId, TreeType, + coding::Decode, file::CURRENT_VERSION_FILE, fs::FileSystem, version::VersionId, + vlog::BlobFileId, Checksum, SeqNo, TableId, TreeType, }; use byteorder::{LittleEndian, ReadBytesExt}; use std::path::Path; -pub fn get_current_version(folder: &std::path::Path) -> crate::Result { +pub fn get_current_version(folder: &std::path::Path) -> crate::Result { use byteorder::{LittleEndian, ReadBytesExt}; - std::fs::File::open(folder.join(CURRENT_VERSION_FILE)) + F::open(&folder.join(CURRENT_VERSION_FILE)) .and_then(|mut f| f.read_u64::()) .map_err(Into::into) } @@ -31,8 +31,8 @@ pub struct Recovery { pub gc_stats: crate::blob_tree::FragmentationMap, } -pub fn recover(folder: &Path) -> crate::Result { - let curr_version_id = get_current_version(folder)?; +pub fn recover(folder: &Path) -> crate::Result { + let curr_version_id = get_current_version::(folder)?; let version_file_path = folder.join(format!("v{curr_version_id}")); // TODO: maybe validate current version using the checksum in "current" diff --git a/src/version/super_version.rs b/src/version/super_version.rs index 71bf32186..0e92dab1e 100644 --- a/src/version/super_version.rs +++ b/src/version/super_version.rs @@ -3,6 +3,7 @@ // (found in the LICENSE-* files in the repository) use crate::{ + fs::FileSystem, memtable::Memtable, tree::sealed::SealedMemtables, version::{persist_version, Version}, @@ -11,8 +12,7 @@ use crate::{ use std::{collections::VecDeque, path::Path, sync::Arc}; /// A super version is a point-in-time snapshot of memtables and a [`Version`] (list of disk files) -#[derive(Clone)] -pub struct SuperVersion { +pub struct SuperVersion { /// Active memtable that is being written to #[doc(hidden)] pub active_memtable: Arc, @@ -21,15 +21,26 @@ pub struct SuperVersion { pub(crate) sealed_memtables: Arc, /// Current tree version - pub(crate) version: Version, + pub(crate) version: Version, pub(crate) seqno: SeqNo, } -pub struct SuperVersions(VecDeque); +impl Clone for SuperVersion { + fn clone(&self) -> Self { + Self { + active_memtable: self.active_memtable.clone(), + sealed_memtables: self.sealed_memtables.clone(), + version: self.version.clone(), + seqno: self.seqno, + } + } +} -impl SuperVersions { - pub fn new(version: Version) -> Self { +pub struct SuperVersions(VecDeque>); + +impl SuperVersions { + pub fn new(version: Version) -> Self { Self( vec![SuperVersion { active_memtable: Arc::new(Memtable::new(0)), @@ -67,7 +78,11 @@ impl SuperVersions { self.len().saturating_sub(1) } - pub fn maintenance(&mut self, folder: &Path, gc_watermark: SeqNo) -> crate::Result<()> { + pub fn maintenance( + &mut self, + folder: &Path, + gc_watermark: SeqNo, + ) -> crate::Result<()> { if gc_watermark == 0 { return Ok(()); } @@ -91,8 +106,8 @@ impl SuperVersions { ); let path = folder.join(format!("v{}", head.version.id())); - if path.try_exists()? { - std::fs::remove_file(path)?; + if FS::exists(&path)? { + FS::remove_file(&path)?; } self.0.pop_front(); @@ -110,10 +125,10 @@ impl SuperVersions { /// and returns a new version. /// /// The function takes care of persisting the version changes on disk. - pub(crate) fn upgrade_version crate::Result>( + pub(crate) fn upgrade_version) -> crate::Result>>( &mut self, tree_path: &Path, - f: F, + f: T, seqno: &SequenceNumberCounter, visible_seqno: &SequenceNumberCounter, ) -> crate::Result<()> { @@ -125,11 +140,11 @@ impl SuperVersions { /// This is useful when the seqno must be coordinated with other operations /// (e.g., bulk ingestion where tables are recovered with the same seqno). pub(crate) fn upgrade_version_with_seqno< - F: FnOnce(&SuperVersion) -> crate::Result, + T: FnOnce(&SuperVersion) -> crate::Result>, >( &mut self, tree_path: &Path, - f: F, + f: T, seqno: SeqNo, visible_seqno: &SequenceNumberCounter, ) -> crate::Result<()> { @@ -137,7 +152,7 @@ impl SuperVersions { next_version.seqno = seqno; log::trace!("Next version seqno={}", next_version.seqno); - persist_version(tree_path, &next_version.version)?; + persist_version::(tree_path, &next_version.version)?; self.append_version(next_version); visible_seqno.fetch_max(seqno + 1); @@ -145,17 +160,17 @@ impl SuperVersions { Ok(()) } - pub fn append_version(&mut self, version: SuperVersion) { + pub fn append_version(&mut self, version: SuperVersion) { self.0.push_back(version); } - pub fn replace_latest_version(&mut self, version: SuperVersion) { + pub fn replace_latest_version(&mut self, version: SuperVersion) { if self.0.pop_back().is_some() { self.0.push_back(version); } } - pub fn latest_version(&self) -> SuperVersion { + pub fn latest_version(&self) -> SuperVersion { #[expect(clippy::expect_used, reason = "SuperVersion is expected to exist")] self.0 .iter() @@ -164,7 +179,7 @@ impl SuperVersions { .expect("should always have a SuperVersion") } - pub fn get_version_for_snapshot(&self, seqno: SeqNo) -> SuperVersion { + pub fn get_version_for_snapshot(&self, seqno: SeqNo) -> SuperVersion { if seqno == 0 { #[expect(clippy::expect_used, reason = "SuperVersion is expected to exist")] return self @@ -207,26 +222,26 @@ mod tests { SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 0, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 1, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 2, }, ] .into(), ); - history.maintenance(Path::new("."), 0)?; + history.maintenance::(Path::new("."), 0)?; assert_eq!(history.free_list_len(), 2); @@ -240,26 +255,26 @@ mod tests { SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 0, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 1, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 2, }, ] .into(), ); - history.maintenance(Path::new("."), 3)?; + history.maintenance::(Path::new("."), 3)?; assert_eq!(history.len(), 1); @@ -273,32 +288,32 @@ mod tests { SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 0, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 1, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 2, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 8, }, ] .into(), ); - history.maintenance(Path::new("."), 3)?; + history.maintenance::(Path::new("."), 3)?; assert_eq!(history.len(), 2); @@ -312,20 +327,20 @@ mod tests { SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 0, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 8, }, ] .into(), ); - history.maintenance(Path::new("."), 3)?; + history.maintenance::(Path::new("."), 3)?; assert_eq!(history.len(), 2); @@ -339,20 +354,20 @@ mod tests { SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 0, }, SuperVersion { active_memtable: Arc::new(Memtable::new(0)), sealed_memtables: Arc::default(), - version: Version::new(0, crate::TreeType::Standard), + version: Version::::new(0, crate::TreeType::Standard), seqno: 2, }, ] .into(), ); - history.maintenance(Path::new("."), 3)?; + history.maintenance::(Path::new("."), 3)?; assert_eq!(history.len(), 1); diff --git a/src/vlog/accessor.rs b/src/vlog/accessor.rs index f246ff7d7..ed81a5bf5 100644 --- a/src/vlog/accessor.rs +++ b/src/vlog/accessor.rs @@ -3,16 +3,17 @@ // (found in the LICENSE-* files in the repository) use crate::{ + fs::FileSystem, version::BlobFileList, vlog::{blob_file::reader::Reader, ValueHandle}, Cache, GlobalTableId, TreeId, UserValue, }; -use std::{fs::File, path::Path, sync::Arc}; +use std::{path::Path, sync::Arc}; -pub struct Accessor<'a>(&'a BlobFileList); +pub struct Accessor<'a, F: FileSystem>(&'a BlobFileList); -impl<'a> Accessor<'a> { - pub fn new(blob_files: &'a BlobFileList) -> Self { +impl<'a, F: FileSystem> Accessor<'a, F> { + pub fn new(blob_files: &'a BlobFileList) -> Self { Self(blob_files) } @@ -38,9 +39,7 @@ impl<'a> Accessor<'a> { if let Some(cached_fd) = blob_file.file_accessor().access_for_blob_file(&bf_id) { (cached_fd, false) } else { - let file = Arc::new(File::open( - base_path.join(vhandle.blob_file_id.to_string()), - )?); + let file = Arc::new(F::open(&base_path.join(vhandle.blob_file_id.to_string()))?); (file, true) }; diff --git a/src/vlog/blob_file/merge.rs b/src/vlog/blob_file/merge.rs index 9a87a420c..7a3172494 100644 --- a/src/vlog/blob_file/merge.rs +++ b/src/vlog/blob_file/merge.rs @@ -3,7 +3,10 @@ // (found in the LICENSE-* files in the repository) use super::scanner::Scanner as BlobFileScanner; -use crate::vlog::{blob_file::scanner::ScanEntry, BlobFileId}; +use crate::{ + fs::FileSystem, + vlog::{blob_file::scanner::ScanEntry, BlobFileId}, +}; use interval_heap::IntervalHeap; use std::cmp::Reverse; @@ -37,14 +40,14 @@ impl Ord for IteratorValue { } /// Interleaves multiple blob file readers into a single, sorted stream -pub struct MergeScanner { - readers: Vec, +pub struct MergeScanner { + readers: Vec>, heap: IntervalHeap, } -impl MergeScanner { +impl MergeScanner { /// Initializes a new merging reader - pub fn new(readers: Vec) -> Self { + pub fn new(readers: Vec>) -> Self { let heap = IntervalHeap::with_capacity(readers.len()); Self { readers, heap } } @@ -76,7 +79,7 @@ impl MergeScanner { } } -impl Iterator for MergeScanner { +impl Iterator for MergeScanner { type Item = crate::Result<(ScanEntry, BlobFileId)>; fn next(&mut self) -> Option { @@ -109,7 +112,8 @@ mod tests { let blob_file_path = dir.path().join("0"); { { - let mut writer = BlobFileWriter::new(&blob_file_path, 0, 0)?; + let mut writer = + BlobFileWriter::::new(&blob_file_path, 0, 0)?; writer.write(b"a", 1, &b"1".repeat(100))?; writer.write(b"a", 0, &b"0".repeat(100))?; @@ -153,7 +157,8 @@ mod tests { let keys = [b"a", b"c", b"e"]; { - let mut writer = BlobFileWriter::new(&blob_file_0_path, 0, 0)?; + let mut writer = + BlobFileWriter::::new(&blob_file_0_path, 0, 0)?; for key in keys { writer.write(key, 0, &key.repeat(100))?; @@ -167,7 +172,8 @@ mod tests { let keys = [b"b", b"d"]; { - let mut writer = BlobFileWriter::new(&blob_file_1_path, 1, 0)?; + let mut writer = + BlobFileWriter::::new(&blob_file_1_path, 1, 0)?; for key in keys { writer.write(key, 1, &key.repeat(100))?; diff --git a/src/vlog/blob_file/mod.rs b/src/vlog/blob_file/mod.rs index 138e5e142..b1f9d5e03 100644 --- a/src/vlog/blob_file/mod.rs +++ b/src/vlog/blob_file/mod.rs @@ -10,18 +10,18 @@ pub mod scanner; pub mod writer; use crate::{ - blob_tree::FragmentationMap, file_accessor::FileAccessor, vlog::BlobFileId, Checksum, - GlobalTableId, TreeId, + blob_tree::FragmentationMap, file_accessor::FileAccessor, fs::FileSystem, vlog::BlobFileId, + Checksum, GlobalTableId, TreeId, }; pub use meta::Metadata; use std::{ + marker::PhantomData, path::{Path, PathBuf}, sync::{atomic::AtomicBool, Arc}, }; /// A blob file is an immutable, sorted, contiguous file that contains large key-value pairs (blobs) -#[derive(Debug)] -pub struct Inner { +pub struct Inner { /// Blob file ID pub id: BlobFileId, @@ -29,6 +29,7 @@ pub struct Inner { /// File path pub path: PathBuf, + pub(crate) phantom: PhantomData, /// Statistics pub meta: Metadata, @@ -38,16 +39,28 @@ pub struct Inner { pub checksum: Checksum, - pub(crate) file_accessor: FileAccessor, + pub(crate) file_accessor: FileAccessor, } -impl Inner { +impl Inner { fn global_id(&self) -> GlobalTableId { GlobalTableId::from((self.tree_id, self.id)) } } -impl Drop for Inner { +impl std::fmt::Debug for Inner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Inner") + .field("id", &self.id) + .field("path", &self.path) + .field("meta", &self.meta) + .field("is_deleted", &self.is_deleted) + .field("checksum", &self.checksum) + .finish() + } +} + +impl Drop for Inner { fn drop(&mut self) { if self.is_deleted.load(std::sync::atomic::Ordering::Acquire) { log::trace!( @@ -56,7 +69,7 @@ impl Drop for Inner { self.path.display(), ); - if let Err(e) = std::fs::remove_file(&*self.path) { + if let Err(e) = F::remove_file(&self.path) { log::warn!( "Failed to cleanup deleted blob file {:?} at {}: {e:?}", self.id, @@ -72,24 +85,29 @@ impl Drop for Inner { } /// A blob file stores large values and is part of the value log -#[derive(Clone)] -pub struct BlobFile(pub(crate) Arc); +pub struct BlobFile(pub(crate) Arc>); + +impl Clone for BlobFile { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} -impl Eq for BlobFile {} +impl Eq for BlobFile {} -impl PartialEq for BlobFile { +impl PartialEq for BlobFile { fn eq(&self, other: &Self) -> bool { self.id().eq(&other.id()) } } -impl std::hash::Hash for BlobFile { +impl std::hash::Hash for BlobFile { fn hash(&self, state: &mut H) { self.id().hash(state); } } -impl BlobFile { +impl BlobFile { pub(crate) fn mark_as_deleted(&self) { self.0 .is_deleted @@ -116,7 +134,7 @@ impl BlobFile { /// Returns the blob file accessor. #[must_use] - pub(crate) fn file_accessor(&self) -> &FileAccessor { + pub(crate) fn file_accessor(&self) -> &FileAccessor { &self.0.file_accessor } diff --git a/src/vlog/blob_file/multi_writer.rs b/src/vlog/blob_file/multi_writer.rs index 6d1050c6c..a082a08da 100644 --- a/src/vlog/blob_file/multi_writer.rs +++ b/src/vlog/blob_file/multi_writer.rs @@ -5,6 +5,7 @@ use super::writer::Writer; use crate::{ file_accessor::FileAccessor, + fs::FileSystem, vlog::{ blob_file::{Inner as BlobFileInner, Metadata}, BlobFileId, @@ -12,19 +13,18 @@ use crate::{ BlobFile, CompressionType, DescriptorTable, SeqNo, SequenceNumberCounter, TreeId, }; use std::{ - fs::File, path::{Path, PathBuf}, sync::{atomic::AtomicBool, Arc}, }; /// Blob file writer, may write multiple blob files -pub struct MultiWriter { +pub struct MultiWriter { folder: PathBuf, target_size: u64, - active_writer: Writer, + active_writer: Writer, - results: Vec, + results: Vec>, id_generator: SequenceNumberCounter, @@ -32,10 +32,10 @@ pub struct MultiWriter { passthrough_compression: CompressionType, tree_id: TreeId, - descriptor_table: Option>, + descriptor_table: Option>>, } -impl MultiWriter { +impl MultiWriter { /// Initializes a new blob file writer. /// /// # Errors @@ -46,7 +46,7 @@ impl MultiWriter { id_generator: SequenceNumberCounter, folder: P, tree_id: TreeId, - descriptor_table: Option>, + descriptor_table: Option>>, ) -> crate::Result { let folder = folder.as_ref(); @@ -58,7 +58,7 @@ impl MultiWriter { folder: folder.into(), target_size: 64 * 1_024 * 1_024, - active_writer: Writer::new(blob_file_path, blob_file_id, tree_id)?, + active_writer: Writer::::new(blob_file_path, blob_file_id, tree_id)?, results: Vec::new(), @@ -113,7 +113,7 @@ impl MultiWriter { let new_blob_file_id = self.id_generator.next(); let blob_file_path = self.folder.join(new_blob_file_id.to_string()); - let new_writer = Writer::new(blob_file_path, new_blob_file_id, self.tree_id)? + let new_writer = Writer::::new(blob_file_path, new_blob_file_id, self.tree_id)? .use_compression(self.compression); let old_writer = std::mem::replace(&mut self.active_writer, new_writer); @@ -128,10 +128,10 @@ impl MultiWriter { } fn consume_writer( - writer: Writer, + writer: Writer, passthrough_compression: CompressionType, - descriptor_table: Option>, - ) -> crate::Result> { + descriptor_table: Option>>, + ) -> crate::Result>> { if writer.item_count > 0 { let blob_file_id = writer.blob_file_id; let path = writer.path.clone(); @@ -146,7 +146,7 @@ impl MultiWriter { let (metadata, checksum) = writer.finish()?; - let file = Arc::new(File::open(&path)?); + let file = Arc::new(F::open(&path)?); let file_accessor = descriptor_table.map_or(FileAccessor::File(file.clone()), |dt| { FileAccessor::DescriptorTable(dt) }); @@ -158,6 +158,7 @@ impl MultiWriter { path, is_deleted: AtomicBool::new(false), id: blob_file_id, + phantom: std::marker::PhantomData, file_accessor, meta: Metadata { id: blob_file_id, @@ -182,7 +183,7 @@ impl MultiWriter { writer.path.display(), ); - if let Err(e) = std::fs::remove_file(&writer.path) { + if let Err(e) = F::remove_file(&writer.path) { log::warn!( "Could not delete empty blob file at {}: {e:?}", writer.path.display(), @@ -234,7 +235,7 @@ impl MultiWriter { Ok(bytes_written) } - pub(crate) fn finish(mut self) -> crate::Result> { + pub(crate) fn finish(mut self) -> crate::Result>> { let blob_file = Self::consume_writer( self.active_writer, self.passthrough_compression, diff --git a/src/vlog/blob_file/reader.rs b/src/vlog/blob_file/reader.rs index 7b9dcc184..fdec9fd82 100644 --- a/src/vlog/blob_file/reader.rs +++ b/src/vlog/blob_file/reader.rs @@ -3,6 +3,7 @@ // (found in the LICENSE-* files in the repository) use crate::{ + fs::FileSystem, vlog::{ blob_file::writer::{BLOB_HEADER_LEN, BLOB_HEADER_MAGIC}, ValueHandle, @@ -10,19 +11,16 @@ use crate::{ BlobFile, Checksum, CompressionType, UserValue, }; use byteorder::{LittleEndian, ReadBytesExt}; -use std::{ - fs::File, - io::{Cursor, Read, Seek}, -}; +use std::io::{Cursor, Read, Seek}; /// Reads a single blob from a blob file -pub struct Reader<'a> { - blob_file: &'a BlobFile, - file: &'a File, +pub struct Reader<'a, F: FileSystem> { + blob_file: &'a BlobFile, + file: &'a F::File, } -impl<'a> Reader<'a> { - pub fn new(blob_file: &'a BlobFile, file: &'a File) -> Self { +impl<'a, F: FileSystem> Reader<'a, F> { + pub fn new(blob_file: &'a BlobFile, file: &'a F::File) -> Self { Self { blob_file, file } } @@ -112,8 +110,13 @@ mod tests { let id_generator = SequenceNumberCounter::default(); let folder = tempfile::tempdir()?; - let mut writer = crate::vlog::BlobFileWriter::new(id_generator, folder.path(), 0, None)? - .use_target_size(u64::MAX); + let mut writer = crate::vlog::BlobFileWriter::::new( + id_generator, + folder.path(), + 0, + None, + )? + .use_target_size(u64::MAX); let offset = writer.offset(); let on_disk_size = writer.write(b"a", 0, b"abcdef")?; @@ -126,7 +129,7 @@ mod tests { let blob_file = writer.finish()?; let blob_file = blob_file.first().unwrap(); - let file = File::open(&blob_file.0.path)?; + let file = crate::fs::StdFileSystem::open(&blob_file.0.path)?; let reader = Reader::new(blob_file, &file); assert_eq!(reader.get(b"a", &handle)?, b"abcdef"); @@ -140,9 +143,14 @@ mod tests { let id_generator = SequenceNumberCounter::default(); let folder = tempfile::tempdir()?; - let mut writer = crate::vlog::BlobFileWriter::new(id_generator, folder.path(), 0, None)? - .use_target_size(u64::MAX) - .use_compression(CompressionType::Lz4); + let mut writer = crate::vlog::BlobFileWriter::::new( + id_generator, + folder.path(), + 0, + None, + )? + .use_target_size(u64::MAX) + .use_compression(CompressionType::Lz4); let offset = writer.offset(); let on_disk_size = writer.write(b"a", 0, b"abcdef")?; @@ -163,7 +171,7 @@ mod tests { let blob_file = writer.finish()?; let blob_file = blob_file.first().unwrap(); - let file = File::open(&blob_file.0.path)?; + let file = crate::fs::StdFileSystem::open(&blob_file.0.path)?; let reader = Reader::new(blob_file, &file); assert_eq!(reader.get(b"a", &handle0)?, b"abcdef"); diff --git a/src/vlog/blob_file/scanner.rs b/src/vlog/blob_file/scanner.rs index a4deb5def..4729bb17a 100644 --- a/src/vlog/blob_file/scanner.rs +++ b/src/vlog/blob_file/scanner.rs @@ -4,37 +4,47 @@ use super::writer::BLOB_HEADER_MAGIC; use crate::{ + fs::FileSystem, vlog::{blob_file::meta::METADATA_HEADER_MAGIC, BlobFileId}, Checksum, SeqNo, UserKey, UserValue, }; use byteorder::{LittleEndian, ReadBytesExt}; use std::{ - fs::File, io::{BufReader, Read, Seek}, path::Path, }; /// Reads through a blob file in order -pub struct Scanner { +pub struct Scanner { pub(crate) blob_file_id: BlobFileId, // TODO: remove unused? - inner: BufReader, + inner: BufReader, is_terminated: bool, } -impl Scanner { +impl Scanner { /// Initializes a new blob file reader. /// /// # Errors /// /// Will return `Err` if an IO error occurs. + #[allow(dead_code)] pub fn new>(path: P, blob_file_id: BlobFileId) -> crate::Result { - let file_reader = BufReader::with_capacity(32_000, File::open(path)?); + let file_reader = + BufReader::with_capacity(32_000, crate::fs::StdFileSystem::open(path.as_ref())?); + Ok(Self::with_reader(blob_file_id, file_reader)) + } +} + +impl Scanner { + /// Initializes a new blob file reader. + pub fn new_with_fs>(path: P, blob_file_id: BlobFileId) -> crate::Result { + let file_reader = BufReader::with_capacity(32_000, F::open(path.as_ref())?); Ok(Self::with_reader(blob_file_id, file_reader)) } /// Initializes a new blob file reader. #[must_use] - pub fn with_reader(blob_file_id: BlobFileId, file_reader: BufReader) -> Self { + pub fn with_reader(blob_file_id: BlobFileId, file_reader: BufReader) -> Self { Self { blob_file_id, inner: file_reader, @@ -52,7 +62,7 @@ pub struct ScanEntry { pub uncompressed_len: u32, } -impl Iterator for Scanner { +impl Iterator for Scanner { type Item = crate::Result; fn next(&mut self) -> Option { @@ -139,7 +149,8 @@ mod tests { let keys = [b"a", b"b", b"c", b"d", b"e"]; { - let mut writer = BlobFileWriter::new(&blob_file_path, 0, 0)?; + let mut writer = + BlobFileWriter::::new(&blob_file_path, 0, 0)?; for key in keys { writer.write(key, 0, &key.repeat(100))?; diff --git a/src/vlog/blob_file/writer.rs b/src/vlog/blob_file/writer.rs index 7e20dff48..231076a02 100644 --- a/src/vlog/blob_file/writer.rs +++ b/src/vlog/blob_file/writer.rs @@ -4,12 +4,14 @@ use super::meta::Metadata; use crate::{ - checksum::ChecksummedWriter, time::unix_timestamp, vlog::BlobFileId, Checksum, CompressionType, - KeyRange, SeqNo, TreeId, UserKey, + checksum::ChecksummedWriter, + fs::{FileLike, FileSystem}, + time::unix_timestamp, + vlog::BlobFileId, + Checksum, CompressionType, KeyRange, SeqNo, TreeId, UserKey, }; use byteorder::{LittleEndian, WriteBytesExt}; use std::{ - fs::File, io::{BufWriter, Write}, path::{Path, PathBuf}, }; @@ -24,13 +26,13 @@ pub const BLOB_HEADER_LEN: usize = BLOB_HEADER_MAGIC.len() + std::mem::size_of::(); // On-disk value length /// Blob file writer -pub struct Writer { +pub struct Writer { pub(crate) tree_id: TreeId, pub path: PathBuf, pub(crate) blob_file_id: BlobFileId, #[expect(clippy::struct_field_names)] - writer: sfa::Writer>>, + writer: sfa::Writer>>, offset: u64, @@ -44,7 +46,7 @@ pub struct Writer { pub(crate) compression: CompressionType, } -impl Writer { +impl Writer { /// Initializes a new blob file writer. /// /// # Errors @@ -58,7 +60,7 @@ impl Writer { ) -> crate::Result { let path = path.as_ref(); - let writer = BufWriter::new(File::create(path)?); + let writer = BufWriter::new(F::create(path)?); let writer = ChecksummedWriter::new(writer); let mut writer = sfa::Writer::from_writer(writer); writer.start("data")?; diff --git a/src/vlog/mod.rs b/src/vlog/mod.rs index 5f59a0a7d..66ec94bda 100644 --- a/src/vlog/mod.rs +++ b/src/vlog/mod.rs @@ -7,13 +7,15 @@ pub mod blob_file; mod handle; pub use { - accessor::Accessor, blob_file::merge::MergeScanner as BlobFileMergeScanner, - blob_file::multi_writer::MultiWriter as BlobFileWriter, + accessor::Accessor, blob_file::multi_writer::MultiWriter as BlobFileWriter, blob_file::scanner::Scanner as BlobFileScanner, blob_file::BlobFile, handle::ValueHandle, }; +pub type BlobFileMergeScanner = blob_file::merge::MergeScanner; + use crate::{ file_accessor::FileAccessor, + fs::FileSystem, vlog::blob_file::{Inner as BlobFileInner, Metadata}, Checksum, DescriptorTable, TreeId, }; @@ -22,13 +24,13 @@ use std::{ sync::{atomic::AtomicBool, Arc}, }; -pub fn recover_blob_files( +pub fn recover_blob_files( folder: &Path, ids: &[(BlobFileId, Checksum)], tree_id: TreeId, - descriptor_table: Option<&Arc>, -) -> crate::Result<(Vec, Vec)> { - if !folder.try_exists()? { + descriptor_table: Option<&Arc>>, +) -> crate::Result<(Vec>, Vec)> { + if !F::exists(folder)? { return Ok((vec![], vec![])); } @@ -45,8 +47,7 @@ pub fn recover_blob_files( let mut blob_files = Vec::with_capacity(ids.len()); let mut orphaned_blob_files = vec![]; - for (idx, dirent) in std::fs::read_dir(folder)?.enumerate() { - let dirent = dirent?; + for (idx, dirent) in F::read_dir(folder)?.into_iter().enumerate() { let file_name = dirent.file_name(); // https://en.wikipedia.org/wiki/.DS_Store @@ -60,7 +61,7 @@ pub fn recover_blob_files( } let blob_file_name = file_name.to_str().ok_or_else(|| { - log::error!("invalid table file name {}", file_name.display()); + log::error!("invalid table file name {}", file_name.to_string_lossy()); crate::Error::Unrecoverable })?; @@ -69,8 +70,8 @@ pub fn recover_blob_files( crate::Error::Unrecoverable })?; - let blob_file_path = dirent.path(); - assert!(!blob_file_path.is_dir()); + let blob_file_path = dirent.path().to_path_buf(); + assert!(!dirent.is_dir()); if let Some(&(_, checksum)) = ids.iter().find(|(id, _)| id == &blob_file_id) { log::trace!( @@ -78,7 +79,7 @@ pub fn recover_blob_files( blob_file_path.display(), ); - let file = std::fs::File::open(&blob_file_path)?; + let file = F::open(&blob_file_path)?; let meta = { let reader = sfa::Reader::new(&blob_file_path)?; @@ -108,6 +109,7 @@ pub fn recover_blob_files( blob_files.push(BlobFile(Arc::new(BlobFileInner { id: blob_file_id, path: blob_file_path, + phantom: std::marker::PhantomData, meta, is_deleted: AtomicBool::new(false), checksum, @@ -143,7 +145,12 @@ mod tests { #[test] fn vlog_recovery_missing_blob_file() { assert!(matches!( - recover_blob_files(Path::new("."), &[(0, Checksum::from_raw(0))], 0, None), + recover_blob_files::( + Path::new("."), + &[(0, Checksum::from_raw(0))], + 0, + None, + ), Err(crate::Error::Unrecoverable), )); } diff --git a/tests/tree_disjoint_iter.rs b/tests/tree_disjoint_iter.rs index a53a3afad..e02aa25d3 100644 --- a/tests/tree_disjoint_iter.rs +++ b/tests/tree_disjoint_iter.rs @@ -15,7 +15,7 @@ macro_rules! iter_closed { fn tree_disjoint_iter() -> lsm_tree::Result<()> { let folder = get_tmp_folder(); - let tree = crate::Config::new( + let tree = Config::new( &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), diff --git a/tests/tree_disjoint_prefix.rs b/tests/tree_disjoint_prefix.rs index fb1907b49..eb800f5ec 100644 --- a/tests/tree_disjoint_prefix.rs +++ b/tests/tree_disjoint_prefix.rs @@ -15,7 +15,7 @@ macro_rules! iter_closed { fn tree_disjoint_prefix() -> lsm_tree::Result<()> { let folder = get_tmp_folder(); - let tree = crate::Config::new( + let tree = Config::new( &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), diff --git a/tests/tree_disjoint_range.rs b/tests/tree_disjoint_range.rs index d2b81c086..8500539b7 100644 --- a/tests/tree_disjoint_range.rs +++ b/tests/tree_disjoint_range.rs @@ -15,7 +15,7 @@ macro_rules! iter_closed { fn tree_disjoint_range() -> lsm_tree::Result<()> { let folder = get_tmp_folder(); - let tree = crate::Config::new( + let tree = Config::new( &folder, SequenceNumberCounter::default(), SequenceNumberCounter::default(), diff --git a/tests/tree_drop_range.rs b/tests/tree_drop_range.rs index 02cdd75de..49b8a1d62 100644 --- a/tests/tree_drop_range.rs +++ b/tests/tree_drop_range.rs @@ -1,7 +1,7 @@ use lsm_tree::{get_tmp_folder, AbstractTree, AnyTree, Config, SeqNo, SequenceNumberCounter}; use std::ops::Bound::{Excluded, Included, Unbounded}; -fn populate_tables(tree: &AnyTree) -> lsm_tree::Result<()> { +fn populate_tables(tree: &AnyTree) -> lsm_tree::Result<()> { for key in 'a'..='e' { tree.insert([key as u8], "", 0); tree.flush_active_memtable(0)?;