diff --git a/CHANGELOG.md b/CHANGELOG.md index 10163fb8b7..cb13d1214b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.24.0 (TBD) + +- Extracted `SmtStorageReader` trait from `SmtStorage`, allowing `LargeSmt` to work with read-only storage backends ([#958](https://github.com/0xMiden/crypto/pull/958)). + ## 0.23.0 (2026-03-11) - Replaced `Subtree` internal storage with bitmask layout ([#784](https://github.com/0xMiden/crypto/pull/784)). diff --git a/Cargo.lock b/Cargo.lock index 8096f0328a..f0f7c70f1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1011,7 +1011,7 @@ dependencies = [ "p3-symmetric", "p3-util", "proptest", - "rand 0.9.2", + "rand 0.9.3", "rand_chacha", "rand_core 0.9.5", "rand_hc", @@ -1048,7 +1048,7 @@ dependencies = [ "p3-goldilocks", "paste", "proptest", - "rand 0.10.0", + "rand 0.10.1", "rstest", "serde", "subtle", @@ -1261,7 +1261,7 @@ dependencies = [ "p3-maybe-rayon", "p3-util", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", "tracing", ] @@ -1282,7 +1282,7 @@ dependencies = [ "p3-symmetric", "p3-util", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", ] @@ -1307,7 +1307,7 @@ dependencies = [ "p3-field", "p3-maybe-rayon", "p3-util", - "rand 0.10.0", + "rand 0.10.1", "serde", "tracing", ] @@ -1331,7 +1331,7 @@ dependencies = [ "p3-field", "p3-symmetric", "p3-util", - "rand 0.10.0", + "rand 0.10.1", ] [[package]] @@ -1362,7 +1362,7 @@ dependencies = [ "p3-miden-lmcs", "p3-miden-transcript", "p3-util", - "rand 0.10.0", + "rand 0.10.1", "thiserror", "tracing", ] @@ -1402,7 +1402,7 @@ dependencies = [ "p3-miden-transcript", "p3-symmetric", "p3-util", - "rand 0.10.0", + "rand 0.10.1", "serde", "thiserror", "tracing", @@ -1448,7 +1448,7 @@ dependencies = [ "p3-symmetric", "p3-util", "paste", - "rand 0.10.0", + "rand 0.10.1", "serde", "spin 0.10.0", "tracing", @@ -1462,7 +1462,7 @@ checksum = "ffe427e925ad0e85fd0e36ba53a3ab162dbeadc8507c31b7a513531df42d73e9" dependencies = [ "p3-field", "p3-symmetric", - "rand 0.10.0", + "rand 0.10.1", ] [[package]] @@ -1475,7 +1475,7 @@ dependencies = [ "p3-mds", "p3-symmetric", "p3-util", - "rand 0.10.0", + "rand 0.10.1", ] [[package]] @@ -1613,7 +1613,7 @@ checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" dependencies = [ "bitflags", "num-traits", - "rand 0.9.2", + "rand 0.9.3", "rand_chacha", "rand_xorshift", "regex-syntax", @@ -1643,9 +1643,9 @@ checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "rand" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "7ec095654a25171c2124e9e3393a930bddbffdc939556c914957a4c3e0a87166" dependencies = [ "rand_chacha", "rand_core 0.9.5", @@ -1653,9 +1653,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "rand_core 0.10.0", ] @@ -2368,7 +2368,7 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4ff3b651754a7bd216f959764d0a5ab6f4b551c9a3a08fb9ccecbed594b614a" dependencies = [ - "rand 0.9.2", + "rand 0.9.3", "winter-utils", ] diff --git a/deny.toml b/deny.toml index ec25b7f29e..6943ad7899 100644 --- a/deny.toml +++ b/deny.toml @@ -42,8 +42,9 @@ skip-tree = [ # miden-crypto uses rand 0.9 while Plonky3 0.5 uses rand 0.10, # causing duplicate rand / rand_core / getrandom trees { name = "getrandom", version = "=0.2.17" }, - { name = "rand", version = "=0.9.2" }, + { name = "rand", version = "=0.9.3" }, { name = "rand_core", version = "=0.6.4" }, + { name = "rand_core", version = "=0.9.5" }, ] wildcards = "allow" diff --git a/miden-crypto/src/merkle/smt/large/construction.rs b/miden-crypto/src/merkle/smt/large/construction.rs index f2282efc2a..88f1ca618c 100644 --- a/miden-crypto/src/merkle/smt/large/construction.rs +++ b/miden-crypto/src/merkle/smt/large/construction.rs @@ -5,7 +5,7 @@ use p3_maybe_rayon::prelude::*; use super::{ CONSTRUCTION_SUBTREE_BATCH_SIZE, IN_MEMORY_DEPTH, LargeSmt, LargeSmtError, NUM_IN_MEMORY_NODES, - ROOT_MEMORY_INDEX, SMT_DEPTH, SmtStorage, StorageError, Subtree, + ROOT_MEMORY_INDEX, SMT_DEPTH, SmtStorage, SmtStorageReader, StorageError, Subtree, }; use crate::{ EMPTY_WORD, Word, @@ -22,7 +22,7 @@ use crate::{ // CONSTRUCTION // ================================================================================================ -impl LargeSmt { +impl LargeSmt { /// Creates a new empty [LargeSmt] backed by the provided storage. /// /// This method is intended for creating a fresh tree with empty storage. If the storage @@ -113,35 +113,6 @@ impl LargeSmt { Ok(smt) } - /// Returns a new [Smt] instantiated with leaves set as specified by the provided entries. - /// - /// If the `concurrent` feature is enabled, this function uses a parallel implementation to - /// process the entries efficiently, otherwise it defaults to the sequential implementation. - /// - /// All leaves omitted from the entries list are set to [Self::EMPTY_VALUE]. - /// - /// # Errors - /// Returns an error if the provided entries contain multiple values for the same key. - pub fn with_entries( - storage: S, - entries: impl IntoIterator, - ) -> Result { - let entries: Vec<(Word, Word)> = entries.into_iter().collect(); - - if storage.has_leaves()? { - return Err(StorageError::Unsupported( - "Cannot create SMT with non-empty storage".into(), - ) - .into()); - } - let mut tree = LargeSmt::new(storage)?; - if entries.is_empty() { - return Ok(tree); - } - tree.build_subtrees(entries)?; - Ok(tree) - } - /// Internal method that initializes the in-memory tree from storage. /// /// For empty storage, returns an empty tree. For non-empty storage, @@ -225,6 +196,37 @@ impl LargeSmt { entry_count, }) } +} + +impl LargeSmt { + /// Returns a new [Smt] instantiated with leaves set as specified by the provided entries. + /// + /// If the `concurrent` feature is enabled, this function uses a parallel implementation to + /// process the entries efficiently, otherwise it defaults to the sequential implementation. + /// + /// All leaves omitted from the entries list are set to [Self::EMPTY_VALUE]. + /// + /// # Errors + /// Returns an error if the provided entries contain multiple values for the same key. + pub fn with_entries( + storage: S, + entries: impl IntoIterator, + ) -> Result { + let entries: Vec<(Word, Word)> = entries.into_iter().collect(); + + if storage.has_leaves()? { + return Err(StorageError::Unsupported( + "Cannot create SMT with non-empty storage".into(), + ) + .into()); + } + let mut tree = LargeSmt::new(storage)?; + if entries.is_empty() { + return Ok(tree); + } + tree.build_subtrees(entries)?; + Ok(tree) + } fn build_subtrees(&mut self, mut entries: Vec<(Word, Word)>) -> Result<(), MerkleError> { entries.par_sort_unstable_by_key(|item| { diff --git a/miden-crypto/src/merkle/smt/large/iter.rs b/miden-crypto/src/merkle/smt/large/iter.rs index f1e36a3a9f..4af4a81a48 100644 --- a/miden-crypto/src/merkle/smt/large/iter.rs +++ b/miden-crypto/src/merkle/smt/large/iter.rs @@ -1,6 +1,6 @@ use alloc::{boxed::Box, vec::Vec}; -use super::{IN_MEMORY_DEPTH, LargeSmt, SmtStorage, is_empty_parent}; +use super::{IN_MEMORY_DEPTH, LargeSmt, SmtStorageReader, is_empty_parent}; use crate::{ Word, hash::poseidon2::Poseidon2, @@ -22,12 +22,12 @@ enum InnerNodeIteratorState<'a> { Done, } -pub struct LargeSmtInnerNodeIterator<'a, S: SmtStorage> { +pub struct LargeSmtInnerNodeIterator<'a, S: SmtStorageReader> { large_smt: &'a LargeSmt, state: InnerNodeIteratorState<'a>, } -impl<'a, S: SmtStorage> LargeSmtInnerNodeIterator<'a, S> { +impl<'a, S: SmtStorageReader> LargeSmtInnerNodeIterator<'a, S> { pub(super) fn new(large_smt: &'a LargeSmt) -> Self { // in-memory nodes should never be empty Self { @@ -40,7 +40,7 @@ impl<'a, S: SmtStorage> LargeSmtInnerNodeIterator<'a, S> { } } -impl Iterator for LargeSmtInnerNodeIterator<'_, S> { +impl Iterator for LargeSmtInnerNodeIterator<'_, S> { type Item = InnerNodeInfo; /// Returns the next inner node info in the tree. diff --git a/miden-crypto/src/merkle/smt/large/mod.rs b/miden-crypto/src/merkle/smt/large/mod.rs index 6ce71ee4f7..c03360adee 100644 --- a/miden-crypto/src/merkle/smt/large/mod.rs +++ b/miden-crypto/src/merkle/smt/large/mod.rs @@ -176,9 +176,12 @@ use alloc::vec::Vec; use super::{ EmptySubtreeRoots, InnerNode, InnerNodeInfo, InnerNodes, LeafIndex, MerkleError, NodeIndex, - NodeMutation, SMT_DEPTH, SmtLeaf, SmtProof, SparseMerkleTree, Word, + NodeMutation, SMT_DEPTH, SmtLeaf, SmtProof, SparseMerklePath, SparseMerkleTree, Word, +}; +use crate::{ + EMPTY_WORD, + merkle::smt::{Map, full::concurrent::MutatedSubtreeLeaves}, }; -use crate::merkle::smt::{Map, full::concurrent::MutatedSubtreeLeaves}; mod error; pub use error::LargeSmtError; @@ -193,7 +196,8 @@ pub use subtree::{Subtree, SubtreeError}; mod storage; pub use storage::{ - MemoryStorage, SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate, + MemoryStorage, SmtStorage, SmtStorageReader, StorageError, StorageUpdateParts, StorageUpdates, + SubtreeUpdate, }; #[cfg(feature = "rocksdb")] pub use storage::{RocksDbConfig, RocksDbStorage}; @@ -271,8 +275,8 @@ type MutatedLeaves = (MutatedSubtreeLeaves, Map, Map, /// in-memory nodes while continuing to share the storage backend, which is misleading. If you need /// to share an instance between threads or components, wrap it in an /// [`Arc`](alloc::sync::Arc) explicitly so the ownership semantics are clear. -#[derive(Debug)] -pub struct LargeSmt { +#[derive(Clone, Debug)] +pub struct LargeSmt { storage: S, /// Flat vector representation of in-memory nodes. /// Index 0 is unused; index 1 is root. @@ -286,11 +290,14 @@ pub struct LargeSmt { entry_count: usize, } -impl LargeSmt { +impl LargeSmt { // CONSTANTS // -------------------------------------------------------------------------------------------- /// The default value used to compute the hash of empty leaves. - pub const EMPTY_VALUE: Word = >::EMPTY_VALUE; + pub const EMPTY_VALUE: Word = EMPTY_WORD; + + /// The root of an empty tree. + pub const EMPTY_ROOT: Word = *EmptySubtreeRoots::entry(SMT_DEPTH, 0); /// Subtree depths for the subtrees stored in storage. pub const SUBTREE_DEPTHS: [u8; 5] = [56, 48, 40, 32, 24]; @@ -326,18 +333,78 @@ impl LargeSmt { /// Returns the leaf to which `key` maps pub fn get_leaf(&self, key: &Word) -> SmtLeaf { - >::get_leaf(self, key) + let leaf_pos = LeafIndex::::from(*key).position(); + match self.storage.get_leaf(leaf_pos) { + Ok(Some(leaf)) => leaf, + Ok(None) => SmtLeaf::new_empty((*key).into()), + Err(_) => { + panic!("Storage error during get_leaf"); + }, + } } /// Returns the value associated with `key` pub fn get_value(&self, key: &Word) -> Word { - >::get_value(self, key) + let leaf_pos = LeafIndex::::from(*key); + match self.storage.get_leaf(leaf_pos.position()) { + Ok(Some(leaf)) => leaf.get_value(key).unwrap_or_default(), + Ok(None) => EMPTY_WORD, + Err(_) => { + panic!("Storage error during get_value"); + }, + } } /// Returns an opening of the leaf associated with `key`. Conceptually, an opening is a Merkle /// path to the leaf, as well as the leaf itself. pub fn open(&self, key: &Word) -> SmtProof { - >::open(self, key) + let leaf_pos = LeafIndex::::from(*key); + + let mut idx: NodeIndex = leaf_pos.into(); + + let subtree_roots: Vec = (0..NUM_SUBTREE_LEVELS) + .scan(idx.parent(), |cursor, _| { + let subtree_root = Subtree::find_subtree_root(*cursor); + *cursor = subtree_root.parent(); + Some(subtree_root) + }) + .collect(); + + let (leaf_opt, subtree_opts) = self + .storage + .get_leaf_and_subtrees(leaf_pos.position(), &subtree_roots) + .expect("Fetching leaf and subtrees succeeds"); + + let leaf = leaf_opt.unwrap_or_else(|| SmtLeaf::new_empty((*key).into())); + + let mut cache = Map::::new(); + for (&root, subtree_opt) in subtree_roots.iter().zip(subtree_opts) { + let subtree = subtree_opt.unwrap_or_else(|| Subtree::new(root)); + cache.insert(root, subtree); + } + let mut path = Vec::with_capacity(idx.depth() as usize); + while idx.depth() > 0 { + let is_right = idx.is_position_odd(); + idx = idx.parent(); + + let sibling_hash = if idx.depth() < IN_MEMORY_DEPTH { + let InnerNode { left, right } = self.get_inner_node(idx); + if is_right { left } else { right } + } else { + let root = Subtree::find_subtree_root(idx); + let subtree = &cache[&root]; + let InnerNode { left, right } = subtree + .get_inner_node(idx) + .unwrap_or_else(|| EmptySubtreeRoots::get_inner_node(SMT_DEPTH, idx.depth())); + if is_right { left } else { right } + }; + + path.push(sibling_hash); + } + + let merkle_path = + SparseMerklePath::from_sized_iter(path).expect("failed to convert to SparseMerklePath"); + SmtProof::new_unchecked(merkle_path, leaf) } /// Returns a boolean value indicating whether the SMT is empty. @@ -388,25 +455,27 @@ impl LargeSmt { Ok(LargeSmtInnerNodeIterator::new(self)) } - // STATE MUTATORS + // HELPERS // -------------------------------------------------------------------------------------------- - /// Inserts a value at the specified key, returning the previous value associated with that key. - /// Recall that by definition, any key that hasn't been updated is associated with - /// [`Self::EMPTY_VALUE`]. - /// - /// This also recomputes all hashes between the leaf (associated with the key) and the root, - /// updating the root itself. + /// Returns the inner node at the given index. /// - /// # Errors - /// Returns an error if inserting the key-value pair would exceed - /// [`MAX_LEAF_ENTRIES`](super::MAX_LEAF_ENTRIES) (1024 entries) in the leaf. - pub fn insert(&mut self, key: Word, value: Word) -> Result { - >::insert(self, key, value) - } + /// For in-memory depths (< 24), reads from the flat in-memory array. + /// For deeper nodes, reads from storage. + pub(crate) fn get_inner_node(&self, index: NodeIndex) -> InnerNode { + if index.depth() < IN_MEMORY_DEPTH { + let memory_index = to_memory_index(&index); + return InnerNode { + left: self.in_memory_nodes[memory_index * 2], + right: self.in_memory_nodes[memory_index * 2 + 1], + }; + } - // HELPERS - // -------------------------------------------------------------------------------------------- + self.storage + .get_inner_node(index) + .expect("Failed to get inner node") + .unwrap_or_else(|| EmptySubtreeRoots::get_inner_node(SMT_DEPTH, index.depth())) + } /// Helper to get an in-memory node if not empty. /// @@ -438,6 +507,25 @@ impl LargeSmt { } } +impl LargeSmt { + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Inserts a value at the specified key, returning the previous value associated with that key. + /// Recall that by definition, any key that hasn't been updated is associated with + /// [`Self::EMPTY_VALUE`]. + /// + /// This also recomputes all hashes between the leaf (associated with the key) and the root, + /// updating the root itself. + /// + /// # Errors + /// Returns an error if inserting the key-value pair would exceed + /// [`MAX_LEAF_ENTRIES`](super::MAX_LEAF_ENTRIES) (1024 entries) in the leaf. + pub fn insert(&mut self, key: Word, value: Word) -> Result { + >::insert(self, key, value) + } +} + // HELPERS // ================================================================================================ @@ -457,7 +545,7 @@ pub(super) fn to_memory_index(index: &NodeIndex) -> usize { (1usize << index.depth()) + index.position() as usize } -impl PartialEq for LargeSmt { +impl PartialEq for LargeSmt { /// Compares two LargeSmt instances based on their root hash and metadata. /// /// Note: This comparison only checks the root hash and counts, not the underlying @@ -470,4 +558,4 @@ impl PartialEq for LargeSmt { } } -impl Eq for LargeSmt {} +impl Eq for LargeSmt {} diff --git a/miden-crypto/src/merkle/smt/large/storage/memory.rs b/miden-crypto/src/merkle/smt/large/storage/memory.rs index 51d4a90174..028d75554f 100644 --- a/miden-crypto/src/merkle/smt/large/storage/memory.rs +++ b/miden-crypto/src/merkle/smt/large/storage/memory.rs @@ -1,6 +1,8 @@ use alloc::{boxed::Box, vec::Vec}; -use super::{SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate}; +use super::{ + SmtStorage, SmtStorageReader, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate, +}; use crate::{ EMPTY_WORD, Map, MapEntry, Word, merkle::{ @@ -43,7 +45,7 @@ impl Default for MemoryStorage { } } -impl SmtStorage for MemoryStorage { +impl SmtStorageReader for MemoryStorage { /// Gets the total number of non-empty leaves currently stored. fn leaf_count(&self) -> Result { Ok(self.leaves.len()) @@ -54,6 +56,84 @@ impl SmtStorage for MemoryStorage { Ok(self.leaves.values().map(|leaf| leaf.num_entries()).sum()) } + /// Retrieves a single leaf node. + fn get_leaf(&self, index: u64) -> Result, StorageError> { + Ok(self.leaves.get(&index).cloned()) + } + + /// Retrieves multiple leaf nodes. Returns Ok(None) for indices not found. + fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { + let leaves = indices.iter().map(|idx| self.leaves.get(idx).cloned()).collect(); + Ok(leaves) + } + + /// Returns true if the storage has any leaves. + fn has_leaves(&self) -> Result { + Ok(!self.leaves.is_empty()) + } + + /// Retrieves a single Subtree (representing deep nodes) by its root NodeIndex. + /// Assumes index.depth() >= IN_MEMORY_DEPTH. Returns Ok(None) if not found. + fn get_subtree(&self, index: NodeIndex) -> Result, StorageError> { + Ok(self.subtrees.get(&index).cloned()) + } + + /// Retrieves multiple Subtrees. + /// Assumes index.depth() >= IN_MEMORY_DEPTH for all indices. Returns Ok(None) for indices not + /// found. + fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError> { + let subtrees: Vec<_> = indices.iter().map(|idx| self.subtrees.get(idx).cloned()).collect(); + Ok(subtrees) + } + + /// Retrieves a single inner node from a Subtree. + /// + /// This function is intended for accessing nodes within a Subtree, meaning + /// `index.depth()` must be greater than or equal to `IN_MEMORY_DEPTH`. + /// + /// # Errors + /// - `StorageError::Unsupported`: If `index.depth() < IN_MEMORY_DEPTH`. + /// + /// Returns `Ok(None)` if the subtree or the specific inner node within it is not found. + fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { + if index.depth() < IN_MEMORY_DEPTH { + return Err(StorageError::Unsupported( + "Cannot get inner node from upper part of the tree".into(), + )); + } + let subtree_root_index = Subtree::find_subtree_root(index); + Ok(self + .subtrees + .get(&subtree_root_index) + .and_then(|subtree| subtree.get_inner_node(index))) + } + + /// Returns an iterator over all (index, SmtLeaf) pairs in the storage. + /// + /// The iterator provides access to the current state of the leaves. + fn iter_leaves(&self) -> Result + '_>, StorageError> { + let leaves_vec = self.leaves.iter().map(|(&k, v)| (k, v.clone())).collect::>(); + Ok(Box::new(leaves_vec.into_iter())) + } + + /// Returns an iterator over all Subtrees in the storage. + /// + /// The iterator provides access to the current subtrees from storage. + fn iter_subtrees(&self) -> Result + '_>, StorageError> { + let subtrees_vec = self.subtrees.values().cloned().collect::>(); + Ok(Box::new(subtrees_vec.into_iter())) + } + + /// Retrieves all depth 24 roots for fast tree rebuilding. + /// + /// For MemoryStorage, this returns an empty vector since all data is already in memory + /// and there's no startup performance benefit to caching depth 24 roots. + fn get_depth24(&self) -> Result, StorageError> { + Ok(Vec::new()) + } +} + +impl SmtStorage for MemoryStorage { /// Inserts a key-value pair into the leaf at the given index. /// /// - If the leaf at `index` does not exist, a new `SmtLeaf::Single` is created. @@ -102,11 +182,6 @@ impl SmtStorage for MemoryStorage { Ok(old_value) } - /// Retrieves a single leaf node. - fn get_leaf(&self, index: u64) -> Result, StorageError> { - Ok(self.leaves.get(&index).cloned()) - } - /// Sets multiple leaf nodes in storage. /// /// If a leaf at a given index already exists, it is overwritten. @@ -120,31 +195,6 @@ impl SmtStorage for MemoryStorage { Ok(self.leaves.remove(&index)) } - /// Retrieves multiple leaf nodes. Returns Ok(None) for indices not found. - fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { - let leaves = indices.iter().map(|idx| self.leaves.get(idx).cloned()).collect(); - Ok(leaves) - } - - /// Returns true if the storage has any leaves. - fn has_leaves(&self) -> Result { - Ok(!self.leaves.is_empty()) - } - - /// Retrieves a single Subtree (representing deep nodes) by its root NodeIndex. - /// Assumes index.depth() >= IN_MEMORY_DEPTH. Returns Ok(None) if not found. - fn get_subtree(&self, index: NodeIndex) -> Result, StorageError> { - Ok(self.subtrees.get(&index).cloned()) - } - - /// Retrieves multiple Subtrees. - /// Assumes index.depth() >= IN_MEMORY_DEPTH for all indices. Returns Ok(None) for indices not - /// found. - fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError> { - let subtrees: Vec<_> = indices.iter().map(|idx| self.subtrees.get(idx).cloned()).collect(); - Ok(subtrees) - } - /// Sets a single Subtree (representing deep nodes) by its root NodeIndex. /// /// If a subtree with the same root NodeIndex already exists, it is overwritten. @@ -170,28 +220,6 @@ impl SmtStorage for MemoryStorage { Ok(()) } - /// Retrieves a single inner node from a Subtree. - /// - /// This function is intended for accessing nodes within a Subtree, meaning - /// `index.depth()` must be greater than or equal to `IN_MEMORY_DEPTH`. - /// - /// # Errors - /// - `StorageError::Unsupported`: If `index.depth() < IN_MEMORY_DEPTH`. - /// - /// Returns `Ok(None)` if the subtree or the specific inner node within it is not found. - fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { - if index.depth() < IN_MEMORY_DEPTH { - return Err(StorageError::Unsupported( - "Cannot get inner node from upper part of the tree".into(), - )); - } - let subtree_root_index = Subtree::find_subtree_root(index); - Ok(self - .subtrees - .get(&subtree_root_index) - .and_then(|subtree| subtree.get_inner_node(index))) - } - /// Sets a single inner node within a Subtree. /// /// - `index.depth()` must be greater than or equal to `IN_MEMORY_DEPTH`. @@ -283,28 +311,4 @@ impl SmtStorage for MemoryStorage { } Ok(()) } - - /// Returns an iterator over all (index, SmtLeaf) pairs in the storage. - /// - /// The iterator provides access to the current state of the leaves. - fn iter_leaves(&self) -> Result + '_>, StorageError> { - let leaves_vec = self.leaves.iter().map(|(&k, v)| (k, v.clone())).collect::>(); - Ok(Box::new(leaves_vec.into_iter())) - } - - /// Returns an iterator over all Subtrees in the storage. - /// - /// The iterator provides access to the current subtrees from storage. - fn iter_subtrees(&self) -> Result + '_>, StorageError> { - let subtrees_vec = self.subtrees.values().cloned().collect::>(); - Ok(Box::new(subtrees_vec.into_iter())) - } - - /// Retrieves all depth 24 roots for fast tree rebuilding. - /// - /// For MemoryStorage, this returns an empty vector since all data is already in memory - /// and there's no startup performance benefit to caching depth 24 roots. - fn get_depth24(&self) -> Result, StorageError> { - Ok(Vec::new()) - } } diff --git a/miden-crypto/src/merkle/smt/large/storage/mod.rs b/miden-crypto/src/merkle/smt/large/storage/mod.rs index 9cf058b083..3a255f35a5 100644 --- a/miden-crypto/src/merkle/smt/large/storage/mod.rs +++ b/miden-crypto/src/merkle/smt/large/storage/mod.rs @@ -26,16 +26,15 @@ pub use memory::MemoryStorage; mod updates; pub use updates::{StorageUpdateParts, StorageUpdates, SubtreeUpdate}; -/// Sparse Merkle Tree storage backend. +/// Read-only operations for the Sparse Merkle Tree storage backend. /// -/// This trait outlines the fundamental operations required to persist and retrieve -/// the components of an SMT: leaves and deeper subtrees. -/// Implementations of this trait can provide various storage solutions, like in-memory -/// maps or persistent databases (e.g., RocksDB). +/// This trait outlines the operations required to retrieve the components of an SMT: leaves and +/// deeper subtrees. Implementations of this trait can provide various storage solutions, like +/// in-memory maps or persistent databases (e.g., RocksDB). /// /// All methods are expected to handle potential storage errors by returning a /// `Result<_, StorageError>`. -pub trait SmtStorage: 'static + fmt::Debug + Send + Sync { +pub trait SmtStorageReader: 'static + fmt::Debug + Send + Sync { /// Retrieves the total number of leaf nodes currently stored. /// /// # Errors @@ -48,6 +47,99 @@ pub trait SmtStorage: 'static + fmt::Debug + Send + Sync { /// Returns `StorageError` if the storage read operation fails. fn entry_count(&self) -> Result; + /// Retrieves a single SMT leaf node by its logical `index`. + /// Returns `Ok(None)` if no leaf exists at the given `index`. + fn get_leaf(&self, index: u64) -> Result, StorageError>; + + /// Retrieves multiple SMT leaf nodes by their logical `indices`. + /// + /// The returned `Vec` will have the same length as the input `indices` slice. + /// For each `index` in the input, the corresponding element in the output `Vec` + /// will be `Some(SmtLeaf)` if found, or `None` if not found. + fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError>; + + /// Returns true if the storage has any leaves. + /// + /// # Errors + /// Returns `StorageError` if the storage read operation fails. + fn has_leaves(&self) -> Result; + + /// Retrieves a single SMT Subtree by its root `NodeIndex`. + /// + /// Subtrees typically represent deeper, compacted parts of the SMT. + /// Returns `Ok(None)` if no subtree is found for the given `index`. + fn get_subtree(&self, index: NodeIndex) -> Result, StorageError>; + + /// Retrieves multiple Subtrees by their root `NodeIndex` values. + /// + /// The returned `Vec` will have the same length as the input `indices` slice. + /// For each `index` in the input, the corresponding element in the output `Vec` + /// will be `Some(Subtree)` if found, or `None` if not found. + fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError>; + + /// Retrieves a single leaf and multiple subtrees in one call. + /// + /// The default implementation delegates to [`Self::get_leaf`] and [`Self::get_subtree`]. + /// Backends can override this with a more-optimized implementation if one is available. This + /// default implementation does not employ parallelism, and hence may be slower than separately + /// issuing [`Self::get_leaf`] and [`Self::get_subtrees`] for large numbers of subtrees. + /// + /// # Errors + /// + /// - [`StorageError::Backend`] if the backing storage cannot be accessed during the query. + fn get_leaf_and_subtrees( + &self, + leaf_index: u64, + subtree_indices: &[NodeIndex], + ) -> Result<(Option, Vec>), StorageError> { + let leaf = self.get_leaf(leaf_index)?; + + // We explicitly do NOT want to delegate to `get_subtrees` here as it can be a very heavy + // hammer. We instead use the simplest solution that has no potential for unpredictable + // performance, even if it is slower for large numbers of subtrees. + let subtrees = subtree_indices + .iter() + .map(|&idx| self.get_subtree(idx)) + .collect::, _>>()?; + Ok((leaf, subtrees)) + } + + /// Retrieves a single inner node from within a Subtree. + /// + /// This method is intended for accessing nodes at depths greater than the in-memory horizon. + /// Returns `Ok(None)` if the containing Subtree or the specific inner node is not found. + fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError>; + + /// Returns an iterator over all (logical_index, SmtLeaf) pairs currently in storage. + /// + /// The order of iteration is not guaranteed unless specified by the implementation. + fn iter_leaves(&self) -> Result + '_>, StorageError>; + + /// Returns an iterator over all `Subtree` instances currently in storage. + /// + /// The order of iteration is not guaranteed unless specified by the implementation. + fn iter_subtrees(&self) -> Result + '_>, StorageError>; + + /// Retrieves all depth 24 hashes from storage for efficient startup reconstruction. + /// + /// Returns a vector of `(node_index_value, InnerNode)` tuples representing + /// the cached roots of nodes at depth 24 (the in-memory/storage boundary). + /// These roots enable fast reconstruction of the upper tree without loading + /// entire subtrees. + /// + /// The hash cache is automatically maintained by subtree operations - no manual + /// cache management is required. + fn get_depth24(&self) -> Result, StorageError>; +} + +/// Sparse Merkle Tree storage backend with full read and write capabilities. +/// +/// This trait extends [`SmtStorageReader`] with mutation operations required to persist changes +/// to the SMT. +/// +/// All methods are expected to handle potential storage errors by returning a +/// `Result<_, StorageError>`. +pub trait SmtStorage: SmtStorageReader { /// Inserts a key-value pair into the SMT leaf at the specified logical `index`. /// /// - If the leaf at `index` does not exist, it may be created. @@ -89,10 +181,6 @@ pub trait SmtStorage: 'static + fmt::Debug + Send + Sync { /// write permission issues, serialization failures). fn remove_value(&mut self, index: u64, key: Word) -> Result, StorageError>; - /// Retrieves a single SMT leaf node by its logical `index`. - /// Returns `Ok(None)` if no leaf exists at the given `index`. - fn get_leaf(&self, index: u64) -> Result, StorageError>; - /// Sets or updates multiple SMT leaf nodes in storage. /// /// For each entry in the `leaves` map, if a leaf at the given index already exists, @@ -116,59 +204,6 @@ pub trait SmtStorage: 'static + fmt::Debug + Send + Sync { /// the overall leaf and entry counts. fn remove_leaf(&mut self, index: u64) -> Result, StorageError>; - /// Retrieves multiple SMT leaf nodes by their logical `indices`. - /// - /// The returned `Vec` will have the same length as the input `indices` slice. - /// For each `index` in the input, the corresponding element in the output `Vec` - /// will be `Some(SmtLeaf)` if found, or `None` if not found. - fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError>; - - /// Returns true if the storage has any leaves. - /// - /// # Errors - /// Returns `StorageError` if the storage read operation fails. - fn has_leaves(&self) -> Result; - - /// Retrieves a single SMT Subtree by its root `NodeIndex`. - /// - /// Subtrees typically represent deeper, compacted parts of the SMT. - /// Returns `Ok(None)` if no subtree is found for the given `index`. - fn get_subtree(&self, index: NodeIndex) -> Result, StorageError>; - - /// Retrieves multiple Subtrees by their root `NodeIndex` values. - /// - /// The returned `Vec` will have the same length as the input `indices` slice. - /// For each `index` in the input, the corresponding element in the output `Vec` - /// will be `Some(Subtree)` if found, or `None` if not found. - fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError>; - - /// Retrieves a single leaf and multiple subtrees in one call. - /// - /// The default implementation delegates to [`Self::get_leaf`] and [`Self::get_subtree`]. - /// Backends can override this with a more-optimized implementation if one is available. This - /// default implementation does not employ parallelism, and hence may be slower than separately - /// issuing [`Self::get_leaf`] and [`Self::get_subtrees`] for large numbers of subtrees. - /// - /// # Errors - /// - /// - [`StorageError::Backend`] if the backing storage cannot be accessed during the query. - fn get_leaf_and_subtrees( - &self, - leaf_index: u64, - subtree_indices: &[NodeIndex], - ) -> Result<(Option, Vec>), StorageError> { - let leaf = self.get_leaf(leaf_index)?; - - // We explicitly do NOT want to delegate to `get_subtrees` here as it can be a very heavy - // hammer. We instead use the simplest solution that has no potential for unpredictable - // performance, even if it is slower for large numbers of subtrees. - let subtrees = subtree_indices - .iter() - .map(|&idx| self.get_subtree(idx)) - .collect::, _>>()?; - Ok((leaf, subtrees)) - } - /// Sets or updates a single SMT Subtree in storage, identified by its root `NodeIndex`. /// /// If a subtree with the same root `NodeIndex` already exists, it is overwritten. @@ -185,12 +220,6 @@ pub trait SmtStorage: 'static + fmt::Debug + Send + Sync { /// Returns `Ok(())` on successful removal or if the subtree did not exist. fn remove_subtree(&mut self, index: NodeIndex) -> Result<(), StorageError>; - /// Retrieves a single inner node from within a Subtree. - /// - /// This method is intended for accessing nodes at depths greater than the in-memory horizon. - /// Returns `Ok(None)` if the containing Subtree or the specific inner node is not found. - fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError>; - /// Sets or updates a single inner node (non-leaf node) within a Subtree. /// /// - If the target Subtree does not exist, it might need to be created by the implementation. @@ -216,30 +245,9 @@ pub trait SmtStorage: 'static + fmt::Debug + Send + Sync { /// If any part of the update fails, the entire transaction should be rolled back, leaving /// the storage in its previous state. fn apply(&mut self, updates: StorageUpdates) -> Result<(), StorageError>; - - /// Returns an iterator over all (logical_index, SmtLeaf) pairs currently in storage. - /// - /// The order of iteration is not guaranteed unless specified by the implementation. - fn iter_leaves(&self) -> Result + '_>, StorageError>; - - /// Returns an iterator over all `Subtree` instances currently in storage. - /// - /// The order of iteration is not guaranteed unless specified by the implementation. - fn iter_subtrees(&self) -> Result + '_>, StorageError>; - - /// Retrieves all depth 24 hashes from storage for efficient startup reconstruction. - /// - /// Returns a vector of `(node_index_value, InnerNode)` tuples representing - /// the cached roots of nodes at depth 24 (the in-memory/storage boundary). - /// These roots enable fast reconstruction of the upper tree without loading - /// entire subtrees. - /// - /// The hash cache is automatically maintained by subtree operations - no manual - /// cache management is required. - fn get_depth24(&self) -> Result, StorageError>; } -impl SmtStorage for Box { +impl SmtStorageReader for Box { #[inline] fn leaf_count(&self) -> Result { self.deref().leaf_count() @@ -250,36 +258,11 @@ impl SmtStorage for Box { self.deref().entry_count() } - #[inline] - fn insert_value( - &mut self, - index: u64, - key: Word, - value: Word, - ) -> Result, StorageError> { - self.deref_mut().insert_value(index, key, value) - } - - #[inline] - fn remove_value(&mut self, index: u64, key: Word) -> Result, StorageError> { - self.deref_mut().remove_value(index, key) - } - #[inline] fn get_leaf(&self, index: u64) -> Result, StorageError> { self.deref().get_leaf(index) } - #[inline] - fn set_leaves(&mut self, leaves: Map) -> Result<(), StorageError> { - self.deref_mut().set_leaves(leaves) - } - - #[inline] - fn remove_leaf(&mut self, index: u64) -> Result, StorageError> { - self.deref_mut().remove_leaf(index) - } - #[inline] fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { self.deref().get_leaves(indices) @@ -309,6 +292,53 @@ impl SmtStorage for Box { self.deref().get_leaf_and_subtrees(leaf_index, subtree_indices) } + #[inline] + fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { + self.deref().get_inner_node(index) + } + + #[inline] + fn iter_leaves(&self) -> Result + '_>, StorageError> { + self.deref().iter_leaves() + } + + #[inline] + fn iter_subtrees(&self) -> Result + '_>, StorageError> { + self.deref().iter_subtrees() + } + + #[inline] + fn get_depth24(&self) -> Result, StorageError> { + self.deref().get_depth24() + } +} + +impl SmtStorage for Box { + #[inline] + fn insert_value( + &mut self, + index: u64, + key: Word, + value: Word, + ) -> Result, StorageError> { + self.deref_mut().insert_value(index, key, value) + } + + #[inline] + fn remove_value(&mut self, index: u64, key: Word) -> Result, StorageError> { + self.deref_mut().remove_value(index, key) + } + + #[inline] + fn set_leaves(&mut self, leaves: Map) -> Result<(), StorageError> { + self.deref_mut().set_leaves(leaves) + } + + #[inline] + fn remove_leaf(&mut self, index: u64) -> Result, StorageError> { + self.deref_mut().remove_leaf(index) + } + #[inline] fn set_subtree(&mut self, subtree: &Subtree) -> Result<(), StorageError> { self.deref_mut().set_subtree(subtree) @@ -324,11 +354,6 @@ impl SmtStorage for Box { self.deref_mut().remove_subtree(index) } - #[inline] - fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { - self.deref().get_inner_node(index) - } - #[inline] fn set_inner_node( &mut self, @@ -347,19 +372,4 @@ impl SmtStorage for Box { fn apply(&mut self, updates: StorageUpdates) -> Result<(), StorageError> { self.deref_mut().apply(updates) } - - #[inline] - fn iter_leaves(&self) -> Result + '_>, StorageError> { - self.deref().iter_leaves() - } - - #[inline] - fn iter_subtrees(&self) -> Result + '_>, StorageError> { - self.deref().iter_subtrees() - } - - #[inline] - fn get_depth24(&self) -> Result, StorageError> { - self.deref().get_depth24() - } } diff --git a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs index 4a18673ff4..bdb8a5b16e 100644 --- a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs +++ b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs @@ -6,7 +6,9 @@ use rocksdb::{ DBIteratorWithThreadMode, FlushOptions, IteratorMode, Options, ReadOptions, WriteBatch, }; -use super::{SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate}; +use super::{ + SmtStorage, SmtStorageReader, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate, +}; use crate::{ EMPTY_WORD, Word, merkle::{ @@ -249,7 +251,7 @@ impl RocksDbStorage { } } -impl SmtStorage for RocksDbStorage { +impl SmtStorageReader for RocksDbStorage { /// Retrieves the total count of non-empty leaves from the `METADATA_CF` column family. /// Returns 0 if the count is not found. /// @@ -290,6 +292,248 @@ impl SmtStorage for RocksDbStorage { }) } + /// Retrieves a single SMT leaf node by its logical `index` from the `LEAVES_CF` column family. + /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If the retrieved leaf data is corrupt. + fn get_leaf(&self, index: u64) -> Result, StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let key = Self::index_db_key(index); + match self.db.get_cf(cf, key)? { + Some(bytes) => { + let leaf = SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?; + Ok(Some(leaf)) + }, + None => Ok(None), + } + } + + /// Retrieves multiple SMT leaf nodes by their logical `indices` using RocksDB's `multi_get_cf`. + /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If any retrieved leaf data is corrupt. + fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let db_keys: Vec<[u8; 8]> = indices.iter().map(|&idx| Self::index_db_key(idx)).collect(); + let results = self.db.multi_get_cf(db_keys.iter().map(|k| (cf, k.as_ref()))); + + results + .into_iter() + .map(|result| match result { + Ok(Some(bytes)) => { + Ok(Some(SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?)) + }, + Ok(None) => Ok(None), + Err(e) => Err(e.into()), + }) + .collect() + } + + /// Returns true if the storage has any leaves. + /// + /// # Errors + /// Returns `StorageError` if the storage read operation fails. + fn has_leaves(&self) -> Result { + Ok(self.leaf_count()? > 0) + } + + /// Batch-retrieves multiple subtrees from RocksDB by their node indices. + /// + /// This method groups requests by subtree depth into column family buckets, + /// then performs parallel `multi_get` operations to efficiently retrieve + /// all subtrees. Results are deserialized and placed in the same order as + /// the input indices. + /// + /// Note: Retrieval is performed in parallel. If multiple errors occur (e.g., + /// deserialization or backend errors), only the first one encountered is returned. + /// Other errors will be discarded. + /// + /// # Parameters + /// - `indices`: A slice of subtree root indices to retrieve. + /// + /// # Returns + /// - A `Vec>` where each index corresponds to the original input. + /// - `Ok(...)` if all fetches succeed. + /// - `Err(StorageError)` if any RocksDB access or deserialization fails. + fn get_subtree(&self, index: NodeIndex) -> Result, StorageError> { + let cf = self.subtree_cf(index); + let key = Self::subtree_db_key(index); + match self.db.get_cf(cf, key)? { + Some(bytes) => { + let subtree = Subtree::from_vec(index, &bytes)?; + Ok(Some(subtree)) + }, + None => Ok(None), + } + } + + /// Batch-retrieves multiple subtrees from RocksDB by their node indices. + /// + /// This method groups requests by subtree depth into column family buckets, + /// then performs parallel `multi_get` operations to efficiently retrieve + /// all subtrees. Results are deserialized and placed in the same order as + /// the input indices. + /// + /// # Parameters + /// - `indices`: A slice of subtree root indices to retrieve. + /// + /// # Returns + /// - A `Vec>` where each index corresponds to the original input. + /// - `Ok(...)` if all fetches succeed. + /// - `Err(StorageError)` if any RocksDB access or deserialization fails. + fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError> { + use p3_maybe_rayon::prelude::*; + + let mut depth_buckets: [Vec<(usize, NodeIndex)>; 5] = Default::default(); + + for (original_index, &node_index) in indices.iter().enumerate() { + let depth = node_index.depth(); + let bucket_index = match depth { + 56 => 0, + 48 => 1, + 40 => 2, + 32 => 3, + 24 => 4, + _ => { + return Err(StorageError::Unsupported(format!( + "unsupported subtree depth {depth}" + ))); + }, + }; + depth_buckets[bucket_index].push((original_index, node_index)); + } + let mut results = vec![None; indices.len()]; + + // Process depth buckets in parallel + let bucket_results: Result, StorageError> = depth_buckets + .into_par_iter() + .enumerate() + .filter(|(_, bucket)| !bucket.is_empty()) + .map( + |(bucket_index, bucket)| -> Result)>, StorageError> { + let depth = LargeSmt::::SUBTREE_DEPTHS[bucket_index]; + let cf = self.cf_handle(cf_for_depth(depth))?; + let keys: Vec<_> = + bucket.iter().map(|(_, idx)| Self::subtree_db_key(*idx)).collect(); + + let db_results = self.db.multi_get_cf(keys.iter().map(|k| (cf, k.as_ref()))); + + // Process results for this bucket + bucket + .into_iter() + .zip(db_results) + .map(|((original_index, node_index), db_result)| { + let subtree = match db_result { + Ok(Some(bytes)) => Some(Subtree::from_vec(node_index, &bytes)?), + Ok(None) => None, + Err(e) => return Err(e.into()), + }; + Ok((original_index, subtree)) + }) + .collect() + }, + ) + .collect(); + + // Flatten results and place them in correct positions + for bucket_result in bucket_results? { + for (original_index, subtree) in bucket_result { + results[original_index] = subtree; + } + } + + Ok(results) + } + + /// Retrieves a single inner node (non-leaf node) from within a Subtree. + /// + /// This method is intended for accessing nodes at depths greater than or equal to + /// `IN_MEMORY_DEPTH`. It first finds the appropriate Subtree containing the `index`, then + /// delegates to `Subtree::get_inner_node()`. + /// + /// # Errors + /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. + /// - `StorageError::Value`: If the containing Subtree data is corrupt. + fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { + if index.depth() < IN_MEMORY_DEPTH { + return Err(StorageError::Unsupported( + "Cannot get inner node from upper part of the tree".into(), + )); + } + let subtree_root_index = Subtree::find_subtree_root(index); + Ok(self + .get_subtree(subtree_root_index)? + .and_then(|subtree| subtree.get_inner_node(index))) + } + + /// Returns an iterator over all (logical u64 index, `SmtLeaf`) pairs in the `LEAVES_CF`. + /// + /// The iterator uses a RocksDB snapshot for consistency and iterates in lexicographical + /// order of the keys (leaf indices). Errors during iteration (e.g., deserialization issues) + /// cause the iterator to skip the problematic item and attempt to continue. + /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs + /// during iterator creation. + fn iter_leaves(&self) -> Result + '_>, StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let mut read_opts = ReadOptions::default(); + read_opts.set_total_order_seek(true); + let db_iter = self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start); + + Ok(Box::new(RocksDbDirectLeafIterator { iter: db_iter })) + } + + /// Returns an iterator over all `Subtree` instances across all subtree column families. + /// + /// The iterator uses a RocksDB snapshot and iterates in lexicographical order of keys + /// (subtree root NodeIndex) across all depth column families (24, 32, 40, 48, 56). + /// Errors during iteration (e.g., deserialization issues) cause the iterator to skip + /// the problematic item and attempt to continue. + /// + /// # Errors + /// - `StorageError::Backend`: If any subtree column family is missing or a RocksDB error occurs + /// during iterator creation. + fn iter_subtrees(&self) -> Result + '_>, StorageError> { + // All subtree column family names in order + const SUBTREE_CFS: [&str; 5] = + [SUBTREE_24_CF, SUBTREE_32_CF, SUBTREE_40_CF, SUBTREE_48_CF, SUBTREE_56_CF]; + + let mut cf_handles = Vec::new(); + for cf_name in SUBTREE_CFS { + cf_handles.push(self.cf_handle(cf_name)?); + } + + Ok(Box::new(RocksDbSubtreeIterator::new(&self.db, cf_handles))) + } + + /// Retrieves all depth 24 hashes for fast tree rebuilding. + /// + /// # Errors + /// - `StorageError::Backend`: If the depth24 column family is missing or a RocksDB error + /// occurs. + /// - `StorageError::Value`: If any hash bytes are corrupt. + fn get_depth24(&self) -> Result, StorageError> { + let cf = self.cf_handle(DEPTH_24_CF)?; + let iter = self.db.iterator_cf(cf, IteratorMode::Start); + let mut hashes = Vec::new(); + + for item in iter { + let (key_bytes, value_bytes) = item?; + + let index = index_from_key_bytes(&key_bytes)?; + let hash = Word::read_from_bytes_with_budget(&value_bytes, value_bytes.len())?; + + hashes.push((index, hash)); + } + + Ok(hashes) + } +} + +impl SmtStorage for RocksDbStorage { /// Inserts a key-value pair into the SMT leaf at the specified logical `index`. /// /// This operation involves: @@ -408,23 +652,6 @@ impl SmtStorage for RocksDbStorage { Ok(current_value) } - /// Retrieves a single SMT leaf node by its logical `index` from the `LEAVES_CF` column family. - /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If the retrieved leaf data is corrupt. - fn get_leaf(&self, index: u64) -> Result, StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let key = Self::index_db_key(index); - match self.db.get_cf(cf, key)? { - Some(bytes) => { - let leaf = SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?; - Ok(Some(leaf)) - }, - None => Ok(None), - } - } - /// Sets or updates multiple SMT leaf nodes in the `LEAVES_CF` column family. /// /// This method performs a batch write to RocksDB. It also updates the global @@ -480,144 +707,6 @@ impl SmtStorage for RocksDbStorage { })) } - /// Retrieves multiple SMT leaf nodes by their logical `indices` using RocksDB's `multi_get_cf`. - /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If any retrieved leaf data is corrupt. - fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let db_keys: Vec<[u8; 8]> = indices.iter().map(|&idx| Self::index_db_key(idx)).collect(); - let results = self.db.multi_get_cf(db_keys.iter().map(|k| (cf, k.as_ref()))); - - results - .into_iter() - .map(|result| match result { - Ok(Some(bytes)) => { - Ok(Some(SmtLeaf::read_from_bytes_with_budget(&bytes, bytes.len())?)) - }, - Ok(None) => Ok(None), - Err(e) => Err(e.into()), - }) - .collect() - } - - /// Returns true if the storage has any leaves. - /// - /// # Errors - /// Returns `StorageError` if the storage read operation fails. - fn has_leaves(&self) -> Result { - Ok(self.leaf_count()? > 0) - } - - /// Batch-retrieves multiple subtrees from RocksDB by their node indices. - /// - /// This method groups requests by subtree depth into column family buckets, - /// then performs parallel `multi_get` operations to efficiently retrieve - /// all subtrees. Results are deserialized and placed in the same order as - /// the input indices. - /// - /// Note: Retrieval is performed in parallel. If multiple errors occur (e.g., - /// deserialization or backend errors), only the first one encountered is returned. - /// Other errors will be discarded. - /// - /// # Parameters - /// - `indices`: A slice of subtree root indices to retrieve. - /// - /// # Returns - /// - A `Vec>` where each index corresponds to the original input. - /// - `Ok(...)` if all fetches succeed. - /// - `Err(StorageError)` if any RocksDB access or deserialization fails. - fn get_subtree(&self, index: NodeIndex) -> Result, StorageError> { - let cf = self.subtree_cf(index); - let key = Self::subtree_db_key(index); - match self.db.get_cf(cf, key)? { - Some(bytes) => { - let subtree = Subtree::from_vec(index, &bytes)?; - Ok(Some(subtree)) - }, - None => Ok(None), - } - } - - /// Batch-retrieves multiple subtrees from RocksDB by their node indices. - /// - /// This method groups requests by subtree depth into column family buckets, - /// then performs parallel `multi_get` operations to efficiently retrieve - /// all subtrees. Results are deserialized and placed in the same order as - /// the input indices. - /// - /// # Parameters - /// - `indices`: A slice of subtree root indices to retrieve. - /// - /// # Returns - /// - A `Vec>` where each index corresponds to the original input. - /// - `Ok(...)` if all fetches succeed. - /// - `Err(StorageError)` if any RocksDB access or deserialization fails. - fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError> { - use p3_maybe_rayon::prelude::*; - - let mut depth_buckets: [Vec<(usize, NodeIndex)>; 5] = Default::default(); - - for (original_index, &node_index) in indices.iter().enumerate() { - let depth = node_index.depth(); - let bucket_index = match depth { - 56 => 0, - 48 => 1, - 40 => 2, - 32 => 3, - 24 => 4, - _ => { - return Err(StorageError::Unsupported(format!( - "unsupported subtree depth {depth}" - ))); - }, - }; - depth_buckets[bucket_index].push((original_index, node_index)); - } - let mut results = vec![None; indices.len()]; - - // Process depth buckets in parallel - let bucket_results: Result, StorageError> = depth_buckets - .into_par_iter() - .enumerate() - .filter(|(_, bucket)| !bucket.is_empty()) - .map( - |(bucket_index, bucket)| -> Result)>, StorageError> { - let depth = LargeSmt::::SUBTREE_DEPTHS[bucket_index]; - let cf = self.cf_handle(cf_for_depth(depth))?; - let keys: Vec<_> = - bucket.iter().map(|(_, idx)| Self::subtree_db_key(*idx)).collect(); - - let db_results = self.db.multi_get_cf(keys.iter().map(|k| (cf, k.as_ref()))); - - // Process results for this bucket - bucket - .into_iter() - .zip(db_results) - .map(|((original_index, node_index), db_result)| { - let subtree = match db_result { - Ok(Some(bytes)) => Some(Subtree::from_vec(node_index, &bytes)?), - Ok(None) => None, - Err(e) => return Err(e.into()), - }; - Ok((original_index, subtree)) - }) - .collect() - }, - ) - .collect(); - - // Flatten results and place them in correct positions - for bucket_result in bucket_results? { - for (original_index, subtree) in bucket_result { - results[original_index] = subtree; - } - } - - Ok(results) - } - /// Stores a single subtree in RocksDB and optionally updates the depth-24 root cache. /// /// The subtree is serialized and written to its corresponding column family. @@ -713,27 +802,6 @@ impl SmtStorage for RocksDbStorage { Ok(()) } - /// Retrieves a single inner node (non-leaf node) from within a Subtree. - /// - /// This method is intended for accessing nodes at depths greater than or equal to - /// `IN_MEMORY_DEPTH`. It first finds the appropriate Subtree containing the `index`, then - /// delegates to `Subtree::get_inner_node()`. - /// - /// # Errors - /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. - /// - `StorageError::Value`: If the containing Subtree data is corrupt. - fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { - if index.depth() < IN_MEMORY_DEPTH { - return Err(StorageError::Unsupported( - "Cannot get inner node from upper part of the tree".into(), - )); - } - let subtree_root_index = Subtree::find_subtree_root(index); - Ok(self - .get_subtree(subtree_root_index)? - .and_then(|subtree| subtree.get_inner_node(index))) - } - /// Sets or updates a single inner node (non-leaf node) within a Subtree. /// /// This method is intended for `index.depth() >= IN_MEMORY_DEPTH`. @@ -902,70 +970,6 @@ impl SmtStorage for RocksDbStorage { Ok(()) } - - /// Returns an iterator over all (logical u64 index, `SmtLeaf`) pairs in the `LEAVES_CF`. - /// - /// The iterator uses a RocksDB snapshot for consistency and iterates in lexicographical - /// order of the keys (leaf indices). Errors during iteration (e.g., deserialization issues) - /// cause the iterator to skip the problematic item and attempt to continue. - /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs - /// during iterator creation. - fn iter_leaves(&self) -> Result + '_>, StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let mut read_opts = ReadOptions::default(); - read_opts.set_total_order_seek(true); - let db_iter = self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start); - - Ok(Box::new(RocksDbDirectLeafIterator { iter: db_iter })) - } - - /// Returns an iterator over all `Subtree` instances across all subtree column families. - /// - /// The iterator uses a RocksDB snapshot and iterates in lexicographical order of keys - /// (subtree root NodeIndex) across all depth column families (24, 32, 40, 48, 56). - /// Errors during iteration (e.g., deserialization issues) cause the iterator to skip - /// the problematic item and attempt to continue. - /// - /// # Errors - /// - `StorageError::Backend`: If any subtree column family is missing or a RocksDB error occurs - /// during iterator creation. - fn iter_subtrees(&self) -> Result + '_>, StorageError> { - // All subtree column family names in order - const SUBTREE_CFS: [&str; 5] = - [SUBTREE_24_CF, SUBTREE_32_CF, SUBTREE_40_CF, SUBTREE_48_CF, SUBTREE_56_CF]; - - let mut cf_handles = Vec::new(); - for cf_name in SUBTREE_CFS { - cf_handles.push(self.cf_handle(cf_name)?); - } - - Ok(Box::new(RocksDbSubtreeIterator::new(&self.db, cf_handles))) - } - - /// Retrieves all depth 24 hashes for fast tree rebuilding. - /// - /// # Errors - /// - `StorageError::Backend`: If the depth24 column family is missing or a RocksDB error - /// occurs. - /// - `StorageError::Value`: If any hash bytes are corrupt. - fn get_depth24(&self) -> Result, StorageError> { - let cf = self.cf_handle(DEPTH_24_CF)?; - let iter = self.db.iterator_cf(cf, IteratorMode::Start); - let mut hashes = Vec::new(); - - for item in iter { - let (key_bytes, value_bytes) = item?; - - let index = index_from_key_bytes(&key_bytes)?; - let hash = Word::read_from_bytes_with_budget(&value_bytes, value_bytes.len())?; - - hashes.push((index, hash)); - } - - Ok(hashes) - } } /// Syncs the RocksDB database to disk before dropping the storage. diff --git a/miden-crypto/src/merkle/smt/large_forest/mod.rs b/miden-crypto/src/merkle/smt/large_forest/mod.rs index 04e8f72bfa..fb1bf3de65 100644 --- a/miden-crypto/src/merkle/smt/large_forest/mod.rs +++ b/miden-crypto/src/merkle/smt/large_forest/mod.rs @@ -346,7 +346,7 @@ use crate::{ /// A high-performance forest of sparse merkle trees with pluggable storage backends. /// /// See the module documentation for more information. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct LargeSmtForest { /// The configuration for how the forest functions. config: Config, diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs index d532c4c9eb..4cb27ce518 100644 --- a/miden-crypto/src/merkle/smt/mod.rs +++ b/miden-crypto/src/merkle/smt/mod.rs @@ -22,8 +22,8 @@ mod large; pub use full::concurrent::{SubtreeLeaf, build_subtree_for_bench}; #[cfg(feature = "concurrent")] pub use large::{ - LargeSmt, LargeSmtError, MemoryStorage, SmtStorage, StorageError, StorageUpdateParts, - StorageUpdates, Subtree, SubtreeError, SubtreeUpdate, + LargeSmt, LargeSmtError, MemoryStorage, SmtStorage, SmtStorageReader, StorageError, + StorageUpdateParts, StorageUpdates, Subtree, SubtreeError, SubtreeUpdate, }; #[cfg(feature = "rocksdb")] pub use large::{RocksDbConfig, RocksDbStorage};