Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
580e44a
feat(checkpoint): hard-link snapshot for PITR backup
polaz May 19, 2026
fcbab5c
refactor(checkpoint): prefer core/alloc imports for no-std friendliness
polaz May 19, 2026
dd23588
fix(checkpoint): address race conditions, cleanup, and test hardening
polaz May 19, 2026
8fde911
fix(checkpoint): tighten cleanup, TOCTOU, hard-link strategy, and tests
polaz May 19, 2026
5c5db06
fix(fs,docs): gate raw EXDEV test to Unix + clarify watermark docstring
polaz May 19, 2026
efa793b
refactor(checkpoint,fs,deletion-pause): tighten public surface + log …
polaz May 19, 2026
5faae35
fix(checkpoint,fs,tests): parent fsync, copy cleanup, race-test hands…
polaz May 20, 2026
16e2118
fix(checkpoint): seqno watermark race + warn→debug doc alignment
polaz May 20, 2026
0abd9ea
fix(checkpoint,fs,tests): align hard_link docs/behavior + log cross-b…
polaz May 20, 2026
3e1e880
test(checkpoint): regression for MVCC GC leak through flush
polaz May 20, 2026
27c1b2d
fix(checkpoint): pass 0 as flush GC threshold, not SeqNo::MAX
polaz May 20, 2026
4471fef
docs(checkpoint): correct seqno in MVCC regression test docstring
polaz May 20, 2026
8378b2f
docs(checkpoint): correct crash-recovery comment for missing CURRENT
polaz May 20, 2026
b42bf79
docs(checkpoint): explain why link_or_copy_cross_fs re-stats dst
polaz May 20, 2026
4d4d5b2
feat(fs): add Fs::backend_id namespace capability check
polaz May 20, 2026
8ec05fe
test(checkpoint): regression for missing/corrupt CURRENT pointer
polaz May 20, 2026
7d513eb
fix(tree): reject half-written checkpoint when CURRENT is missing
polaz May 20, 2026
33914fd
fix(tree): treat missing directory as 'no state' in version-state probe
polaz May 20, 2026
70904ba
feat(tree): log open failure when CURRENT is missing with stale state
polaz May 20, 2026
b2a4558
refactor(deletion_pause): swap std::sync::Mutex for spin::Mutex
polaz May 20, 2026
6f2f7b5
fix(checkpoint): fsync parent dir for relative target paths + name CU…
polaz May 20, 2026
3dfec59
test(checkpoint): assert ErrorKind::AlreadyExists structurally
polaz May 20, 2026
3ef4e07
test(deletion_pause): replace timing-based race reproducer with hands…
polaz May 20, 2026
21d031a
docs(readme): mention create_checkpoint in the Concurrency & API section
polaz May 20, 2026
0b8ba45
feat(tree): return structured Io(InvalidData) for half-written checkp…
polaz May 20, 2026
8f00597
test(checkpoint): structural ErrorKind assert in early-reject failure…
polaz May 20, 2026
f3bd055
fix(vlog): close blob file accessor before unlinking on drop
polaz May 20, 2026
e369bf0
test(checkpoint): regression for manifest-GC race deleting captured vN
polaz May 20, 2026
80f731e
fix(checkpoint): serialise captured Version into target instead of co…
polaz May 20, 2026
6aaeba8
test(checkpoint): tighten tamper test + use authoritative version id
polaz May 20, 2026
40f0b3f
test(checkpoint): regression for parent-fsync . path on non-StdFs
polaz May 20, 2026
54c202e
fix(checkpoint): skip parent fsync when target.parent() is empty
polaz May 20, 2026
8a2c200
test(checkpoint): regression for ./checkpoint relative target on MemFs
polaz May 20, 2026
a6e8afc
fix(checkpoint): normalise target_root by stripping CurDir components
polaz May 20, 2026
79a0ad8
test(checkpoint): make concurrent_writes watermark assertion meaningful
polaz May 20, 2026
dda582a
feat(filter)!: V5 storage format breaks V3/V4 compatibility
polaz May 20, 2026
a2d5d81
test(deletion_pause): make race test actually interleave A's drop and…
polaz May 20, 2026
9808f74
docs(checkpoint): correct CheckpointInfo::seqno watermark semantics
polaz May 20, 2026
4be0ddb
refactor(checkpoint): no-std-friendlier deletion pause + cleanup on c…
polaz May 20, 2026
240019d
refactor(deletion-pause,compression): switch to OnceBox for no-std-fr…
polaz May 20, 2026
9d839cc
fix(merge): align IteratorValue Eq with Ord (key + Reverse(seqno))
polaz May 20, 2026
d767602
docs(compression,cargo): correct OnceBox wording + once_cell rationale
polaz May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/abstract_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,27 @@ pub type RangeItem = crate::Result<KvPair>;

// Bundles a list of tables with an optional list of blob files in one tuple.
// NOTE(review): presumably what a flush-to-tables step returns (inferred from
// the alias name only) — confirm against the use sites.
type FlushToTablesResult = (Vec<Table>, Option<Vec<BlobFile>>);

/// Summary of a checkpoint produced by
/// [`AbstractTree::create_checkpoint`].
///
/// All byte counts are *logical* file sizes — hard links share the
/// underlying inode storage, so a checkpoint's marginal disk usage is
/// typically zero until the original files are compacted away.
#[derive(Debug, Clone, Copy)]
pub struct CheckpointInfo {
    /// Number of SST files captured.
    pub sst_files: usize,
    /// Number of blob (value-log) files captured. Always `0` for a
    /// standard [`Tree`].
    pub blob_files: usize,
    /// Sum, in bytes, of the logical file sizes of every captured SST
    /// and blob file.
    pub total_bytes: u64,
    /// The version ID embedded in the checkpoint's `current` pointer.
    pub version_id: u64,
    /// The maximum visible sequence number at checkpoint time.
    pub seqno: SeqNo,
}

// Sealed on purpose: this trait is still public as a consumer-side bound
// (`&impl AbstractTree`), but external implementations are no longer part of
// the supported extension surface. Internal flush/version hooks keep evolving
Expand Down Expand Up @@ -62,6 +83,56 @@ pub trait AbstractTree: sealed::Sealed {
#[doc(hidden)]
fn get_version_history_lock(&self) -> RwLockWriteGuard<'_, crate::version::SuperVersions>;

/// Creates a hard-linked checkpoint of the tree's on-disk state in
/// `target_path` for point-in-time recovery (PITR) backup.
///
/// The checkpoint is a fully functional tree that can be opened
/// independently via [`Config::open`](crate::Config::open). For the
/// common single-filesystem case all SST files (and blob files, for
/// [`BlobTree`]) are hard-linked rather than copied, so the operation
/// is O(1) per file and consumes zero additional disk space until the
/// original files are compacted away — at which point the inode is
/// kept alive by the checkpoint link.
///
/// # Cross-filesystem / cross-backend fall-back
///
/// When a source file lives on a different filesystem than the
/// checkpoint target — e.g. an SST routed to a hot tier via
/// [`level_routes`](crate::Config::level_routes) on a separate volume,
/// or a backup directory on a foreign mount — the hard link cannot
/// be created (Unix `EXDEV`). In that case the checkpoint silently
/// falls back to a streamed byte copy, which:
///
/// - takes time linear in the file size instead of O(1), and
/// - consumes disk space equal to the copied bytes on the target
/// volume (no inode sharing across filesystems).
///
/// The fall-back is logged via [`log::warn`] so operators using
/// tiered storage or detached backup volumes can spot it. Same applies
/// when the source and target use entirely different [`Fs`](crate::fs::Fs)
/// backends (e.g. [`MemFs`](crate::fs::MemFs) → [`StdFs`](crate::fs::StdFs)
Comment thread
polaz marked this conversation as resolved.
/// in tests).
Comment thread
polaz marked this conversation as resolved.
///
/// # Concurrency
///
Comment thread
polaz marked this conversation as resolved.
/// While the checkpoint is being built, compaction continues normally
/// but the physical removal of obsolete files is deferred until the
/// checkpoint hard-links are in place. This is implemented by an
/// internal reference-counted deletion gate; callers do not have to
/// pause compaction themselves.
///
/// # Errors
///
/// Returns an error if:
/// - the active memtable could not be flushed,
/// - `target_path` already exists (to prevent accidental overwrites),
/// - a hard link / copy fall-back could not be created, or
/// - the manifest / version pointer files could not be replicated.
///
/// On error any partial checkpoint files are removed automatically
/// (best-effort) so callers can safely retry against the same path.
fn create_checkpoint(&self, target_path: &std::path::Path) -> crate::Result<CheckpointInfo>;

/// Seals the active memtable and flushes to table(s).
///
/// If there are already other sealed memtables lined up, those will be flushed as well.
Expand Down
18 changes: 18 additions & 0 deletions src/blob_tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,24 @@ impl BlobTree {
// Marker impl: `AbstractTree` has `sealed::Sealed` as a supertrait, so
// `BlobTree` must carry this (empty) impl before it can implement the trait.
impl crate::abstract_tree::sealed::Sealed for BlobTree {}

impl AbstractTree for BlobTree {
fn create_checkpoint(
&self,
target_path: &std::path::Path,
) -> crate::Result<crate::CheckpointInfo> {
crate::checkpoint::run_checkpoint(
self,
&crate::checkpoint::CheckpointParams {
target_root: target_path,
target_fs: &self.index.config.fs,
src_root: &self.index.config.path,
src_fs: &self.index.config.fs,
deletion_pause: &self.index.deletion_pause,
visible_seqno: &self.index.config.visible_seqno,
include_blobs: true,
},
)
}

// Forwards the trace request for `key` to the `index` field and returns
// its result unchanged.
fn print_trace(&self, key: &[u8]) -> crate::Result<()> {
self.index.print_trace(key)
}
Expand Down
Loading
Loading