From dcdb48212eec9e7449d00bb35a5cbb6155f9165f Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 12:17:00 +0300 Subject: [PATCH 01/14] feat(fs): add AlignedBuf primitive for O_DIRECT I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Heap-allocated byte buffer with caller-specified alignment. Vec<u8> defaults to align_of::<u8>() = 1, which violates O_DIRECT's typical 4 KiB userspace-buffer alignment requirement (EINVAL on unaligned write/read). AlignedBuf allocates via Layout::from_size_align with the requested boundary, rounds capacity up to a multiple of alignment, and exposes a Vec-like surface (len/capacity/as_slice/spare_capacity_mut/ set_len/clear) plus raw ptr accessors for kernel handoff. Zero-capacity allocations use a dangling NonNull cast from the alignment value, matching the std convention for empty owned buffers. Foundation only; no callers wired yet — direct_io path lands in subsequent commits on this branch. Part of #133 --- src/fs/aligned_buf.rs | 370 ++++++++++++++++++++++++++++++++++++++++++ src/fs/mod.rs | 3 + 2 files changed, 373 insertions(+) create mode 100644 src/fs/aligned_buf.rs diff --git a/src/fs/aligned_buf.rs b/src/fs/aligned_buf.rs new file mode 100644 index 000000000..b60136a0e --- /dev/null +++ b/src/fs/aligned_buf.rs @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (c) 2026-present, Structured World Foundation + +//! Heap-allocated byte buffer with caller-specified alignment. +//! +//! `AlignedBuf` exists for the `O_DIRECT` I/O path: Linux requires +//! both the file offset and the userspace buffer to be aligned to +//! the filesystem's logical block size (typically 512 B on legacy +//! disks, 4 KiB on Advanced Format SSDs). A `Vec<u8>` is aligned +//! to `align_of::<u8>() = 1`, so an unaligned write to an +//! `O_DIRECT` file errors with `EINVAL`. +//! +//! This wrapper exists exclusively for the `O_DIRECT` pairing +//! (#133 Phase 2). 
Normal cached I/O has no alignment requirement +//! and should keep using `Vec<u8>` / `BytesMut` — using +//! `AlignedBuf` there would waste the extra alignment slack with +//! no benefit. + +use core::alloc::Layout; +use core::ptr::NonNull; +use core::slice; + +/// A heap-allocated byte buffer aligned to a caller-specified +/// boundary. +/// +/// Used for the `O_DIRECT` I/O path where kernel alignment +/// requirements (typically 4 KiB) exceed `Vec<u8>`'s default +/// `align_of::<u8>() = 1`. +/// +/// # Invariants +/// +/// - `ptr` is always non-null and points to a region of at least +/// `capacity` bytes allocated via the global allocator with +/// `Layout::from_size_align(capacity, alignment)`. +/// - `len <= capacity`. +/// - `alignment` is a power of two ≥ 1 and ≤ `isize::MAX as usize` +/// (enforced at construction). +/// - `capacity` is a power-of-two multiple of `alignment` (rounded +/// up at construction). +/// +/// # `Send` + `Sync` +/// +/// The raw pointer doesn't carry any cross-thread state; the +/// buffer's bytes are owned, immobile until `Drop`, and only +/// reachable via `&self` / `&mut self`. So `Send` + `Sync` are +/// both safe. +pub struct AlignedBuf { + /// Non-null pointer to the start of the aligned allocation. + ptr: NonNull, + /// Number of bytes currently written (`<= capacity`). + len: usize, + /// Number of bytes allocated. + capacity: usize, + /// Alignment boundary the allocation satisfies (power of two). + alignment: usize, +} + +// SAFETY: AlignedBuf owns its allocation; the raw pointer doesn't +// alias anything else and is only reachable through &self / &mut +// self. Sending the buffer to another thread is sound; concurrent +// shared access through &self is sound (the bytes are immutable +// behind a shared reference). 
+#[expect( + unsafe_code, + reason = "raw-pointer wrapper; Send/Sync soundness justified" +)] +unsafe impl Send for AlignedBuf {} +#[expect( + unsafe_code, + reason = "raw-pointer wrapper; Send/Sync soundness justified" +)] +unsafe impl Sync for AlignedBuf {} + +impl AlignedBuf { + /// Allocates a zero-initialised buffer of `capacity` bytes + /// aligned to `alignment`. `capacity` is rounded up to the + /// next multiple of `alignment` so the trailing slack is + /// large enough for aligned writes that consume the whole + /// buffer. + /// + /// # Errors + /// + /// Returns `None` if: + /// - `alignment` is not a power of two, OR + /// - `alignment > isize::MAX as usize`, OR + /// - the rounded-up capacity overflows `isize::MAX as usize`, OR + /// - the global allocator fails (returns null). + /// + /// # Examples + /// + /// ```ignore + /// # // ignored: AlignedBuf is pub(crate), not exposed publicly. + /// use lsm_tree::fs::AlignedBuf; + /// let buf = AlignedBuf::new_zeroed(8192, 4096).unwrap(); + /// assert_eq!(buf.capacity(), 8192); + /// assert_eq!(buf.as_ptr().addr() % 4096, 0); + /// ``` + #[must_use] + pub fn new_zeroed(capacity: usize, alignment: usize) -> Option { + if !alignment.is_power_of_two() { + return None; + } + if alignment > (isize::MAX as usize) { + return None; + } + // Round up so the trailing slack is large enough for an + // aligned write that consumes the whole capacity. + let rounded = capacity.checked_add(alignment - 1)? & !(alignment - 1); + if rounded > (isize::MAX as usize) { + return None; + } + // 0-byte allocation is undefined for the global allocator; + // synthesise a non-null dangling pointer with the requested + // alignment instead. Reads / writes through it are bounded + // by `len == 0`, so they never touch the dangling address. + if rounded == 0 { + // SAFETY: alignment is a power of two ≥ 1, so casting it + // to a pointer is well-defined and the pointer is non- + // null. We never deref past `len = 0`. 
+ let dangling = { + #[expect(unsafe_code, reason = "non-null dangling for 0-cap buffer")] + unsafe { + NonNull::new_unchecked(alignment as *mut u8) + } + }; + return Some(Self { + ptr: dangling, + len: 0, + capacity: 0, + alignment, + }); + } + let layout = Layout::from_size_align(rounded, alignment).ok()?; + // SAFETY: layout was just validated; alloc_zeroed is safe to + // call for any valid non-zero layout. Returns null on OOM, + // which we surface as None. + #[expect(unsafe_code, reason = "global allocator call with validated layout")] + let raw = unsafe { alloc::alloc::alloc_zeroed(layout) }; + let ptr = NonNull::new(raw)?; + Some(Self { + ptr, + len: 0, + capacity: rounded, + alignment, + }) + } + + /// Number of bytes currently written. + #[must_use] + pub const fn len(&self) -> usize { + self.len + } + + /// Buffer capacity in bytes (`>= len`, rounded up to a + /// multiple of `alignment` at construction time). + #[must_use] + pub const fn capacity(&self) -> usize { + self.capacity + } + + /// Alignment the allocation was constructed with (power of two). + #[must_use] + pub const fn alignment(&self) -> usize { + self.alignment + } + + /// `true` when `len == 0`. + #[must_use] + pub const fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Raw const pointer to the buffer's first byte. Stable across + /// the lifetime of `self` (no reallocation). Valid for reads + /// of `len` bytes. + #[must_use] + pub const fn as_ptr(&self) -> *const u8 { + self.ptr.as_ptr().cast_const() + } + + /// Raw mut pointer to the buffer's first byte. Valid for + /// writes of `capacity` bytes. + #[must_use] + pub const fn as_mut_ptr(&mut self) -> *mut u8 { + self.ptr.as_ptr() + } + + /// Shared slice over the currently-written `len` bytes. + #[must_use] + pub const fn as_slice(&self) -> &[u8] { + // SAFETY: `ptr` is valid for reads of `capacity >= len` + // bytes by invariant; the lifetime is tied to `&self`. 
+ #[expect(unsafe_code, reason = "slice over owned aligned allocation")] + unsafe { + slice::from_raw_parts(self.ptr.as_ptr(), self.len) + } + } + + /// Mut slice over the full `capacity` (NOT just `len`). Caller + /// is responsible for updating `len` via [`Self::set_len`] + /// after writing. + #[must_use] + pub const fn spare_capacity_mut(&mut self) -> &mut [u8] { + // SAFETY: `ptr` is valid for writes of `capacity` bytes by + // invariant; the lifetime is tied to `&mut self`. + #[expect(unsafe_code, reason = "mut slice over owned aligned allocation")] + unsafe { + slice::from_raw_parts_mut(self.ptr.as_ptr(), self.capacity) + } + } + + /// Updates the written-bytes count. + /// + /// # Panics + /// + /// Panics if `new_len > capacity`. + pub const fn set_len(&mut self, new_len: usize) { + assert!( + new_len <= self.capacity, + "AlignedBuf::set_len exceeds capacity", + ); + self.len = new_len; + } + + /// Resets `len` to 0 without touching the allocation. + pub const fn clear(&mut self) { + self.len = 0; + } +} + +impl Drop for AlignedBuf { + fn drop(&mut self) { + if self.capacity == 0 { + // Dangling sentinel from `new_zeroed(0, _)`; nothing to + // free. + return; + } + // SAFETY: layout reproduces the one used at allocation; + // `ptr` was obtained from the global allocator with that + // exact layout and hasn't been freed yet (Drop runs once). + // The `let … else` below covers the impossible case + // (Layout was valid at construction; we never mutate + // capacity / alignment after) without panicking — Drop + // panics during unwinding would abort the process. + let Ok(layout) = Layout::from_size_align(self.capacity, self.alignment) else { + // Unreachable: invariants enforced at construction + // guarantee Layout::from_size_align succeeds here. + // Skipping dealloc leaks `capacity` bytes — preferable + // to aborting if the invariant ever drifts. 
+ return; + }; + #[expect(unsafe_code, reason = "matched dealloc for owned allocation")] + unsafe { + alloc::alloc::dealloc(self.ptr.as_ptr(), layout); + } + } +} + +#[cfg(test)] +#[expect(clippy::unwrap_used, reason = "test assertions")] +mod tests { + use super::*; + + #[test] + fn new_zeroed_4k_aligned() { + let buf = AlignedBuf::new_zeroed(8192, 4096).unwrap(); + assert_eq!(buf.capacity(), 8192); + assert_eq!(buf.len(), 0); + assert_eq!(buf.alignment(), 4096); + assert_eq!(buf.as_ptr().addr() % 4096, 0, "pointer not 4 KiB aligned"); + assert!(buf.is_empty()); + } + + #[test] + fn new_zeroed_rounds_capacity_up_to_alignment() { + // 5000 bytes requested at 4 KiB alignment → rounded to 8 KiB. + let buf = AlignedBuf::new_zeroed(5000, 4096).unwrap(); + assert_eq!(buf.capacity(), 8192); + // Already a multiple → no rounding. + let buf = AlignedBuf::new_zeroed(8192, 4096).unwrap(); + assert_eq!(buf.capacity(), 8192); + } + + #[test] + fn new_zeroed_returns_zeroed_memory() { + let buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + // Spare slice covers the full capacity; every byte must be zero. + let slice = unsafe { slice::from_raw_parts(buf.as_ptr(), buf.capacity()) }; + assert!(slice.iter().all(|&b| b == 0)); + } + + #[test] + fn new_zeroed_rejects_non_power_of_two_alignment() { + assert!(AlignedBuf::new_zeroed(4096, 3000).is_none()); + assert!(AlignedBuf::new_zeroed(4096, 0).is_none()); + } + + #[test] + fn new_zeroed_rejects_excessive_alignment() { + // isize::MAX + 1 is a power of two but exceeds the cap. + assert!(AlignedBuf::new_zeroed(4096, (isize::MAX as usize) + 1).is_none()); + } + + #[test] + fn new_zeroed_zero_capacity_returns_dangling() { + // Zero-byte AlignedBuf is allowed and never touches the + // allocator; the dangling sentinel must still satisfy the + // alignment promise so callers that pass it to FFI don't + // surprise the kernel. 
+ let buf = AlignedBuf::new_zeroed(0, 4096).unwrap(); + assert_eq!(buf.capacity(), 0); + assert_eq!(buf.as_ptr().addr() % 4096, 0); + assert!(buf.as_slice().is_empty()); + } + + #[test] + fn set_len_grows_visible_slice() { + let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + assert_eq!(buf.as_slice().len(), 0); + buf.set_len(1024); + assert_eq!(buf.as_slice().len(), 1024); + assert_eq!(buf.len(), 1024); + } + + #[test] + #[should_panic(expected = "AlignedBuf::set_len exceeds capacity")] + fn set_len_panics_past_capacity() { + let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + buf.set_len(buf.capacity() + 1); + } + + #[test] + fn clear_resets_len_but_preserves_capacity() { + let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + buf.set_len(2048); + buf.clear(); + assert_eq!(buf.len(), 0); + assert_eq!(buf.capacity(), 4096); + } + + #[test] + fn spare_capacity_mut_covers_full_capacity() { + let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + let spare = buf.spare_capacity_mut(); + assert_eq!(spare.len(), 4096); + *spare.first_mut().unwrap() = 0xAB; + *spare.last_mut().unwrap() = 0xCD; + buf.set_len(4096); + let slice = buf.as_slice(); + assert_eq!(slice.first().copied(), Some(0xAB)); + assert_eq!(slice.last().copied(), Some(0xCD)); + } + + #[test] + fn send_sync_compile_check() { + fn assert_send_sync() {} + assert_send_sync::(); + } + + #[test] + fn pointer_stays_stable_across_writes() { + let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + let initial = buf.as_ptr(); + // Write some content + set_len; pointer must not move + // (no reallocation: AlignedBuf has no growth API). + *buf.spare_capacity_mut().first_mut().unwrap() = 1; + buf.set_len(1); + assert_eq!(buf.as_ptr(), initial); + } +} diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 11b79f07f..9490baa5c 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -23,9 +23,12 @@ //! - **macOS / BSD**: no batched I/O API exists (`dispatch_io` and `kqueue` //! 
do not help for storage I/O patterns); [`StdFs`] is the correct choice +pub mod aligned_buf; mod mem_fs; mod std_fs; +pub use aligned_buf::AlignedBuf; + #[cfg(all(target_os = "linux", feature = "io-uring"))] mod io_uring_fs; From 12e2cd583937905e9c41cc8ea8ed9e6d304b3211 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 12:21:50 +0300 Subject: [PATCH 02/14] feat(fs): add FsOpenOptions::direct_io flag + wire O_DIRECT into StdFs/IoUringFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `direct_io: bool` field + builder method to FsOpenOptions and threads it into the open path of both StdFs and IoUringFs. On Linux and Android (x86, x86_64, aarch64, riscv32/64, loongarch64, s390x — the arches assumed to take asm-generic/fcntl.h's O_DIRECT=0o40000 as the authoritative value) the flag becomes a `custom_flags(O_DIRECT)` on the std OpenOptions builder. NOTE(review): that assumption does not hold for aarch64 — arch/arm64/include/uapi/asm/fcntl.h overrides O_DIRECT to 0o200000 (same as 32-bit arm), and 0o40000 is O_DIRECTORY there, so with this gate a direct_io open of an existing regular file on aarch64 passes O_DIRECTORY and fails with ENOTDIR instead of bypassing the page cache; aarch64 needs its own constant or must be left out of the gate. O_DIRECT is declared as a named constant rather than pulled from libc — matches the EXDEV / flock pattern already established in std_fs.rs, keeps the crate libc-free, and lets the constant carry its own comment documenting the asm-generic source. Architectures with a divergent O_DIRECT bit (arm 0o200000, mips 0o100000, parisc, sparc) are deliberately left out of the gate: emitting the wrong bit silently would be worse than honouring the doc'd "direct_io may be ignored" contract. macOS, Windows, and other Unix targets honour the doc contract by falling through to a cached open — F_NOCACHE on macOS and FILE_FLAG_NO_BUFFERING on Windows would each need their own opt-in plumbing and are out of scope for this hook. MemFs ignores `direct_io` (in-memory; the flag has no meaning). No consumers wired yet — callers landing in subsequent commits. 
Part of #133 --- src/fs/io_uring_fs.rs | 27 +++++++++++++++++++++++--- src/fs/mod.rs | 23 ++++++++++++++++++++++ src/fs/std_fs.rs | 44 +++++++++++++++++++++++++++++++++++++++---- 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/fs/io_uring_fs.rs b/src/fs/io_uring_fs.rs index d080917d7..14f299bc6 100644 --- a/src/fs/io_uring_fs.rs +++ b/src/fs/io_uring_fs.rs @@ -111,14 +111,35 @@ impl std::fmt::Debug for IoUringFs { impl Fs for IoUringFs { fn open(&self, path: &Path, opts: &FsOpenOptions) -> io::Result> { - let file = OpenOptions::new() + let mut builder = OpenOptions::new(); + builder .read(opts.read) .write(opts.write) .create(opts.create) .create_new(opts.create_new) .truncate(opts.truncate) - .append(opts.append) - .open(path)?; + .append(opts.append); + + // O_DIRECT: identical arch gating to StdFs::open (see the comment + // there for rationale). io_uring is Linux-only, so the os-gate is + // implicit, but the arch gate still matters: O_DIRECT's bit value + // diverges on arm/mips/parisc/sparc. + #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64", + target_arch = "s390x", + ))] + if opts.direct_io { + use std::os::unix::fs::OpenOptionsExt; + const O_DIRECT: i32 = 0o0_040_000; + builder.custom_flags(O_DIRECT); + } + + let file = builder.open(path)?; // When opened in append mode, io_uring writes use an explicit offset // so the kernel's O_APPEND semantics don't apply. Initialize the diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 9490baa5c..fe1973498 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -62,6 +62,21 @@ pub struct FsOpenOptions { pub truncate: bool, /// Open in append mode, so writes go to the end of the file. pub append: bool, + /// Bypass the kernel page cache for this file (`O_DIRECT` on Linux). 
+ /// + /// When set, the caller is responsible for issuing reads and writes + /// at offsets aligned to the filesystem's logical block size, with + /// userspace buffers aligned to the same boundary and lengths that + /// are a multiple of that block size. See [`AlignedBuf`] for an + /// aligned heap buffer suitable for `O_DIRECT` I/O. + /// + /// Platforms other than Linux/Android treat this as a no-op: macOS + /// has no equivalent flag (use `F_NOCACHE` on the open file descriptor + /// instead, which is not yet wired here), and Windows requires + /// `FILE_FLAG_NO_BUFFERING` at `CreateFile` time (also not wired). + /// Callers should treat `direct_io` as a hint that may be silently + /// ignored — correctness must not depend on it. + pub direct_io: bool, } impl Default for FsOpenOptions { @@ -81,6 +96,7 @@ impl FsOpenOptions { create_new: false, truncate: false, append: false, + direct_io: false, } } @@ -125,6 +141,13 @@ impl FsOpenOptions { self.append = append; self } + + /// Sets the `direct_io` flag. + #[must_use] + pub const fn direct_io(mut self, direct_io: bool) -> Self { + self.direct_io = direct_io; + self + } } /// Metadata about a file or directory. diff --git a/src/fs/std_fs.rs b/src/fs/std_fs.rs index 3a648b2a4..fb27bfae7 100644 --- a/src/fs/std_fs.rs +++ b/src/fs/std_fs.rs @@ -103,14 +103,47 @@ impl FsFile for File { impl Fs for StdFs { fn open(&self, path: &Path, opts: &FsOpenOptions) -> io::Result> { - let file = OpenOptions::new() + let mut builder = OpenOptions::new(); + builder .read(opts.read) .write(opts.write) .create(opts.create) .create_new(opts.create_new) .truncate(opts.truncate) - .append(opts.append) - .open(path)?; + .append(opts.append); + + // O_DIRECT on Linux/Android (architectures with `asm-generic/fcntl.h` + // value 0o40000: x86, x86_64, aarch64, riscv32/64, loongarch64, + // s390x — i.e. every Linux arch we plausibly run on). 
Architectures + // with a divergent O_DIRECT (arm 0o200000, mips 0o100000, parisc, + // sparc) are not gated here on purpose: misencoding the flag would + // silently pass the wrong bit to open(2). The FsOpenOptions doc + // contract permits `direct_io` to be ignored, so divergent archs + // simply fall through to a cached open — correctness preserved. + // + // macOS / Windows / other Unixes: same "may be silently ignored" + // contract. macOS has no O_DIRECT (F_NOCACHE via fcntl post-open + // is the closest equivalent and is out of scope here). + #[cfg(all( + any(target_os = "linux", target_os = "android"), + any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64", + target_arch = "s390x", + ), + ))] + if opts.direct_io { + use std::os::unix::fs::OpenOptionsExt; + // asm-generic/fcntl.h: #define O_DIRECT 00040000 + const O_DIRECT: i32 = 0o0_040_000; + builder.custom_flags(O_DIRECT); + } + + let file = builder.open(path)?; Ok(Box::new(file)) } @@ -617,6 +650,7 @@ mod tests { assert!(!opts.create_new); assert!(!opts.truncate); assert!(!opts.append); + assert!(!opts.direct_io); } #[test] @@ -627,13 +661,15 @@ mod tests { .create(true) .create_new(false) .truncate(true) - .append(false); + .append(false) + .direct_io(true); assert!(opts.read); assert!(opts.write); assert!(opts.create); assert!(!opts.create_new); assert!(opts.truncate); assert!(!opts.append); + assert!(opts.direct_io); } #[test] From 38b65f5a32d1980b5068be9ffdd385c5e36726db Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 12:48:06 +0300 Subject: [PATCH 03/14] docs(fs): clarify AlignedBuf invariant + direct_io arch gating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three doc-only corrections raised in PR review: - AlignedBuf: invariant said "power-of-two multiple of alignment" but new_zeroed only rounds up to the next multiple 
of alignment (e.g. capacity=9000 → 3 × 4096 = 12288). Reword to match actual guarantee. - AlignedBuf::new_zeroed example was marked `ignore` with a stale comment about pub(crate) visibility; the type is in fact `pub` + re-exported from lsm_tree::fs. Promote the example to a real doctest so it actually runs. - FsOpenOptions::direct_io doc said the flag is a no-op only on non-Linux platforms; in reality it's also dropped on Linux architectures with a divergent O_DIRECT bit (arm, mips, parisc, sparc). Spell out the arch gate + restate \"this is a hint\" so callers don't assume cache bypass is in effect on every Linux target. --- src/fs/aligned_buf.rs | 9 +++++---- src/fs/mod.rs | 30 +++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/fs/aligned_buf.rs b/src/fs/aligned_buf.rs index b60136a0e..2086be7e9 100644 --- a/src/fs/aligned_buf.rs +++ b/src/fs/aligned_buf.rs @@ -35,8 +35,10 @@ use core::slice; /// - `len <= capacity`. /// - `alignment` is a power of two ≥ 1 and ≤ `isize::MAX as usize` /// (enforced at construction). -/// - `capacity` is a power-of-two multiple of `alignment` (rounded -/// up at construction). +/// - `capacity` is an integer multiple of `alignment` (rounded up +/// at construction from the caller's requested size). The +/// multiplier itself is NOT required to be a power of two — e.g. +/// `new_zeroed(9000, 4096)` yields `capacity = 12288 = 3 × 4096`. /// /// # `Send` + `Sync` /// @@ -88,8 +90,7 @@ impl AlignedBuf { /// /// # Examples /// - /// ```ignore - /// # // ignored: AlignedBuf is pub(crate), not exposed publicly. 
+ /// ``` /// use lsm_tree::fs::AlignedBuf; /// let buf = AlignedBuf::new_zeroed(8192, 4096).unwrap(); /// assert_eq!(buf.capacity(), 8192); diff --git a/src/fs/mod.rs b/src/fs/mod.rs index fe1973498..c400a8ee7 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -67,15 +67,27 @@ pub struct FsOpenOptions { /// When set, the caller is responsible for issuing reads and writes /// at offsets aligned to the filesystem's logical block size, with /// userspace buffers aligned to the same boundary and lengths that - /// are a multiple of that block size. See [`AlignedBuf`] for an - /// aligned heap buffer suitable for `O_DIRECT` I/O. - /// - /// Platforms other than Linux/Android treat this as a no-op: macOS - /// has no equivalent flag (use `F_NOCACHE` on the open file descriptor - /// instead, which is not yet wired here), and Windows requires - /// `FILE_FLAG_NO_BUFFERING` at `CreateFile` time (also not wired). - /// Callers should treat `direct_io` as a hint that may be silently - /// ignored — correctness must not depend on it. + /// are a multiple of that block size. + /// + /// `direct_io` is a HINT, not a guarantee. The flag is honoured only + /// on Linux and Android, and only on architectures where the + /// `asm-generic/fcntl.h` value `O_DIRECT = 0o40000` is authoritative + /// — `x86`, `x86_64`, `aarch64`, `riscv32`/`riscv64`, `loongarch64`, + /// `s390x`. On Linux + /// architectures with a divergent `O_DIRECT` bit (arm `0o200000`, + /// mips `0o100000`, parisc, sparc) the flag is silently dropped to + /// avoid passing the wrong bit to `open(2)`. Other platforms — macOS + /// (would need `F_NOCACHE` post-open via `fcntl`, not wired here), + /// Windows (would need `FILE_FLAG_NO_BUFFERING` at `CreateFile` time, + /// not wired here), other Unixes — also silently drop the flag. 
+ /// + /// Callers must therefore treat `direct_io` as best-effort: + /// correctness must not depend on cache bypass being in effect, and + /// any alignment requirements imposed by the kernel only apply when + /// the flag is actually honoured (you cannot tell from this API + /// alone whether it was). See [`AlignedBuf`] for an aligned heap + /// buffer suitable for `O_DIRECT` reads and writes when the flag is + /// honoured. pub direct_io: bool, } From a752eff88fe2b10b6dfdd63d114bf89b8808daf7 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 15:52:09 +0300 Subject: [PATCH 04/14] refactor(fs): extract O_DIRECT flag application + sharpen AlignedBuf docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review follow-ups: 1. `AlignedBuf` invariant doc: the `ptr` invariant claimed an always-valid allocation, but `new_zeroed(0, _)` intentionally returns a dangling sentinel synthesised from the requested alignment. Split the invariant into the capacity > 0 and capacity == 0 cases so the safety story matches the code. 2. `new_zeroed` zero-capacity comment said "0-byte allocation is undefined for the global allocator" — slightly imprecise: `Layout::from_size_align(0, _)` itself succeeds; what's UB is calling `alloc::alloc::alloc(layout)` with size==0 (per its trait doc). Reword to spell that out + cite std's own `Vec` / `NonNull::dangling()` precedent for the sentinel choice. 3. Extracted the `O_DIRECT` `OpenOptions::custom_flags` block into a new `fs::direct_io` module with one `apply_direct_io_flag` fn. `StdFs::open` and `IoUringFs::open` both call it instead of keeping their own copies of the arch-gating list + `O_DIRECT` constant. Removes the drift risk Copilot flagged (if one backend was updated to add a new arch and the other wasn't, `direct_io` would silently work for one and not the other on the same target). No behaviour change. 
--- src/fs/aligned_buf.rs | 24 +++++++++----- src/fs/direct_io.rs | 73 +++++++++++++++++++++++++++++++++++++++++++ src/fs/io_uring_fs.rs | 19 +---------- src/fs/mod.rs | 1 + src/fs/std_fs.rs | 31 +----------------- 5 files changed, 93 insertions(+), 55 deletions(-) create mode 100644 src/fs/direct_io.rs diff --git a/src/fs/aligned_buf.rs b/src/fs/aligned_buf.rs index 2086be7e9..133f739c4 100644 --- a/src/fs/aligned_buf.rs +++ b/src/fs/aligned_buf.rs @@ -29,9 +29,13 @@ use core::slice; /// /// # Invariants /// -/// - `ptr` is always non-null and points to a region of at least -/// `capacity` bytes allocated via the global allocator with -/// `Layout::from_size_align(capacity, alignment)`. +/// - `ptr` is always non-null. When `capacity > 0`, it points to a +/// region of at least `capacity` bytes allocated via the global +/// allocator with `Layout::from_size_align(capacity, alignment)`. +/// When `capacity == 0`, it is a non-dereferenceable dangling +/// sentinel synthesised from the requested alignment (see +/// `new_zeroed` for the special-case path) — `len == 0` always +/// holds in that case, so the sentinel is never dereferenced. /// - `len <= capacity`. /// - `alignment` is a power of two ≥ 1 and ≤ `isize::MAX as usize` /// (enforced at construction). @@ -110,10 +114,16 @@ impl AlignedBuf { if rounded > (isize::MAX as usize) { return None; } - // 0-byte allocation is undefined for the global allocator; - // synthesise a non-null dangling pointer with the requested - // alignment instead. Reads / writes through it are bounded - // by `len == 0`, so they never touch the dangling address. + // `alloc::alloc::alloc(layout)` requires `layout.size() > 0` + // — calling it with a zero-size layout is UB per the trait + // docs (Layout itself accepts size==0, but the allocator + // call does not). Std handles this for `Vec` etc. 
by + // using `NonNull::dangling()` internally; we do the same + // here but synthesise the sentinel from the caller's + // requested alignment so `as_ptr().addr() % alignment == 0` + // still holds for zero-capacity buffers. The sentinel is + // never dereferenced — every read/write path is bounded by + // `len`, which is 0 here. if rounded == 0 { // SAFETY: alignment is a power of two ≥ 1, so casting it // to a pointer is well-defined and the pointer is non- diff --git a/src/fs/direct_io.rs b/src/fs/direct_io.rs new file mode 100644 index 000000000..341af54aa --- /dev/null +++ b/src/fs/direct_io.rs @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (c) 2026-present, Structured World Foundation + +//! `O_DIRECT` flag application shared between [`StdFs`] and +//! [`IoUringFs`] backends. +//! +//! Lives here (rather than inline in each backend's `open()`) so the +//! arch-gating list and the `O_DIRECT` bit value are defined in +//! exactly one place. Two backends with their own copy would silently +//! diverge if one was updated to support a new arch and the other +//! wasn't. +//! +//! Doc-contract for `direct_io` is on +//! [`FsOpenOptions::direct_io`](super::FsOpenOptions::direct_io): +//! the flag is best-effort, may be silently dropped, and correctness +//! must not depend on it. +//! +//! [`StdFs`]: super::StdFs +//! [`IoUringFs`]: super::IoUringFs + +#[cfg(all( + any(target_os = "linux", target_os = "android"), + any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64", + target_arch = "s390x", + ), +))] +mod apply { + /// `asm-generic/fcntl.h`: `#define O_DIRECT 00040000`. Authoritative + /// on every arch listed in the parent `cfg`. Arches with a + /// divergent bit (arm `0o200000`, mips `0o100000`, parisc, sparc) + /// are excluded from the `cfg` rather than handled here so we + /// never risk passing the wrong bit to `open(2)`. 
+ const O_DIRECT: i32 = 0o0_040_000; + + /// Apply the `O_DIRECT` flag to a `std::fs::OpenOptions` builder + /// when `direct_io` is requested AND the running target supports + /// the authoritative `asm-generic/fcntl.h` value. + pub fn apply_direct_io_flag(builder: &mut std::fs::OpenOptions, direct_io: bool) { + if direct_io { + use std::os::unix::fs::OpenOptionsExt; + builder.custom_flags(O_DIRECT); + } + } +} + +#[cfg(not(all( + any(target_os = "linux", target_os = "android"), + any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "loongarch64", + target_arch = "s390x", + ), +)))] +mod apply { + /// No-op outside Linux/Android on a supported arch. macOS would + /// need `F_NOCACHE` via `fcntl` post-open, Windows would need + /// `FILE_FLAG_NO_BUFFERING` at `CreateFile` time, divergent Linux + /// arches need a different `O_DIRECT` bit — all out of scope per + /// the [`super::FsOpenOptions::direct_io`] best-effort contract. + pub fn apply_direct_io_flag(_builder: &mut std::fs::OpenOptions, _direct_io: bool) {} +} + +pub(super) use apply::apply_direct_io_flag; diff --git a/src/fs/io_uring_fs.rs b/src/fs/io_uring_fs.rs index 14f299bc6..8db560456 100644 --- a/src/fs/io_uring_fs.rs +++ b/src/fs/io_uring_fs.rs @@ -120,24 +120,7 @@ impl Fs for IoUringFs { .truncate(opts.truncate) .append(opts.append); - // O_DIRECT: identical arch gating to StdFs::open (see the comment - // there for rationale). io_uring is Linux-only, so the os-gate is - // implicit, but the arch gate still matters: O_DIRECT's bit value - // diverges on arm/mips/parisc/sparc. 
- #[cfg(any( - target_arch = "x86", - target_arch = "x86_64", - target_arch = "aarch64", - target_arch = "riscv32", - target_arch = "riscv64", - target_arch = "loongarch64", - target_arch = "s390x", - ))] - if opts.direct_io { - use std::os::unix::fs::OpenOptionsExt; - const O_DIRECT: i32 = 0o0_040_000; - builder.custom_flags(O_DIRECT); - } + super::direct_io::apply_direct_io_flag(&mut builder, opts.direct_io); let file = builder.open(path)?; diff --git a/src/fs/mod.rs b/src/fs/mod.rs index c400a8ee7..1e9855fa9 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -24,6 +24,7 @@ //! do not help for storage I/O patterns); [`StdFs`] is the correct choice pub mod aligned_buf; +mod direct_io; mod mem_fs; mod std_fs; diff --git a/src/fs/std_fs.rs b/src/fs/std_fs.rs index fb27bfae7..ad0162b18 100644 --- a/src/fs/std_fs.rs +++ b/src/fs/std_fs.rs @@ -112,36 +112,7 @@ impl Fs for StdFs { .truncate(opts.truncate) .append(opts.append); - // O_DIRECT on Linux/Android (architectures with `asm-generic/fcntl.h` - // value 0o40000: x86, x86_64, aarch64, riscv32/64, loongarch64, - // s390x — i.e. every Linux arch we plausibly run on). Architectures - // with a divergent O_DIRECT (arm 0o200000, mips 0o100000, parisc, - // sparc) are not gated here on purpose: misencoding the flag would - // silently pass the wrong bit to open(2). The FsOpenOptions doc - // contract permits `direct_io` to be ignored, so divergent archs - // simply fall through to a cached open — correctness preserved. - // - // macOS / Windows / other Unixes: same "may be silently ignored" - // contract. macOS has no O_DIRECT (F_NOCACHE via fcntl post-open - // is the closest equivalent and is out of scope here). 
- #[cfg(all( - any(target_os = "linux", target_os = "android"), - any( - target_arch = "x86", - target_arch = "x86_64", - target_arch = "aarch64", - target_arch = "riscv32", - target_arch = "riscv64", - target_arch = "loongarch64", - target_arch = "s390x", - ), - ))] - if opts.direct_io { - use std::os::unix::fs::OpenOptionsExt; - // asm-generic/fcntl.h: #define O_DIRECT 00040000 - const O_DIRECT: i32 = 0o0_040_000; - builder.custom_flags(O_DIRECT); - } + super::direct_io::apply_direct_io_flag(&mut builder, opts.direct_io); let file = builder.open(path)?; Ok(Box::new(file)) From 1157c005c8ede41bf708ec617aec4d0a2ca7efe5 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 16:19:00 +0300 Subject: [PATCH 05/14] =?UTF-8?q?refactor(fs):=20rename=20AlignedBuf::spar?= =?UTF-8?q?e=5Fcapacity=5Fmut=20=E2=86=92=20as=5Fcapacity=5Fmut?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two review follow-ups, resolved as one change because they concern the same method: - The method returns a mutable slice over the FULL capacity, not just the tail beyond len. Vec's / BytesMut's `spare_capacity` is the tail (len..capacity), so the old name implied semantics the method doesn't have. Rename to `as_capacity_mut` so the name matches the actual surface ("full buffer mut"), and spell out in the doc-comment WHY we expose the whole allocation: `O_DIRECT` reads need to overwrite already-buffered bytes when refilling a recycled buffer. - `new_zeroed_returns_zeroed_memory` was reaching into the buffer via `unsafe { slice::from_raw_parts(buf.as_ptr(), buf.capacity()) }` with no SAFETY comment. Swap to the safe `as_capacity_mut()` API — same observable behaviour, no unsafe in tests. 
--- src/fs/aligned_buf.rs | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/fs/aligned_buf.rs b/src/fs/aligned_buf.rs index 133f739c4..0c6e37ebb 100644 --- a/src/fs/aligned_buf.rs +++ b/src/fs/aligned_buf.rs @@ -207,11 +207,20 @@ impl AlignedBuf { } } - /// Mut slice over the full `capacity` (NOT just `len`). Caller - /// is responsible for updating `len` via [`Self::set_len`] - /// after writing. + /// Mut slice over the FULL `capacity` — including bytes already + /// in the `0..len` written region. + /// + /// Named `as_capacity_mut` (not `spare_capacity_mut`) because + /// `spare_capacity` in `Vec` / `BytesMut` means the tail + /// `len..capacity` only. This method intentionally exposes the + /// entire allocation: `O_DIRECT` kernel reads need to overwrite + /// already-buffered bytes when refilling a recycled buffer, so + /// the right primitive is "full buffer", not "tail beyond len". + /// + /// Caller is responsible for updating `len` via + /// [`Self::set_len`] after writing. #[must_use] - pub const fn spare_capacity_mut(&mut self) -> &mut [u8] { + pub const fn as_capacity_mut(&mut self) -> &mut [u8] { // SAFETY: `ptr` is valid for writes of `capacity` bytes by // invariant; the lifetime is tied to `&mut self`. #[expect(unsafe_code, reason = "mut slice over owned aligned allocation")] @@ -294,10 +303,11 @@ mod tests { #[test] fn new_zeroed_returns_zeroed_memory() { - let buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); - // Spare slice covers the full capacity; every byte must be zero. - let slice = unsafe { slice::from_raw_parts(buf.as_ptr(), buf.capacity()) }; - assert!(slice.iter().all(|&b| b == 0)); + let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); + // `as_capacity_mut` covers the full capacity — safe API, + // no need for raw-pointer slicing in tests. 
+ let cap = buf.as_capacity_mut(); + assert!(cap.iter().all(|&b| b == 0)); } #[test] @@ -350,12 +360,12 @@ mod tests { } #[test] - fn spare_capacity_mut_covers_full_capacity() { + fn as_capacity_mut_covers_full_capacity() { let mut buf = AlignedBuf::new_zeroed(4096, 4096).unwrap(); - let spare = buf.spare_capacity_mut(); - assert_eq!(spare.len(), 4096); - *spare.first_mut().unwrap() = 0xAB; - *spare.last_mut().unwrap() = 0xCD; + let cap = buf.as_capacity_mut(); + assert_eq!(cap.len(), 4096); + *cap.first_mut().unwrap() = 0xAB; + *cap.last_mut().unwrap() = 0xCD; buf.set_len(4096); let slice = buf.as_slice(); assert_eq!(slice.first().copied(), Some(0xAB)); @@ -374,7 +384,7 @@ mod tests { let initial = buf.as_ptr(); // Write some content + set_len; pointer must not move // (no reallocation: AlignedBuf has no growth API). - *buf.spare_capacity_mut().first_mut().unwrap() = 1; + *buf.as_capacity_mut().first_mut().unwrap() = 1; buf.set_len(1); assert_eq!(buf.as_ptr(), initial); } From f64b24a7916f7c93997fa901eb39c36accb14e99 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 19:23:10 +0300 Subject: [PATCH 06/14] docs(fs): explain std-only status of direct_io module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spell out that the entire fs::* backend is std-bound today and that the gate-unit is the whole backend (tracked under no-std migration epic #274), not individual sub-modules. Gating direct_io alone while its consumers (StdFs, IoUringFs) stay ungated would be a no-op — std_fs would drag it in transitively. Documents the design choice in the module header so the same question doesn't get raised again. --- src/fs/direct_io.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/fs/direct_io.rs b/src/fs/direct_io.rs index 341af54aa..a405c6930 100644 --- a/src/fs/direct_io.rs +++ b/src/fs/direct_io.rs @@ -15,6 +15,21 @@ //! 
the flag is best-effort, may be silently dropped, and correctness //! must not depend on it. //! +//! # `std` dependency +//! +//! This module touches `std::fs::OpenOptions` directly, so it is +//! std-only. No `#[cfg(feature = "std")]` gate is added here on +//! purpose: its sole consumers — [`StdFs`] and [`IoUringFs`] — +//! are themselves unconditionally std-bound today (the entire +//! `fs::*` backend builds on `std::fs`). Gating *only* this module +//! while leaving the consumers ungated would be a no-op — the std +//! backend would still compile and drag this module in +//! transitively, then fail. The unit of gating is the whole +//! `fs::*` std backend; that move is tracked under the no-std +//! migration epic (issue `#274`), where the +//! `#[cfg(feature = "std")]` gate will land on `pub mod fs::std_fs` +//! (and `io_uring_fs`) and this module follows automatically. +//! //! [`StdFs`]: super::StdFs //! [`IoUringFs`]: super::IoUringFs From 2a0b4c322b02c631f59948e8042b19c726c01bd3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 19:24:19 +0300 Subject: [PATCH 07/14] docs(fs): cross-reference direct_io std-only rationale at declaration site Add a short comment next to `mod direct_io;` pointing at the full module-header explanation in direct_io.rs. The previous commit documented the design choice inside the module; this commit puts a signpost where reviewers reading fs/mod.rs first land, so they do not need to chase the rationale across files. --- src/fs/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 1e9855fa9..35887e342 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -24,6 +24,12 @@ //! do not help for storage I/O patterns); [`StdFs`] is the correct choice pub mod aligned_buf; +// `direct_io` is std-only (touches `std::fs::OpenOptions`). 
It is +// intentionally not feature-gated here: its sole consumers `std_fs` +// and `io_uring_fs` are themselves unconditionally std-bound, so the +// effective unit of gating is the whole `fs::*` backend (tracked +// under the no-std migration epic, issue #274). See the module +// header in `direct_io.rs` for the full rationale. mod direct_io; mod mem_fs; mod std_fs; From 218bbd994cfa3d448606020ea0a4578e2e4ea9e3 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 19:39:02 +0300 Subject: [PATCH 08/14] refactor(fs): non_exhaustive FsOpenOptions + strict-provenance ptr + doc link fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review follow-ups bundled because each is one-or-two lines and they share the same module surface: - `FsOpenOptions` gains `#[non_exhaustive]`. The new `direct_io` field landed in this branch is already a breaking change for downstream struct-literal callers; marking the struct non_exhaustive in the same release confines the break to exactly one release cycle and lets future fields land as semver-minor. Builder API was already complete for every field (`.read()`, `.write()`, …, `.direct_io()`), so the only callers affected are those bypassing the builder. - `direct_io.rs` module docs no longer link `[IoUringFs]` via intra-doc reference — that link breaks rustdoc when the `io-uring` feature is off (the type is `#[cfg]`-gated). Plain inline-code formatting (`` `IoUringFs` ``) renders identically in all feature configurations. - `AlignedBuf::new_zeroed` zero-capacity sentinel is now built via `core::ptr::without_provenance_mut(alignment)` instead of the raw `alignment as *mut u8` cast. Same observable address, but the API explicitly signals "address-only, no provenance, no associated allocation" — exactly what the never-dereferenced sentinel needs, and stays clean under strict-provenance lints. 
--- src/fs/aligned_buf.rs | 13 +++++++++---- src/fs/direct_io.rs | 5 ++--- src/fs/mod.rs | 1 + 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/fs/aligned_buf.rs b/src/fs/aligned_buf.rs index 0c6e37ebb..b5c293c31 100644 --- a/src/fs/aligned_buf.rs +++ b/src/fs/aligned_buf.rs @@ -125,13 +125,18 @@ impl AlignedBuf { // never dereferenced — every read/write path is bounded by // `len`, which is 0 here. if rounded == 0 { - // SAFETY: alignment is a power of two ≥ 1, so casting it - // to a pointer is well-defined and the pointer is non- - // null. We never deref past `len = 0`. + // SAFETY: alignment is a power of two ≥ 1, so the + // resulting pointer is non-null and properly aligned. + // `without_provenance_mut` constructs an address-only + // pointer (no provenance, no associated allocation) — + // exactly right for a sentinel that must never be + // dereferenced. We never deref past `len = 0`. Strict- + // provenance-friendly: avoids the integer-to-pointer + // cast lint by using the canonical strict-provenance API. let dangling = { #[expect(unsafe_code, reason = "non-null dangling for 0-cap buffer")] unsafe { - NonNull::new_unchecked(alignment as *mut u8) + NonNull::new_unchecked(core::ptr::without_provenance_mut::<u8>(alignment)) } }; return Some(Self { diff --git a/src/fs/direct_io.rs b/src/fs/direct_io.rs index a405c6930..69cd7b65c 100644 --- a/src/fs/direct_io.rs +++ b/src/fs/direct_io.rs @@ -2,7 +2,7 @@ // Copyright (c) 2026-present, Structured World Foundation //! `O_DIRECT` flag application shared between [`StdFs`] and -//! [`IoUringFs`] backends. +//! `IoUringFs` backends. //! //! Lives here (rather than inline in each backend's `open()`) so the //! arch-gating list and the `O_DIRECT` bit value are defined in @@ -19,7 +19,7 @@ //! //! This module touches `std::fs::OpenOptions` directly, so it is //! std-only. No `#[cfg(feature = "std")]` gate is added here on -//! 
purpose: its sole consumers — [`StdFs`] and `IoUringFs` — //! are themselves unconditionally std-bound today (the entire //! `fs::*` backend builds on `std::fs`). Gating *only* this module //! while leaving the consumers ungated would be a no-op — the std @@ -31,7 +31,6 @@ //! (and `io_uring_fs`) and this module follows automatically. //! //! [`StdFs`]: super::StdFs -//! [`IoUringFs`]: super::IoUringFs #[cfg(all( any(target_os = "linux", target_os = "android"), diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 35887e342..c99153817 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -56,6 +56,7 @@ use std::path::{Path, PathBuf}; reason = "mirrors std::fs::OpenOptions which uses bool flags for each mode" )] #[derive(Clone, Debug)] +#[non_exhaustive] pub struct FsOpenOptions { /// Open for reading. pub read: bool, From e1eedf9c1a008187aca6e2b042dd4305487f67eb Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 20:09:43 +0300 Subject: [PATCH 09/14] refactor(fs): make aligned_buf module private MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `AlignedBuf` is already re-exported via `pub use aligned_buf::AlignedBuf`, so `lsm_tree::fs::AlignedBuf` stays public — the `aligned_buf::*` sub-path adds no value and locks the file structure into the public API. Other fs sibling modules (`mem_fs`, `std_fs`, `io_uring_fs`) are already private; this matches that convention. Doctest still compiles (uses the re-exported path). --- src/fs/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fs/mod.rs b/src/fs/mod.rs index c99153817..18ab9c5e4 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -23,7 +23,7 @@ //! - **macOS / BSD**: no batched I/O API exists (`dispatch_io` and `kqueue` //! do not help for storage I/O patterns); [`StdFs`] is the correct choice -pub mod aligned_buf; +mod aligned_buf; // `direct_io` is std-only (touches `std::fs::OpenOptions`). 
It is // intentionally not feature-gated here: its sole consumers `std_fs` // and `io_uring_fs` are themselves unconditionally std-bound, so the From acbc3936257703b8196bac33966ce9d8a1ff4fb6 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 21:08:07 +0300 Subject: [PATCH 10/14] docs(fs): explain non_exhaustive choice on FsOpenOptions inline Document the rationale next to the attribute itself so the reasoning lives at the declaration site (not only in release notes or the PR description). Pairs with the existing inline note on `mod direct_io;` that explains why that submodule is not cfg-gated. --- src/fs/mod.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 87ef6e1e2..57afce8e3 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -55,6 +55,13 @@ use std::path::{Path, PathBuf}; clippy::struct_excessive_bools, reason = "mirrors std::fs::OpenOptions which uses bool flags for each mode" )] +// `non_exhaustive` paired with the `direct_io` field landing in the +// same release. The new field already breaks struct-literal +// callers; bundling `non_exhaustive` in the same semver-major bump +// confines the break to one release and lets every future field +// land as semver-minor. Builder methods (`.read()`, `.write()`, …, +// `.direct_io()`) cover every field, so callers using the builder +// API are unaffected. #[derive(Clone, Debug)] #[non_exhaustive] pub struct FsOpenOptions { From a745ec1868ba396a08c6aea8caf07564c540ea4a Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 21:20:04 +0300 Subject: [PATCH 11/14] refactor(fs): gate direct_io submodule behind feature = "std" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First concrete step toward making the `fs::*` backend honest about its std-bound surface. 
The wider Fs / FsFile trait + std_fs / io_uring_fs still pull std::io::{Read, Write, Seek} and std::path::Path unconditionally (no core::* equivalents), so this single gate does not unblock the no-std build by itself — that remains tracked under #274. But the `direct_io` submodule is purely a new std-side helper, so gating it now keeps the new addition honest with the policy and signals intent at the declaration site. Default-features (std) build path is unchanged. --- src/fs/mod.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 57afce8e3..30a07d69a 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -25,11 +25,15 @@ mod aligned_buf; // `direct_io` is std-only (touches `std::fs::OpenOptions`). It is -// intentionally not feature-gated here: its sole consumers `std_fs` -// and `io_uring_fs` are themselves unconditionally std-bound, so the -// effective unit of gating is the whole `fs::*` backend (tracked -// under the no-std migration epic, issue #274). See the module -// header in `direct_io.rs` for the full rationale. +// gated behind the `std` feature so a `no_std + alloc` build of +// this crate does not even attempt to compile it. The wider +// `fs::*` backend (Fs / FsFile traits, std_fs, io_uring_fs) +// still depends on `std::io::{Read, Write, Seek}` + `std::path::Path` +// — those have no `core::*` equivalents, so feature-gating just +// this submodule does not yet make a no-std build work end-to-end. +// The full backend split is tracked under no-std migration epic +// (issue #274); this gate is the first step. +#[cfg(feature = "std")] mod direct_io; mod mem_fs; mod std_fs; From b3d5fef4ccd05f07b0ec1ff9567872454004f915 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 21:26:35 +0300 Subject: [PATCH 12/14] docs(fs): align direct_io std-gate rationale with current state Earlier rounds documented "not gated, gate-unit is whole backend". 
The submodule is now actually gated behind feature = "std", so the old prose contradicted the code. Rewrite both touch points (module header in direct_io.rs and the inline comment in fs/mod.rs) to match: the gate IS in place as the first concrete step; full no-std viability is blocked on porting the Fs/FsFile traits off std::io + std::path (now tracked as #311), itself part of the no-std migration epic (#274). --- src/fs/direct_io.rs | 24 +++++++++++++----------- src/fs/mod.rs | 5 +++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/fs/direct_io.rs b/src/fs/direct_io.rs index 69cd7b65c..dfee63417 100644 --- a/src/fs/direct_io.rs +++ b/src/fs/direct_io.rs @@ -18,17 +18,19 @@ //! # `std` dependency //! //! This module touches `std::fs::OpenOptions` directly, so it is -//! std-only. No `#[cfg(feature = "std")]` gate is added here on -//! purpose: its sole consumers — [`StdFs`] and `IoUringFs` — -//! are themselves unconditionally std-bound today (the entire -//! `fs::*` backend builds on `std::fs`). Gating *only* this module -//! while leaving the consumers ungated would be a no-op — the std -//! backend would still compile and drag this module in -//! transitively, then fail. The unit of gating is the whole -//! `fs::*` std backend; that move is tracked under the no-std -//! migration epic (issue `#274`), where the -//! `#[cfg(feature = "std")]` gate will land on `pub mod fs::std_fs` -//! (and `io_uring_fs`) and this module follows automatically. +//! std-only and the parent `mod direct_io;` declaration in +//! `fs/mod.rs` is gated behind `#[cfg(feature = "std")]`. That gate +//! is the first concrete honest step toward the no-std backend +//! split, but it does NOT by itself unblock a `no_std + alloc` +//! build: the wider `Fs` / `FsFile` trait surface (`std::io::{Read, +//! Write, Seek}`, `std::path::Path`) is std-bound at the trait +//! definition level, so even `MemFs` (alloc-only in its body) can't +//! 
compile under `--no-default-features --features alloc`. Porting +//! the traits off `std::io` / `std::path` is tracked separately +//! (issue `#311`), as a prerequisite for the rest of `fs::*` to +//! become honestly feature-gateable. When that lands, this module's +//! gate becomes load-bearing; until then it's a forward-looking +//! marker that keeps new std-side helpers honest with the policy. //! //! [`StdFs`]: super::StdFs diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 30a07d69a..d4591d046 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -31,8 +31,9 @@ mod aligned_buf; // still depends on `std::io::{Read, Write, Seek}` + `std::path::Path` // — those have no `core::*` equivalents, so feature-gating just // this submodule does not yet make a no-std build work end-to-end. -// The full backend split is tracked under no-std migration epic -// (issue #274); this gate is the first step. +// Porting the traits off std::io / std::path is tracked as #311 +// (prerequisite); the wider no-std migration epic is #274. This +// gate is the first concrete step. #[cfg(feature = "std")] mod direct_io; mod mem_fs; From 57018b329f86a584c400f5fb06e4bdfc0184fb8e Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 22:24:33 +0300 Subject: [PATCH 13/14] docs(fs): disambiguate FsOpenOptions::direct_io intra-doc links to the field Both intra-doc links in direct_io.rs (module header + apply_direct_io_flag no-op variant) used the bare `FsOpenOptions::direct_io` path, which is ambiguous because the struct has both a field and a same-named builder method. Rustdoc would resolve to the method and hide the actual best-effort contract that's documented on the field. Switch to the `field@` disambiguator (`field@super::FsOpenOptions::direct_io` in the module header, `field@crate::fs::FsOpenOptions::direct_io` on the no-op variant) so readers land on the contract docs. The no-op variant inside the inner `mod apply` also had its path corrected: it used `super::` from inside the inner `mod apply { ... }`, which resolved to the wrong parent (direct_io rather than fs). 
The absolute `crate::fs::*` form makes the link work from either nesting level. --- src/fs/direct_io.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/fs/direct_io.rs b/src/fs/direct_io.rs index dfee63417..bd7799de1 100644 --- a/src/fs/direct_io.rs +++ b/src/fs/direct_io.rs @@ -10,9 +10,10 @@ //! diverge if one was updated to support a new arch and the other //! wasn't. //! -//! Doc-contract for `direct_io` is on -//! [`FsOpenOptions::direct_io`](super::FsOpenOptions::direct_io): -//! the flag is best-effort, may be silently dropped, and correctness +//! Doc-contract for the `direct_io` flag is on the +//! [`FsOpenOptions::direct_io`](field@super::FsOpenOptions::direct_io) +//! field (disambiguated against the same-named builder method): the +//! flag is best-effort, may be silently dropped, and correctness //! must not depend on it. //! //! # `std` dependency @@ -82,7 +83,9 @@ mod apply { /// need `F_NOCACHE` via `fcntl` post-open, Windows would need /// `FILE_FLAG_NO_BUFFERING` at `CreateFile` time, divergent Linux /// arches need a different `O_DIRECT` bit — all out of scope per - /// the [`super::FsOpenOptions::direct_io`] best-effort contract. + /// the [`FsOpenOptions::direct_io`](field@crate::fs::FsOpenOptions::direct_io) + /// best-effort contract (disambiguated against the same-named + /// builder method). pub fn apply_direct_io_flag(_builder: &mut std::fs::OpenOptions, _direct_io: bool) {} } From 163c66568726671ea4a538023c828caa25e1a4c1 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Sat, 23 May 2026 23:05:43 +0300 Subject: [PATCH 14/14] refactor(fs): gate apply_direct_io_flag call sites to match direct_io's std feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mod direct_io; in fs/mod.rs is gated behind #[cfg(feature = "std")], but the two callers (StdFs::open, IoUringFs::open) referenced it unconditionally. 
Under `--no-default-features --features alloc` the submodule is absent, so each call site added one extra resolution error on top of the type-checking errors std_fs / io_uring_fs already incur from their own std::* usage. Add matching #[cfg(feature = "std")] on both call sites so feature combinations stay coherent. io_uring_fs's whole module is already gated by cfg(all(target_os = "linux", feature = "io-uring")), and io-uring transitively pulls std (see Cargo.toml), so the new predicate is redundant in isolation — kept for symmetry with StdFs and to make the dependency on direct_io's gate explicit at the call site, without requiring auditors to chase the implication through the manifest. --- src/fs/io_uring_fs.rs | 12 ++++++++++++ src/fs/std_fs.rs | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/src/fs/io_uring_fs.rs b/src/fs/io_uring_fs.rs index 8db560456..237cbba85 100644 --- a/src/fs/io_uring_fs.rs +++ b/src/fs/io_uring_fs.rs @@ -120,6 +120,18 @@ impl Fs for IoUringFs { .truncate(opts.truncate) .append(opts.append); + // Gate matches the `mod direct_io;` declaration in `fs/mod.rs` + // — the submodule only exists when `feature = "std"` is on. + // The whole `io_uring_fs` module is already gated by + // `cfg(all(target_os = "linux", feature = "io-uring"))`, and + // the `io-uring` feature transitively enables `std` (see the + // feature declaration in Cargo.toml), so this extra + // `feature = "std"` predicate is logically redundant here. + // Kept for consistency with `StdFs::open` and to make the + // dependency on direct_io's gate explicit at the call site, + // so feature-gate audits don't have to chase the implication + // through Cargo.toml. 
+ #[cfg(feature = "std")] super::direct_io::apply_direct_io_flag(&mut builder, opts.direct_io); let file = builder.open(path)?; diff --git a/src/fs/std_fs.rs b/src/fs/std_fs.rs index 2c961be59..292995972 100644 --- a/src/fs/std_fs.rs +++ b/src/fs/std_fs.rs @@ -116,6 +116,15 @@ impl Fs for StdFs { .truncate(opts.truncate) .append(opts.append); + // Gate matches the `mod direct_io;` declaration in `fs/mod.rs` + // — the submodule only exists when `feature = "std"` is on. + // Without the gate this site would fail to compile under + // `--no-default-features --features alloc` even before the + // wider std-bound surface of `StdFs` itself hits the trait + // signatures; keeping the cfg in sync prevents adding a + // resolution-time error on top of the type-checking ones + // already tracked under the no-std migration epic. + #[cfg(feature = "std")] super::direct_io::apply_direct_io_flag(&mut builder, opts.direct_io); let file = builder.open(path)?;