Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/coordinode-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,40 @@
- name: Run tests (zstd backend)
run: cargo nextest run --profile ci --no-default-features --features zstd,lz4

no-std-check:
# Tracks no-std + alloc migration progress. The crate is incrementally
# being ported to support `#![no_std]` builds: modules with std-only
# dependencies stay gated behind `#[cfg(feature = "std")]` until they
# are migrated. This job runs `cargo check --no-default-features
# --features alloc` on a true no-std-only target (the `thumbv7em-none-eabihf`
# cross-compile target has no std at all, so any leaked `std::*` import
# surfaces here even if the host toolchain has std available).
#
# The job is currently EXPECTED TO FAIL — that's the whole point. It is
# the canonical progress meter for the migration: the number of compile
# errors must monotonically decrease across PRs until it reaches zero,
# at which point the `continue-on-error: true` flag below is removed and
# this becomes a gating check. New code added to this crate MUST be
# no-std-friendly (prefer `core::*` / `alloc::*`, gate std-only behind
# `#[cfg(feature = "std")]`); reviewers should request the error count
# from this job and compare against main.
needs: lint
continue-on-error: true
timeout-minutes: 10
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@stable
Comment thread
polaz marked this conversation as resolved.
with:
toolchain: "1.92.0"
targets: thumbv7em-none-eabihf
- uses: Swatinem/rust-cache@v2
Comment thread
polaz marked this conversation as resolved.
with:
prefix-key: ubuntu-cargo-no-std
- name: Check (no_std + alloc)
run: cargo check --target thumbv7em-none-eabihf --no-default-features --features alloc

cross:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}
needs: lint
timeout-minutes: 15
strategy:
Expand Down
27 changes: 25 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,24 @@ name = "lsm_tree"
path = "src/lib.rs"

[features]
default = []
io-uring = ["dep:io-uring"]
# `std` is the canonical platform feature. Default-on so every existing
# downstream consumer keeps the current build profile (file I/O, threads,
# `std::sync` primitives, system clock). Turning it OFF puts the crate
# into no-std + alloc mode — currently in progress: only foundational
# modules compile without `std`, the rest still imports `std::*` directly
# and is gated to migrate incrementally. The `cargo check
# --no-default-features --features alloc` CI job tracks migration progress
# (expected to fail until each module is ported). New code added to this
# crate MUST be no-std-friendly: prefer `core::*` / `alloc::*`, gate
# std-only primitives behind `#[cfg(feature = "std")]`, and pick external
# crates that support no-std.
default = ["std"]
std = []
# `alloc` is the minimal hard requirement — the crate uses `Arc`,
# `Vec`, `Box` everywhere. Listed explicitly so consumers can pick it up
# without enabling `std` once the no-std migration completes per-module.
alloc = []
io-uring = ["dep:io-uring", "std"]
lz4 = ["dep:lz4_flex"]
Comment thread
polaz marked this conversation as resolved.
Outdated
# The previous `zstd-pure = ["zstd"]` alias was removed. It was
# documented as deprecated when there were two candidate zstd backends
Expand Down Expand Up @@ -61,6 +77,13 @@ interval-heap = "0.0.5"
log = "0.4.27"
lz4_flex = { version = "0.13.0", optional = true, default-features = false }
structured-zstd = { version = "0.0.21", optional = true, default-features = false, features = ["std"] }
# `once_cell::sync::OnceCell` has stable `get_or_try_init`; `std::sync::OnceLock`
# does not until 1.86+ (still unstable on our 1.92 MSRV path via `once_cell_try`).
# Use the external crate for the canonical single-parse-across-racers primitive
# without an auxiliary `Mutex`. The `race` module from this same crate provides
# the no-std + alloc variant (`OnceBox`) we'll swap to during the no-std
# migration.
once_cell = "1"
Comment thread
polaz marked this conversation as resolved.
Outdated
quick_cache = { version = "0.6.16", default-features = false, features = [] }
rustc-hash = "2.1.1"
self_cell = "1.2.0"
Expand Down
178 changes: 178 additions & 0 deletions src/compression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ use std::io::{Read, Write};
#[cfg(zstd_any)]
use std::sync::Arc;

#[cfg(feature = "zstd")]
use once_cell::sync::OnceCell;

/// Zstd compression backend operations.
///
/// Abstracts the zstd implementation so callsites are independent of the
Expand Down Expand Up @@ -78,6 +81,16 @@ pub struct ZstdDictionary {
/// the lower 32 bits for external consumers.
id: u64,
raw: Arc<[u8]>,
/// Lazily-parsed shared `DictionaryHandle` (Arc-backed inside structured-zstd).
/// Populated on first decompress call and reused across all subsequent calls
/// and all threads — eliminates the per-thread dictionary re-parse the TLS
/// `FrameDecoder` cache used to incur on every miss. `OnceCell::get_or_try_init`
/// guarantees exactly one successful parse across racing threads with no
/// auxiliary mutex: losers wait on the internal one-shot synchronisation
/// primitive and reuse the winner's `Arc` clone. This keeps the `new()`
/// constructor infallible AND preserves the single-parse contract.
#[cfg(feature = "zstd")]
prepared: Arc<OnceCell<structured_zstd::decoding::DictionaryHandle>>,
}

#[cfg(zstd_any)]
Expand All @@ -86,6 +99,8 @@ impl Clone for ZstdDictionary {
Self {
id: self.id,
raw: Arc::clone(&self.raw),
#[cfg(feature = "zstd")]
prepared: Arc::clone(&self.prepared),
}
}
}
Expand Down Expand Up @@ -130,9 +145,62 @@ impl ZstdDictionary {
Self {
id: compute_dict_id(raw),
raw: Arc::from(raw),
#[cfg(feature = "zstd")]
prepared: Arc::new(OnceCell::new()),
}
}

/// Returns the shared pre-parsed `DictionaryHandle`, parsing on first call
/// and reusing the cached handle on every subsequent call (across threads).
///
/// The handle wraps an `Arc<Dictionary>` inside structured-zstd, so cloning
/// it is an atomic refcount bump — cheap enough to use on every decompress
/// call. Frame decoders register the dictionary via
/// `FrameDecoder::add_dict_handle`, which shares the same `Arc` rather than
/// cloning the underlying entropy tables.
///
/// On the first call we attempt finalized-dict parsing (magic bytes
/// `37 A4 30 EC`); buffers without that prefix are treated as raw-content
/// dictionaries via `Dictionary::from_raw_content` with the same synthetic
/// 32-bit id formula the compressor uses (`xxh3(raw) as u32, clamped ≥ 1`).
/// Parse failures are NOT cached — the next caller will retry — but the
/// raw bytes are immutable for the dictionary's lifetime so a successful
/// parse on one thread is permanent.
#[cfg(feature = "zstd")]
pub(crate) fn prepared_handle(
&self,
) -> crate::Result<structured_zstd::decoding::DictionaryHandle> {
use structured_zstd::decoding::{Dictionary, DictionaryHandle};
const DICT_MAGIC: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];

// `get_or_try_init` is the canonical single-init-across-racers
// primitive: the closure runs at most once globally regardless of
// contention; concurrent callers wait on the OnceCell's internal
// one-shot synchronisation and read the cached `Arc` afterwards.
// The fast path (cached value) is lock-free; the slow path runs
// exactly once per `ZstdDictionary` lifetime even under heavy
// cold-start contention. On a parse failure the OnceCell stays
// empty and the next caller retries from scratch — preserving
// the retry-on-failure contract pinned by the rejection test.
self.prepared
.get_or_try_init(|| -> crate::Result<DictionaryHandle> {
if self.raw.starts_with(&DICT_MAGIC) {
DictionaryHandle::decode_dict(&self.raw)
.map_err(|e| crate::Error::Io(std::io::Error::other(e)))
} else {
#[expect(
clippy::cast_possible_truncation,
reason = "intentional: lower 32 bits of xxh3 as internal dict id (matches compressor)"
)]
let raw_content_id = (self.id as u32).max(1);
let dict = Dictionary::from_raw_content(raw_content_id, self.raw.to_vec())
.map_err(|e| crate::Error::Io(std::io::Error::other(e)))?;
Ok(DictionaryHandle::from_dictionary(dict))
}
})
.cloned()
}

/// Returns a 32-bit fingerprint derived from the dictionary content.
///
/// The fingerprint is the lower 32 bits of the xxh3-64 hash of the raw
Expand Down Expand Up @@ -562,5 +630,115 @@ mod tests {
assert!(debug.contains("ZstdDictionary"));
assert!(debug.contains("size: 4"));
}

// --- prepared_handle: pre-parsed `DictionaryHandle` cache ---
//
// The whole point of #232: parse the dictionary ONCE per
// `ZstdDictionary` instance and reuse the Arc-backed handle on every
// subsequent decompress call, across all threads. The tests below
// pin the contract: success / memoization / shared-OnceCell-across-
// clones / both finalized + raw-content paths / error surfacing.

#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_raw_content_dict_parses_and_memoizes() {
// Raw-content path: no magic prefix. structured-zstd builds a
// `Dictionary` from the bytes treated as LZ77 history. First
// call parses; second call must hit the OnceCell cache and
// return a handle that compares-equal to the first.
let dict = ZstdDictionary::new(b"raw-content training bytes here");
let h1 = dict
.prepared_handle()
.expect("first call must parse raw-content dict");
let h2 = dict
.prepared_handle()
.expect("second call must hit the cache");
assert_eq!(
h1.id(),
h2.id(),
"cached handle must report the same dict id"
);
}

#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_rejects_corrupted_finalized_magic() {
// Bytes that LOOK like a finalized dict (magic prefix matches)
// but are otherwise malformed must surface a parse error
// through `prepared_handle` rather than panicking. The OnceCell
// must NOT be populated with anything on failure — otherwise a
// future caller would skip the (now-deterministically-failing)
// parse and silently fall back to a stale cached value, breaking
// the retry-on-failure contract.
let mut bad = vec![0x37, 0xA4, 0x30, 0xEC]; // valid magic
bad.extend_from_slice(&[0xFF; 16]); // garbage payload
let dict = ZstdDictionary::new(&bad);
let result = dict.prepared_handle();
assert!(
result.is_err(),
"corrupted finalized dict must surface parse error",
);
assert!(
dict.prepared.get().is_none(),
"failed parse must NOT populate the OnceCell — retry-on-failure contract",
);
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_shared_across_clones() {
// `ZstdDictionary::clone` shares the inner `Arc<OnceCell<…>>`.
// Parsing through one clone must be visible to the other —
// otherwise each clone would re-parse independently, defeating
// the purpose of the cache when dictionaries are distributed
// across threads via clones.
let dict_a = ZstdDictionary::new(b"shared dict bytes for clone test");
let dict_b = dict_a.clone();

let _ = dict_a
.prepared_handle()
.expect("parse via dict_a must succeed");
// After dict_a parsed, dict_b's OnceCell (same Arc) must be
// populated. We cannot directly observe "did not re-parse"
// without instrumentation, but we can assert the cached
// handle round-trips through dict_b and reports the same id.
let h_b = dict_b
.prepared_handle()
.expect("dict_b must see cached handle");
assert_eq!(h_b.id(), dict_a.id());
// Cross-check OnceCell state directly: it is .get()-readable
// from both clones.
assert!(
dict_b.prepared.get().is_some(),
"OnceCell must be populated on dict_b after dict_a parsed",
);
}

#[cfg(feature = "zstd")]
#[test]
fn prepared_handle_is_lazy_and_populated_after_first_call() {
// The cache contract is lazy-init: `ZstdDictionary::new` must
// NOT eagerly parse, and the OnceCell must transition from
// `None` to `Some(_)` precisely on the first `prepared_handle`
// call. This pins both halves of the contract — a regression
// either way (eager parse OR no caching) lights up the assert.
//
// The end-to-end "real finalized dict parses successfully" path
// is exercised by the existing `zstd_backend` round-trip suite
// (which feeds real compressed frames through `decompress_with_dict`,
// implicitly going through `prepared_handle`); duplicating the
// dict-builder here would require linking the zstd dict trainer
// and adds no coverage over what the backend tests already give.
let dict = ZstdDictionary::new(b"laziness test bytes");
assert!(
dict.prepared.get().is_none(),
"ZstdDictionary::new must NOT eagerly parse the dictionary",
);
let _ = dict.prepared_handle().expect("explicit parse must succeed");
assert!(
dict.prepared.get().is_some(),
"OnceCell must be populated after first prepared_handle call",
);
}
}
}
52 changes: 16 additions & 36 deletions src/compression/zstd_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,26 +464,22 @@ impl CompressionProvider for ZstdProvider {
dict: &crate::compression::ZstdDictionary,
capacity: usize,
) -> crate::Result<Vec<u8>> {
use structured_zstd::decoding::{Dictionary, FrameDecoder};
use structured_zstd::decoding::FrameDecoder;

// Thread-local `FrameDecoder` with the dictionary pre-loaded.
//
// Parsing a zstd dictionary involves building Huffman and FSE decoding
// tables — expensive relative to per-block decompression for the small
// 4–64 KiB blocks typical in LSM-trees. This single-entry cache
// amortises that cost: if the active dictionary (identified by its
// 64-bit xxh3 fingerprint) matches the stored entry the decoder is
// reused; otherwise the entry is replaced.
// `FrameDecoder` is not `Send`, so we keep one per thread. The cached
// entry is keyed by the full 64-bit xxh3 fingerprint (`dict.id64()`),
// not the truncated 32-bit public fingerprint, to avoid decoder reuse
// when two distinct dictionaries happen to share the same 32-bit
// prefix. A 64-bit collision is 2^32× less likely than a 32-bit one.
//
// Thread-local storage is appropriate because `FrameDecoder` is not
// `Send` and each thread decompresses independently; no mutex is
// needed. If the active dictionary changes (e.g. different table),
// the decoder is re-initialised transparently.
// On miss we register the *pre-parsed* dictionary handle held by the
// `ZstdDictionary` itself (lazy-parsed once, shared via Arc inside
// structured-zstd). This eliminates the per-thread `Dictionary` re-parse
// the cache used to do on every miss — the dictionary's entropy tables
// are built once globally and the FrameDecoder just shares the Arc.
thread_local! {
// Keyed by the full 64-bit xxh3 fingerprint (`dict.id64()`), not
// the truncated 32-bit public fingerprint, to avoid decoder reuse
// when two distinct dictionaries happen to share the same 32-bit
// prefix. A 64-bit collision is 2^32× less likely than a 32-bit one.
static TLS_DECODER: std::cell::RefCell<Option<(u64, FrameDecoder)>> =
const { std::cell::RefCell::new(None) };
}
Expand All @@ -505,29 +501,13 @@ impl CompressionProvider for ZstdProvider {
// Re-initialise if this is the first call in this thread or if
// the dictionary has changed (different id64 → different table).
if !matches!(&*state, Some((id, _)) if *id == dict.id64()) {
// Mirror the format-detection logic in `compress_with_dict`:
// finalized dictionaries (magic bytes `37 A4 30 EC`) are parsed with
// `decode_dict`; raw content bytes use `from_raw_content` with
// the same synthetic id formula as `compress_with_dict` so that
// `force_dict` can locate the dict in the internal dicts map.
let parsed = if dict.raw().starts_with(&DICT_MAGIC) {
Dictionary::decode_dict(dict.raw())
.map_err(|e| crate::Error::Io(std::io::Error::other(e)))?
} else {
#[expect(
clippy::cast_possible_truncation,
reason = "intentional: lower 32 bits of xxh3 as internal dict id"
)]
let raw_content_id = {
let h = xxhash_rust::xxh3::xxh3_64(dict.raw()) as u32;
h.max(1) // id=0 is rejected; used internally for force_dict keying
};
Dictionary::from_raw_content(raw_content_id, dict.raw().to_vec())
.map_err(|e| crate::Error::Io(std::io::Error::other(e)))?
};
// Pull the shared pre-parsed handle from the dictionary. First
// caller across all threads parses; everyone after gets an Arc
// clone of the cached entropy tables.
let handle = dict.prepared_handle()?;
let mut decoder = FrameDecoder::new();
decoder
.add_dict(parsed)
.add_dict_handle(handle)
.map_err(|e| crate::Error::Io(std::io::Error::other(e)))?;
*state = Some((dict.id64(), decoder));
}
Expand Down
15 changes: 15 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
// Copyright (c) 2024-present, fjall-rs
// Copyright (c) 2026-present, Structured World Foundation

// no-std foundation: when the `std` feature is OFF the crate root opts into
// `no_std`. Default builds keep `std` enabled (file I/O, threading, system
// clock all live in `std`), so existing consumers see no behaviour change.
// The migration to a fully no-std-clean build is incremental — modules with
// std-only dependencies stay gated behind `#[cfg(feature = "std")]` until
// they are ported. The CI job `no-std-check` exercises `cargo check
// --no-default-features --features alloc` and tracks remaining work.
#![cfg_attr(not(feature = "std"), no_std)]

//! Embedded LSM-tree storage engine.
//!
//! Provides keyed point reads, prefix and range scans, MVCC snapshots, block
Expand Down Expand Up @@ -65,6 +74,12 @@
#![warn(clippy::redundant_feature_names)]
#![cfg_attr(coverage_nightly, feature(coverage_attribute))]

// `alloc` is the minimal hard dependency — the crate uses `Arc`, `Vec`,
// `Box`, and other heap types throughout. `extern crate alloc` makes the
// `alloc` crate root visible to `no_std` builds; under `std` it is a
// no-op alias because the standard library re-exports the same types.
extern crate alloc;

#[doc(hidden)]
pub type HashMap<K, V> = std::collections::HashMap<K, V, rustc_hash::FxBuildHasher>;

Expand Down
Loading