Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
746a513
feat(compression): enable dictionary compression in pure Rust backend
polaz Apr 7, 2026
aacc1f1
fix(compression): normalize ZstdDictionary::id() to always return non…
polaz Apr 7, 2026
7650e6b
perf(compression): cache FrameCompressor in TLS for pure Rust dict path
polaz Apr 7, 2026
1aec535
perf(compression): reuse source Vec capacity in TLS compress_with_dict
polaz Apr 7, 2026
59bd591
refactor(compression): extract DICT_MAGIC to module-level constant
polaz Apr 7, 2026
9ce1517
test(compression): add cross-backend raw-content dict interop tests
polaz Apr 7, 2026
a8118f2
fix(compression): guard raw-content decompress against decompression …
polaz Apr 7, 2026
e765717
refactor(compression): extract decode_raw_content_bounded helper
polaz Apr 8, 2026
4f8b969
test(compression): add raw-content dict capacity guard tests
polaz Apr 8, 2026
e8c9fb3
refactor(compression): extract decompress dispatch into named function
polaz Apr 8, 2026
381957b
docs: note ZstdDict support in zstd-pure feature section
polaz Apr 8, 2026
4c52232
ci(codecov): add zstd-pure coverage run to merge report
polaz Apr 8, 2026
8887d18
fix(compression): use read_exact to drain FrameDecoder buffer
polaz Apr 8, 2026
f8c33a2
test(compression): add unit tests for strip_dict_id and error branches
polaz Apr 8, 2026
08bd0fa
test(compression): cover bounded_read Io paths; tighten unreachable b…
polaz Apr 8, 2026
ffd9512
test(compression): directly test decode_raw_content_bounded error paths
polaz Apr 8, 2026
ac31ce6
feat(compression): enable dictionary compression in pure Rust backend
polaz Apr 8, 2026
13a8f45
test(compression): add regression tests for empty raw-content dict ro…
polaz Apr 8, 2026
53befa2
fix(compression): allow empty raw-content frames at capacity=0 in bou…
polaz Apr 8, 2026
2bfa811
ci: add MSRV to test-zstd matrix
polaz Apr 8, 2026
3f47c25
docs(compression): remove C FFI references; rename cold bench to tls_hit
polaz Apr 8, 2026
299cfe2
docs(compression): correct DICT_MAGIC endian notation; expand ZstdDic…
polaz Apr 8, 2026
44bb55e
fix(compression): use checked_add for overflow guard; reuse cached di…
polaz Apr 8, 2026
f6dde9f
docs(compression): TLS cache is backend-internal; ZstdDictionary hold…
polaz Apr 8, 2026
a6200d3
build(deps): update structured-zstd 0.0.7 → 0.0.10
polaz Apr 8, 2026
28676bb
test(compression): add compaction path integration test for ZstdDict
polaz Apr 8, 2026
ef55325
docs(compression): clarify ZstdDictionary::new doc and test name
polaz Apr 8, 2026
c9c7ddf
docs(compression): standardize magic byte notation and fix minor doc …
polaz Apr 8, 2026
dd0275f
test(compression): assert L0 is empty after major_compact in zstd dic…
polaz Apr 9, 2026
5f2e3a0
build(deps): update structured-zstd 0.0.10 → 0.0.11
polaz Apr 9, 2026
eb34b7f
docs(compression): document UptoBytes one-block over-decode behaviour…
polaz Apr 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
# v6 requires runner ≥2.327.1 (node24 runtime) — github-hosted ubuntu-latest satisfies this.
# Credentials are NOT read from .git/config here; git pushes use the explicit App token
# generated by the create-github-app-token step below.
- uses: actions/checkout@v6
Comment thread
polaz marked this conversation as resolved.

- name: Generate bot token
id: bot-token
Expand Down
26 changes: 26 additions & 0 deletions .github/workflows/coordinode-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,28 @@ jobs:
working-directory: tools/db_bench
run: cargo check --all-features

test-zstd:
needs: lint
timeout-minutes: 15
strategy:
fail-fast: true
matrix:
rust: [stable, "1.92.0"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@stable
with:
toolchain: ${{ matrix.rust }}
- uses: Swatinem/rust-cache@v2
with:
prefix-key: ubuntu-cargo-zstd-${{ matrix.rust }}
- uses: taiki-e/install-action@nextest
- name: Clippy (zstd backend)
run: cargo clippy --no-default-features --features zstd,lz4 --all-targets -- -D warnings
- name: Run tests (zstd backend)
run: cargo nextest run --profile ci --no-default-features --features zstd,lz4

cross:
needs: lint
timeout-minutes: 15
Expand Down Expand Up @@ -97,6 +119,10 @@ jobs:
- uses: taiki-e/install-action@nextest
# proptest cases: 32 hardcoded in ProptestConfig
- run: cargo +nightly llvm-cov --no-report nextest --all-features
# zstd feature: run with a narrower feature set (zstd + lz4, no defaults)
# to validate the zstd backend in isolation. --all-features already enables
# zstd (zstd-pure is an alias), so this step covers the non-default path.
- run: cargo +nightly llvm-cov --no-report nextest --no-default-features --features zstd,lz4
- run: cargo +nightly llvm-cov --no-report --doc --features lz4
- run: cargo +nightly llvm-cov report --doctests --lcov --output-path lcov.info
- uses: codecov/codecov-action@v5
Expand Down
7 changes: 3 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ path = "src/lib.rs"
default = []
io-uring = ["dep:io-uring"]
lz4 = ["dep:lz4_flex"]
zstd = ["dep:zstd"]
zstd-pure = ["dep:structured-zstd"]
zstd = ["dep:structured-zstd"]
zstd-pure = ["zstd"]
encryption = ["dep:aes-gcm", "dep:rand_chacha"]
bytes_1 = ["dep:bytes"]
metrics = []
Expand All @@ -34,8 +34,7 @@ enum_dispatch = "0.3.13"
interval-heap = "0.0.5"
log = "0.4.27"
lz4_flex = { version = "0.13.0", optional = true, default-features = false }
zstd = { version = "0.13", optional = true, default-features = false }
structured-zstd = { version = "0.0.7", optional = true, default-features = false, features = ["std"] }
structured-zstd = { version = "0.0.11", optional = true, default-features = false, features = ["std"] }
quick_cache = { version = "0.6.16", default-features = false, features = [] }
Comment thread
polaz marked this conversation as resolved.
Comment thread
polaz marked this conversation as resolved.
rustc-hash = "2.1.1"
self_cell = "1.2.0"
Expand Down
20 changes: 8 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,22 @@ Allows using `LZ4` compression, powered by [`lz4_flex`](https://github.com/PSeit

### zstd

Allows using `Zstd` compression via C FFI bindings to libzstd, powered by [`zstd`](https://github.com/gyscos/zstd-rs).
Allows using `Zstd` compression via a pure Rust implementation, powered by
[`structured-zstd`](https://github.com/structured-world/structured-zstd) (managed fork of ruzstd).
Requires no C compiler or system libraries — compiles with `cargo build` alone.
Supports both regular zstd (`CompressionType::Zstd`) and dictionary compression
(`CompressionType::ZstdDict`) for improved ratios on small table blocks (4–64 KiB).
Blob-file dictionary compression is currently not supported.

**Current limitations:**
- Decompression throughput is ~2–3.5× slower than the C reference implementation

*Disabled by default.*

### zstd-pure

Allows using `Zstd` compression via a pure Rust implementation, powered by
[`structured-zstd`](https://github.com/structured-world/structured-zstd) (managed fork of ruzstd).
Requires no C compiler or system libraries — compiles with `cargo build` alone.

Both backends produce RFC 8878-compliant zstd frames, so data compressed by one
can be decompressed by the other. When both `zstd` and `zstd-pure` are enabled,
the C FFI backend takes precedence.

**Current limitations:**
- Dictionary compression is not yet supported (dictionary decompression works)
- Decompression throughput is ~2–3.5× slower than the C reference
Deprecated alias for `zstd`. Enabling `zstd-pure` is equivalent to enabling `zstd`;
the `zstd-pure` feature name will be removed in a future release.

Comment thread
coderabbitai[bot] marked this conversation as resolved.
*Disabled by default.*

Expand Down
36 changes: 17 additions & 19 deletions benches/zstd_dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@

//! Benchmark: per-block zstd dictionary decompression latency.
//!
//! Measures the cost of `decompress_with_dict` for a single compressed block,
//! both cold (first call, dictionary not yet cached in the backend's TLS /
//! `OnceLock`) and warm (subsequent calls, dictionary already cached).
//! Measures the cost of `decompress_with_dict` for a single compressed block.
//! Two scenarios are covered:
//!
//! - **`warm`** — steady-state per-block cost with a long-lived `ZstdDictionary`
//! handle; the TLS `FrameDecoder` is pre-populated before timing starts.
//! - **`tls_hit`** — each iteration receives a *fresh* `ZstdDictionary` handle,
//! but because all iterations share the same dictionary bytes (same xxh3 hash
//! key), the thread-local `FrameDecoder` remains cached across iterations.
//! This measures the steady-state per-block cost when callers reconstruct the
//! handle on every operation.
//!
//! Run with:
//!
//! ```text
//! cargo bench --bench zstd_dict --features zstd # C FFI backend
//! cargo bench --bench zstd_dict --features zstd-pure # pure Rust backend
//! cargo bench --bench zstd_dict --features zstd
//! ```

use criterion::{Criterion, criterion_group, criterion_main};
Expand Down Expand Up @@ -74,20 +80,12 @@ fn bench_decompress_with_dict(c: &mut Criterion) {
});
});

// "Cold" benchmark: each iteration gets a fresh `ZstdDictionary` handle
// (new `OnceLock` for the C FFI backend, same dict bytes for both).
//
// For the C FFI backend this truly measures first-call cost: a fresh
// `ZstdDictionary` has an unpopulated `OnceLock`, so `ZSTD_createDDict`
// is invoked on the first decompression and cached in the handle.
//
// For the pure Rust backend the result is different: the TLS decoder is
// keyed by the 64-bit content hash. All iterations share the same DICT
// bytes and therefore the same hash, so after the first iteration the TLS
// entry is still live — subsequent iterations measure the TLS-hit path, not
// dict parsing. True "cold" cost for the pure Rust backend is therefore
// only observable on the very first iteration of the first benchmark run.
c.bench_function("decompress_with_dict/cold", |b| {
// TLS-hit benchmark: each iteration gets a fresh `ZstdDictionary` handle,
// but the TLS decoder is keyed by the 64-bit content hash. All iterations
// share the same DICT bytes and therefore the same hash, so the TLS entry
// remains live across iterations — this measures the steady-state per-block
// decompression cost with the decoder already cached.
c.bench_function("decompress_with_dict/tls_hit", |b| {
Comment thread
polaz marked this conversation as resolved.
b.iter_batched(
|| ZstdDictionary::new(DICT),
|d| {
Expand Down
6 changes: 1 addition & 5 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
fn main() {
println!("cargo:rerun-if-env-changed=CARGO_FEATURE_ZSTD");
println!("cargo:rerun-if-env-changed=CARGO_FEATURE_ZSTD_PURE");

let zstd = std::env::var("CARGO_FEATURE_ZSTD").is_ok();
let zstd_pure = std::env::var("CARGO_FEATURE_ZSTD_PURE").is_ok();

if zstd || zstd_pure {
if std::env::var("CARGO_FEATURE_ZSTD").is_ok() {
println!("cargo:rustc-cfg=zstd_any");
}
}
120 changes: 49 additions & 71 deletions src/compression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,7 @@
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

// Backend modules — only one is compiled based on feature flags.
// When both `zstd` and `zstd-pure` are enabled, C FFI takes precedence.
#[cfg(feature = "zstd")]
mod zstd_ffi;

#[cfg(all(feature = "zstd-pure", not(feature = "zstd")))]
mod zstd_pure;

use crate::coding::{Decode, Encode};
Expand All @@ -19,14 +14,9 @@ use std::sync::Arc;

/// Zstd compression backend operations.
///
/// This trait abstracts the zstd implementation behind a compile-time
/// selected backend. The C FFI backend (`zstd` feature) provides full
/// compression levels 1–22 and dictionary support. The pure Rust backend
/// (`zstd-pure` feature) provides compression levels 1–22 with no C
/// dependencies (dictionary compression not yet supported).
///
/// Both backends produce RFC 8878 compliant zstd frames, so data
/// compressed by one can be decompressed by the other.
/// Abstracts the zstd implementation so callsites are independent of the
/// underlying crate. Enabled by the `zstd` feature (pure Rust, no C
/// dependencies). Produces RFC 8878 compliant zstd frames.
#[cfg(zstd_any)]
pub trait CompressionProvider {
/// Compress `data` at the given zstd level (1–22).
Expand All @@ -35,30 +25,31 @@ pub trait CompressionProvider {
/// Decompress a zstd frame, pre-allocating `capacity` bytes.
fn decompress(data: &[u8], capacity: usize) -> crate::Result<Vec<u8>>;

/// Compress `data` using a pre-trained dictionary.
/// Compress `data` using a zstd dictionary.
///
/// `dict_raw` may be either of the following; the zstd backend in this
/// crate accepts both representations:
///
/// * A finalized zstd dictionary, as produced by `zstd --train`: the header
///   bytes `37 A4 30 EC` (the little-endian encoding of `0xEC30A437`)
///   followed by entropy tables and content. Such bytes can be obtained
///   from [`ZstdDictionary::raw`] for persistence and interop.
/// * Raw content bytes: bare bytes used as LZ77 history.
fn compress_with_dict(data: &[u8], level: i32, dict_raw: &[u8]) -> crate::Result<Vec<u8>>;

/// Decompress a zstd frame that was compressed with a dictionary.
///
/// `dict` exposes the raw dictionary bytes **and** a lazily-initialized
/// pre-compiled form (C FFI backend: `ZSTD_DDict`; pure Rust backend:
/// cached `FrameDecoder` in thread-local storage). Backends must use the
/// prepared form to avoid re-parsing the dictionary on every call.
/// `dict` provides the raw dictionary bytes and a 64-bit fingerprint used
/// as the TLS cache key. Implementations cache the parsed decoder in
/// thread-local storage keyed by that fingerprint to avoid re-parsing the
/// dictionary on every call.
fn decompress_with_dict(
data: &[u8],
dict: &ZstdDictionary,
capacity: usize,
) -> crate::Result<Vec<u8>>;
}

/// The active zstd backend, selected at compile time.
///
/// When `zstd` (C FFI) is enabled it takes precedence; otherwise
/// `zstd-pure` (structured-zstd) is used.
/// The active zstd backend (pure Rust via `structured-zstd`).
#[cfg(feature = "zstd")]
pub type ZstdBackend = zstd_ffi::ZstdFfiProvider;

#[cfg(all(feature = "zstd-pure", not(feature = "zstd")))]
pub type ZstdBackend = zstd_pure::ZstdPureProvider;

/// Pre-trained zstd dictionary for improved compression of small blocks.
Expand All @@ -83,23 +74,10 @@ pub type ZstdBackend = zstd_pure::ZstdPureProvider;
#[cfg(zstd_any)]
pub struct ZstdDictionary {
/// Full 64-bit xxh3 hash used as the collision-resistant cache key for the
/// thread-local `FrameDecoder` in the pure Rust backend. The public
/// `id() -> u32` method returns the lower 32 bits for external consumers.
/// thread-local `FrameDecoder`. The public `id() -> u32` method returns
/// the lower 32 bits for external consumers.
id: u64,
raw: Arc<[u8]>,

/// Pre-compiled decompressor dictionary, lazily initialized on first use.
///
/// Wrapped in `Arc<OnceLock<…>>` so all clones of the same
/// `ZstdDictionary` share one compiled instance. With the C FFI backend,
/// `ZSTD_DDict` is therefore created at most once per dictionary handle,
/// regardless of how many table readers hold a clone of that handle.
///
/// Available only with the C FFI backend (`zstd` feature). The pure Rust
/// backend caches an equivalent `FrameDecoder` in thread-local storage
/// inside `decompress_with_dict` instead.
#[cfg(feature = "zstd")]
prepared: Arc<std::sync::OnceLock<zstd::dict::DecoderDictionary<'static>>>,
}

#[cfg(zstd_any)]
Expand All @@ -108,52 +86,52 @@ impl Clone for ZstdDictionary {
Self {
id: self.id,
raw: Arc::clone(&self.raw),
#[cfg(feature = "zstd")]
prepared: Arc::clone(&self.prepared),
}
}
}

#[cfg(zstd_any)]
impl ZstdDictionary {
/// Creates a new dictionary from raw bytes.
/// Creates a new dictionary handle from raw bytes.
///
/// The raw bytes should be a pre-trained zstd dictionary (e.g., output
/// of `zstd::dict::from_continuous` or `zstd --train`). The dictionary
/// ID is stored as a full 64-bit xxh3 hash; the public [`ZstdDictionary::id`]
/// method returns the lower 32 bits for external consumers.
/// `raw` may be either:
///
/// * A **finalized zstd dictionary** — bytes starting with the magic
/// `37 A4 30 EC` (as produced by `zstd --train`; accessible via
/// [`ZstdDictionary::raw`] for persistence and interop). The backend
/// parses it with the full entropy-table decoder.
/// * A **raw content dictionary** — arbitrary bytes used as LZ77 history
/// (no magic header). Useful when the caller controls the training data
/// and does not need the full entropy-table overhead.
///
/// Both forms are accepted by [`CompressionProvider::compress_with_dict`]
/// and [`CompressionProvider::decompress_with_dict`].
///
/// The handle stores the full 64-bit xxh3 hash of `raw` internally.
/// [`ZstdDictionary::id`] returns the lower 32 bits for external consumers
/// (config validation, frame header); the crate-internal `id64` accessor
/// exposes the full fingerprint for use as a cache key.
#[must_use]
pub fn new(raw: &[u8]) -> Self {
Self {
id: compute_dict_id(raw),
raw: Arc::from(raw),
#[cfg(feature = "zstd")]
prepared: Arc::new(std::sync::OnceLock::new()),
}
}

/// Returns the lazily-initialized pre-compiled decompressor dictionary.
///
/// On first call this copies the raw bytes into a `ZSTD_DDict` (C
/// library's opaque pre-parsed form) and caches the result inside this
/// `ZstdDictionary`. Subsequent calls — from any thread — return the
/// cached reference with no further allocation or parsing.
/// Returns a 32-bit fingerprint derived from the dictionary content.
///
/// Using this together with
/// [`zstd::bulk::Decompressor::with_prepared_dictionary`] eliminates the
/// per-block `ZSTD_createDDict` call that was previously paid on every
/// `decompress_with_dict` invocation.
#[cfg(feature = "zstd")]
pub(crate) fn decoder_dict(&self) -> &zstd::dict::DecoderDictionary<'static> {
self.prepared
.get_or_init(|| zstd::dict::DecoderDictionary::copy(&self.raw))
}

/// Returns a 32-bit dictionary fingerprint (lower 32 bits of xxh3).
/// The fingerprint is the lower 32 bits of the xxh3-64 hash of the raw
/// dictionary bytes. It is stable for a given byte sequence and is
/// intended for config validation (matching a `CompressionType::ZstdDict`
/// `dict_id` field against the supplied `ZstdDictionary`) and external
/// interop.
///
/// Intended for display and external interop (e.g., matching against the
/// dict ID embedded in a zstd frame header). For internal cache keying
/// use [`id64`](ZstdDictionary::id64) to avoid hash collisions.
/// The value may theoretically be `0` (probability ≈ 1/2³²). Backends
/// that embed a dict ID in the zstd frame header (where id=0 is reserved)
/// are responsible for clamping to at least 1 themselves. Config
/// validation is unaffected: both sides derive the ID from the same bytes
/// and therefore agree even in the zero case.
#[must_use]
#[expect(
clippy::cast_possible_truncation,
Expand All @@ -164,8 +142,8 @@ impl ZstdDictionary {
}

/// Returns the full 64-bit xxh3 fingerprint used as a collision-resistant
/// cache key inside the pure Rust backend's TLS decoder.
#[cfg(all(feature = "zstd-pure", not(feature = "zstd")))]
/// cache key inside the TLS decoder.
#[cfg(feature = "zstd")]
#[must_use]
pub(crate) fn id64(&self) -> u64 {
self.id
Expand Down
Loading
Loading