Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
746a513
feat(compression): enable dictionary compression in pure Rust backend
polaz Apr 7, 2026
aacc1f1
fix(compression): normalize ZstdDictionary::id() to always return non…
polaz Apr 7, 2026
7650e6b
perf(compression): cache FrameCompressor in TLS for pure Rust dict path
polaz Apr 7, 2026
1aec535
perf(compression): reuse source Vec capacity in TLS compress_with_dict
polaz Apr 7, 2026
59bd591
refactor(compression): extract DICT_MAGIC to module-level constant
polaz Apr 7, 2026
9ce1517
test(compression): add cross-backend raw-content dict interop tests
polaz Apr 7, 2026
a8118f2
fix(compression): guard raw-content decompress against decompression …
polaz Apr 7, 2026
e765717
refactor(compression): extract decode_raw_content_bounded helper
polaz Apr 8, 2026
4f8b969
test(compression): add raw-content dict capacity guard tests
polaz Apr 8, 2026
e8c9fb3
refactor(compression): extract decompress dispatch into named function
polaz Apr 8, 2026
381957b
docs: note ZstdDict support in zstd-pure feature section
polaz Apr 8, 2026
4c52232
ci(codecov): add zstd-pure coverage run to merge report
polaz Apr 8, 2026
8887d18
fix(compression): use read_exact to drain FrameDecoder buffer
polaz Apr 8, 2026
f8c33a2
test(compression): add unit tests for strip_dict_id and error branches
polaz Apr 8, 2026
08bd0fa
test(compression): cover bounded_read Io paths; tighten unreachable b…
polaz Apr 8, 2026
ffd9512
test(compression): directly test decode_raw_content_bounded error paths
polaz Apr 8, 2026
ac31ce6
feat(compression): enable dictionary compression in pure Rust backend
polaz Apr 8, 2026
13a8f45
test(compression): add regression tests for empty raw-content dict ro…
polaz Apr 8, 2026
53befa2
fix(compression): allow empty raw-content frames at capacity=0 in bou…
polaz Apr 8, 2026
2bfa811
ci: add MSRV to test-zstd matrix
polaz Apr 8, 2026
3f47c25
docs(compression): remove C FFI references; rename cold bench to tls_hit
polaz Apr 8, 2026
299cfe2
docs(compression): correct DICT_MAGIC endian notation; expand ZstdDic…
polaz Apr 8, 2026
44bb55e
fix(compression): use checked_add for overflow guard; reuse cached di…
polaz Apr 8, 2026
f6dde9f
docs(compression): TLS cache is backend-internal; ZstdDictionary hold…
polaz Apr 8, 2026
a6200d3
build(deps): update structured-zstd 0.0.7 → 0.0.10
polaz Apr 8, 2026
28676bb
test(compression): add compaction path integration test for ZstdDict
polaz Apr 8, 2026
ef55325
docs(compression): clarify ZstdDictionary::new doc and test name
polaz Apr 8, 2026
c9c7ddf
docs(compression): standardize magic byte notation and fix minor doc …
polaz Apr 8, 2026
dd0275f
test(compression): assert L0 is empty after major_compact in zstd dic…
polaz Apr 9, 2026
5f2e3a0
build(deps): update structured-zstd 0.0.10 → 0.0.11
polaz Apr 9, 2026
eb34b7f
docs(compression): document UptoBytes one-block over-decode behaviour…
polaz Apr 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
Comment thread
polaz marked this conversation as resolved.

- name: Generate bot token
id: bot-token
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/coordinode-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,28 @@ jobs:
working-directory: tools/db_bench
run: cargo check --all-features

test-zstd-pure:
needs: lint
timeout-minutes: 15
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: dtolnay/rust-toolchain@stable
Comment thread
polaz marked this conversation as resolved.
Outdated
with:
toolchain: stable
- uses: Swatinem/rust-cache@v2
with:
prefix-key: ubuntu-cargo-zstd-pure
- uses: taiki-e/install-action@nextest
- name: Clippy (pure backend)
run: cargo clippy --no-default-features --features zstd-pure,lz4 --all-targets -- -D warnings
- name: Run tests (zstd-pure backend, no C zstd)
# zstd_pure_dict integration tests are gated with
# #[cfg(all(feature = "zstd-pure", not(feature = "zstd")))], so they
# are skipped by --all-features. Run without "zstd" to exercise the
# pure Rust dictionary compression path independently.
run: cargo nextest run --profile ci --no-default-features --features zstd-pure,lz4
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

cross:
needs: lint
timeout-minutes: 15
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ can be decompressed by the other. When both `zstd` and `zstd-pure` are enabled,
the C FFI backend takes precedence.

**Current limitations:**
- Dictionary compression is not yet supported (dictionary decompression works)
- Decompression throughput is ~2–3.5× slower than the C reference

Comment thread
coderabbitai[bot] marked this conversation as resolved.
*Disabled by default.*
Expand Down
26 changes: 20 additions & 6 deletions src/compression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use std::sync::Arc;
/// This trait abstracts the zstd implementation behind a compile-time
/// selected backend. The C FFI backend (`zstd` feature) provides full
/// compression levels 1–22 and dictionary support. The pure Rust backend
/// (`zstd-pure` feature) provides compression levels 1–22 with no C
/// dependencies (dictionary compression not yet supported).
/// (`zstd-pure` feature) provides compression levels 1–22 and dictionary
/// support with no C dependencies.
///
/// Both backends produce RFC 8878 compliant zstd frames, so data
/// compressed by one can be decompressed by the other.
Expand All @@ -35,7 +35,13 @@ pub trait CompressionProvider {
/// Decompress a zstd frame, pre-allocating `capacity` bytes.
fn decompress(data: &[u8], capacity: usize) -> crate::Result<Vec<u8>>;

/// Compress `data` using a pre-trained dictionary.
/// Compress `data` using a zstd dictionary.
///
/// `dict_raw` may be either a finalized zstd dictionary (header with magic
/// `0xEC30A437`, stored little-endian as bytes `37 A4 30 EC`; entropy tables;
/// content — produced by `zstd --train` or
Comment thread
polaz marked this conversation as resolved.
Outdated
/// [`ZstdDictionary::raw`]) or a raw content dictionary (bare bytes used as
/// LZ77 history). Both the C FFI backend and the pure Rust backend accept
/// either representation.
Comment thread
polaz marked this conversation as resolved.
Outdated
fn compress_with_dict(data: &[u8], level: i32, dict_raw: &[u8]) -> crate::Result<Vec<u8>>;

/// Decompress a zstd frame that was compressed with a dictionary.
Expand Down Expand Up @@ -151,9 +157,17 @@ impl ZstdDictionary {

/// Returns a 32-bit dictionary fingerprint (lower 32 bits of xxh3).
///
/// Intended for display and external interop (e.g., matching against the
/// dict ID embedded in a zstd frame header). For internal cache keying
/// use [`id64`](ZstdDictionary::id64) to avoid hash collisions.
/// Intended for config validation (matching a `CompressionType::ZstdDict`
/// `dict_id` against the supplied `ZstdDictionary`) and external interop.
///
/// The value is the raw lower 32 bits of xxh3 and may theoretically be `0`
/// (probability ≈ 1/2³²). Backends that embed a dict ID in the zstd frame
/// header (where id=0 is reserved) are responsible for clamping to at
/// least 1 themselves. Config validation is unaffected: both sides derive
/// the ID from the same bytes and therefore agree even in the zero case.
///
/// For internal cache keying use [`id64`](ZstdDictionary::id64) to avoid
/// hash collisions.
Comment thread
polaz marked this conversation as resolved.
Outdated
#[must_use]
#[expect(
clippy::cast_possible_truncation,
Expand Down
Loading
Loading