diff --git a/Cargo.toml b/Cargo.toml index 681f9c6dd..89b5612c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "coordinode-lsm-tree" -description = "A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) — CoordiNode fork" +description = "Embedded LSM-tree storage engine: BuRR filters, zstd dictionary compression, MVCC, range tombstones, merge operators, K/V separation, AES-256-GCM at rest." license = "Apache-2.0" version = "4.5.0" edition = "2024" @@ -9,8 +9,19 @@ readme = "README.md" include = ["src/**/*", "build.rs", "LICENSE-APACHE", "README.md", "CHANGELOG.md"] repository = "https://github.com/structured-world/coordinode-lsm-tree" homepage = "https://github.com/structured-world/coordinode-lsm-tree" -keywords = ["lsm-tree", "storage", "database", "coordinode", "key-value"] -categories = ["data-structures", "database-implementations"] +documentation = "https://docs.rs/coordinode-lsm-tree" +keywords = ["lsm-tree", "storage", "database", "embedded", "key-value"] +categories = ["data-structures", "database-implementations", "filesystem", "compression"] + +[package.metadata.docs.rs] +# Build the docs.rs page with every feature enabled so the rendered +# crate page exposes the full public API surface (zstd dictionary +# compression, encryption, io-uring, bytes integration, metrics, +# ribbon-serde). cfg(docsrs) is set so #[cfg_attr(docsrs, ...)] items +# render their feature/availability badges. +all-features = true +rustdoc-args = ["--cfg", "docsrs"] +targets = ["x86_64-unknown-linux-gnu"] [lib] name = "lsm_tree" @@ -20,11 +31,26 @@ path = "src/lib.rs" default = [] io-uring = ["dep:io-uring"] lz4 = ["dep:lz4_flex"] +# The previous `zstd-pure = ["zstd"]` alias was removed. It was +# documented as deprecated when there were two candidate zstd backends +# on the roadmap; only structured-zstd remains, so the alias serves no +# purpose and is dropped per the standard deprecation lifecycle. 
The +# removal is signalled to release tooling as a breaking change via the +# conventional `!` markers on this PR's breaking commits (BuRR filter +# wire format, V5 manifest gate); release-plz raises the crate's major +# version on the next release tag accordingly. zstd = ["dep:structured-zstd"] -zstd-pure = ["zstd"] encryption = ["dep:aes-gcm", "dep:rand_chacha"] bytes_1 = ["dep:bytes"] metrics = [] +# Vendored Ribbon filter retains its `#[cfg(feature = "ribbon-serde")]` +# guards (renamed from upstream's bare `serde` feature to avoid clashing +# with any future top-level serde feature in this crate). We do not +# consume the serde repr from inside this crate — the BuRR on-disk +# format is byteorder-encoded — but the feature wires `serde` as an +# optional dep so `--all-features` builds and a future extraction back +# into a standalone crate compile cleanly. +ribbon-serde = ["dep:serde"] [dependencies] bytes = { version = "1", optional = true } @@ -38,6 +64,9 @@ structured-zstd = { version = "0.0.21", optional = true, default-features = fals quick_cache = { version = "0.6.16", default-features = false, features = [] } rustc-hash = "2.1.1" self_cell = "1.2.0" +# Optional — only pulled in by the vendored Ribbon filter under the +# `ribbon-serde` feature flag (off by default). +serde = { version = "1", optional = true, features = ["derive"] } sfa = "~1.0.0" tempfile = "3.20.0" varint-rs = "2.2.0" @@ -65,6 +94,9 @@ fs_extra = "1.3.0" nanoid = "0.5.0" proptest = "1" rand = "0.10.1" +# Used by the vendored ribbon-filter's #[cfg(feature = "ribbon-serde")] +# round-trip tests. Dev-only; production code does not depend on it. +serde_json = "1" strum = { version = "0.28.0", features = ["derive"] } test-log = "0.2.18" diff --git a/README.md b/README.md index 363854dde..97a438b78 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ -

- -

+# coordinode-lsm-tree [![CI](https://github.com/structured-world/coordinode-lsm-tree/actions/workflows/coordinode-ci.yml/badge.svg)](https://github.com/structured-world/coordinode-lsm-tree/actions/workflows/coordinode-ci.yml) [![codecov](https://codecov.io/gh/structured-world/coordinode-lsm-tree/graph/badge.svg)](https://codecov.io/gh/structured-world/coordinode-lsm-tree) @@ -11,107 +9,106 @@ [![dependency status](https://deps.rs/repo/github/structured-world/coordinode-lsm-tree/status.svg)](https://deps.rs/repo/github/structured-world/coordinode-lsm-tree) [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](#license) -> LSM-tree engine for [CoordiNode](https://github.com/structured-world/coordinode), maintained by [Structured World Foundation](https://sw.foundation). -> Derivative work of [fjall-rs/lsm-tree](https://github.com/fjall-rs/lsm-tree), developed independently with diverging features: zstd dictionary compression, custom sequence number generators, multi_get (batch-optimized), PinnableSlice zero-copy reads, WriteBatch seqno-grouped batch writes with caller-controlled atomic visibility, intra-L0 compaction, and security hardening. +LSM-tree storage engine in Rust. Embedded library; provides keyed point reads, prefix and range scans, MVCC snapshots, compaction, and a block cache. No write-ahead log — durability is the caller's responsibility. Built for [CoordiNode](https://github.com/structured-world/coordinode); usable standalone. -> [!IMPORTANT] -> This fork now introduces a fork-specific **disk format V4** compatibility boundary. -> `V4` is a breaking on-disk change relative to `V3` because the fork persists new semantics such as range tombstones and merge operands. -> New code may continue reading supported `V3` databases, but databases written with these `V4` semantics must not be opened by older `V3` binaries. +## Status -A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rust. +On-disk format version **V5**. 
V5 introduces a wire-format break for filter blocks (BuRR replaces Bloom); V3 and V4 databases are not readable by this version and vice versa. Versioning is single-monotonic — every breaking format change bumps to the next version with explicit migration notes. -> [!NOTE] -> This crate only provides a primitive LSM-tree, not a full storage engine. -> For example, it does not ship with a write-ahead log. -> You probably want to use https://github.com/fjall-rs/fjall instead. +## Features -## About +### Read path -This is the most feature-rich LSM-tree implementation in Rust! It features: +- Point reads via `get` / `multi_get` (batch-optimized). +- `PinnableSlice` for zero-copy reads. +- `BurrFilter` AMQ filter (Bumped Ribbon Retrieval, Walzer & Dillinger 2022): ~1% memory overhead vs the information-theoretic minimum — ~30% smaller filter blocks than a same-FPR Bloom filter, or ~10× tighter FPR at the same memory budget. Used for both per-key and per-prefix membership checks. +- Forward and reverse range / prefix iteration. +- Block cache with size cap. +- File-descriptor cache to bound `fopen` syscalls. 
-- Thread-safe `BTreeMap`-like API -- Mostly [safe](./UNSAFE.md) & 100% stable Rust -- Block-based tables with compression support & prefix truncation - - Optional block hash indexes in data blocks for faster point lookups [[3]](#footnotes) - - Per-level filter/index block pinning configuration -- Range & prefix searching with forward and reverse iteration -- Block caching to keep hot data in memory -- File descriptor caching with upper bound to reduce `fopen` syscalls -- *AMQ* filters (currently Bloom filters) to improve point lookup performance -- Multi-versioning of KVs, enabling snapshot reads -- Optionally partitioned block index & filters for better cache efficiency [[1]](#footnotes) -- Leveled and FIFO compaction -- Optional key-value separation for large value workloads [[2]](#footnotes), with automatic garbage collection -- Single deletion tombstones ("weak" deletion) -- Optional compaction filters to run custom logic during compactions +### Write path -Keys are limited to 65536 bytes, values are limited to 2^32 bytes. -As is normal with any kind of storage engine, larger keys and values have a bigger performance impact. +- `WriteBatch` with seqno-grouped batch writes — caller-controlled atomic visibility. +- Single deletion tombstones (`remove_weak`). +- Range tombstones (`delete_range` / `delete_prefix`). +- Merge operators for commutative LSM operations. +- Optional key-value separation (BlobTree) for large-value workloads with automatic garbage collection. -## Feature flags - -### lz4 +### Compaction -Allows using `LZ4` compression, powered by [`lz4_flex`](https://github.com/PSeitz/lz4_flex). +- Leveled, size-tiered, dynamic-leveled, and FIFO strategies. +- Intra-L0 compaction for overlapping runs. +- Major compaction (full force flush + merge). +- Optional compaction filters for custom logic during compactions. +- Merge-aware compaction resolves operands lazily. 
-*Disabled by default.* +### Storage & encoding -### zstd +- Block-based tables with optional compression (none / LZ4 / Zstd) and prefix truncation. +- Per-table data block size policy and per-table compression policy. +- Optional **zstd dictionary compression** — trained per-table or per-column for small (4-64 KiB) blocks and blob files. +- Optional **block-level encryption at rest** — AES-256-GCM, key supplied by caller. +- Optional per-table block hash indexes for faster point lookups [[3]](#footnotes). +- Optional partitioned block index & filters for better cache efficiency [[1]](#footnotes). +- Per-level filter/index block pinning configuration. -Allows using `Zstd` compression via a pure Rust implementation, powered by -[`structured-zstd`](https://github.com/structured-world/structured-zstd) (managed fork of ruzstd). -Requires no C compiler or system libraries — compiles with `cargo build` alone. -Supports both regular zstd (`CompressionType::Zstd`) and dictionary compression -(`CompressionType::ZstdDict`) for improved ratios on small table blocks (4–64 KiB) -and blob files. +### Concurrency & API -**Current limitations:** -- Decompression throughput is ~2–3.5× slower than the C reference implementation +- Thread-safe `BTreeMap`-like API. +- `SequenceNumberGenerator` trait — pluggable seqno source. +- Custom `UserComparator` for non-lexicographic ordering. +- MVCC: snapshot reads at a chosen `SeqNo`. -*Disabled by default.* +### Internals -### zstd-pure +- 100% stable Rust, MSRV 1.92. +- No FFI: zstd via [`structured-zstd`](https://github.com/structured-world/structured-zstd) (pure-Rust), LZ4 via `lz4_flex`, AES via `aes-gcm`. +- Pluggable `Fs` trait — back the engine on the standard filesystem, on `io_uring`, on an in-memory `MemFs`, or on a custom implementation. +- Pluggable `CompressionProvider` for third-party codecs. -Deprecated alias for `zstd`. Enabling `zstd-pure` is equivalent to enabling `zstd` -and will be removed in a future release. 
+## Limits -*Disabled by default.* +- Keys: up to 65,535 bytes (the on-disk encoding caps the key-length field at `u16`). +- Values: up to 4,294,967,295 bytes (`2³² − 1`; the encoding caps the value-length field at `u32`). +- Larger keys and values carry a proportional performance cost. -### bytes +## Feature flags -Uses [`bytes`](https://github.com/tokio-rs/bytes) as the underlying `Slice` type. +All optional, all off by default. The default build is the minimal core (no compression, no encryption, std filesystem). Every flag below is gated because it pulls in extra dependencies or runtime overhead. -*Disabled by default.* +| Flag | Pulls in | Enable when | +|---|---|---| +| `lz4` | [`lz4_flex`](https://github.com/PSeitz/lz4_flex) | Block compression wanted, decompression latency matters more than ratio. | +| `zstd` | [`structured-zstd`](https://github.com/structured-world/structured-zstd) (pure-Rust, no FFI) | Block compression wanted, ratio matters more than absolute decompression speed. Supports `CompressionType::Zstd` and dictionary-mode `CompressionType::ZstdDict`. Decompression is ~2-3.5× slower than C reference. | +| `encryption` | `aes-gcm`, `rand_chacha` | AES-256-GCM block encryption at rest. Keys are caller-managed. | +| `io-uring` (linux only) | [`io-uring`](https://github.com/tokio-rs/io-uring) | I/O-bound workload on a modern Linux kernel — adds an `io_uring` `Fs` backend. | +| `bytes_1` | [`bytes`](https://github.com/tokio-rs/bytes) | Consumer already speaks `bytes::Bytes` (tokio/hyper/tonic stack) and wants zero-copy interop with engine slices. | +| `metrics` | — | Production observability or profiling. Compiles in atomic counters around block I/O, filter probes, compaction, and cache hit rates (`tree.metrics()`). Small but non-zero hot-path cost. | +| `ribbon-serde` | `serde` | Snapshotting the internal `RibbonFilterRepr` for debugging or out-of-band transport. Not used by the on-disk format. 
| ## Benchmarks -CI runs [`db_bench`](tools/db_bench) on every push to `main` and on pull requests. -Results from `main` are published to the -[benchmark dashboard](https://structured-world.github.io/coordinode-lsm-tree/dev/bench/). -PRs that regress performance by >15% trigger an alert; >25% regression fails CI. +CI runs [`db_bench`](tools/db_bench) on every push to `main` and on pull requests. Results from `main` are published to the [benchmark dashboard](https://structured-world.github.io/coordinode-lsm-tree/dev/bench/). PRs regressing performance by more than 15% trigger an alert; more than 25% fails CI. -Flamegraphs are generated on every merge to `main` using instrumented `db_bench` runs -and published under `flamegraphs//flamegraph.svg` on -[gh-pages](https://structured-world.github.io/coordinode-lsm-tree/). +Flamegraphs are generated on every merge to `main` from instrumented `db_bench` runs and published under `flamegraphs//flamegraph.svg` on [gh-pages](https://structured-world.github.io/coordinode-lsm-tree/). -To run Criterion microbenchmarks locally: +Local Criterion microbenchmarks: ```bash cargo bench --features lz4 ``` -To generate flamegraphs locally (requires the `flamegraph` feature): +Local flamegraphs: ```bash cd tools/db_bench cargo run --release --features flamegraph -- \ --benchmark all --num 100000 --flamegraph --skip-calibration -# Folded stacks written to target/flamegraphs/all.folded -# Render with: cargo install inferno && inferno-flamegraph target/flamegraphs/all.folded > flame.svg +# Folded stacks: target/flamegraphs/all.folded +# Render: cargo install inferno && inferno-flamegraph target/flamegraphs/all.folded > flame.svg ``` -## Support the Project +## Support the project
@@ -121,13 +118,17 @@ USDT (TRC-20): `TFDsezHa1cBkoeZT5q2T49Wp66K8t2DmdA`
+## Credits + +Originally created by Marvin Blum as part of [fjall-rs/lsm-tree](https://github.com/fjall-rs/lsm-tree); this codebase carries the original copyright (`Copyright (c) 2024-present, fjall-rs`). The vendored Ribbon filter (`src/table/filter/ribbon/`) is by [William Rågstad](https://github.com/WilliamRagstad) — see [`src/table/filter/ribbon/_vendored/`](src/table/filter/ribbon/_vendored/) for the upstream license texts. + ## License -All source code is licensed under Apache-2.0. +All source code is licensed under [Apache-2.0](LICENSE-APACHE). Each first-party `.rs` file carries an `SPDX-License-Identifier: Apache-2.0` header alongside the original-author copyright and the maintainer copyright (Structured World Foundation). Contributions are accepted under the same license. -All contributions are to be licensed as Apache-2.0. +The vendored Ribbon filter (`src/table/filter/ribbon/`) keeps its upstream layout — it carries William Rågstad's per-module licensing commentary rather than per-file SPDX headers, plus the original `LICENSE-APACHE` and `LICENSE-MIT` preserved verbatim in `src/table/filter/ribbon/_vendored/`. The upstream crate is dual-licensed (`MIT OR Apache-2.0`); we redistribute the vendored copy only under the Apache-2.0 arm per Apache-2.0 §4. -Originally derived from [fjall-rs/lsm-tree](https://github.com/fjall-rs/lsm-tree). Independently maintained by [Structured World Foundation](https://sw.foundation). +Maintained by [Structured World Foundation](https://sw.foundation). ## Footnotes diff --git a/benches/bloom.rs b/benches/bloom.rs index d198c0254..10dbbbcf9 100644 --- a/benches/bloom.rs +++ b/benches/bloom.rs @@ -1,7 +1,21 @@ +//! BuRR filter microbenches (construction + probe). +//! +//! The bench file is still named `bloom.rs` for git-history continuity — +//! the standard bloom filter it used to benchmark has been replaced by +//! BuRR (Bumped Ribbon Retrieval). Compare absolute numbers against the +//! 
pre-BuRR runs to track migration deltas. + use criterion::{Criterion, criterion_group, criterion_main}; +use lsm_tree::hash::hash64; +use lsm_tree::table::filter::ribbon::burr::{ + BurrBuilder, BurrFilter, BurrFilterReader, BurrParams, +}; use rand::RngExt; +use std::collections::hash_map::DefaultHasher; +use std::hash::BuildHasherDefault; + +type Hasher = BuildHasherDefault<DefaultHasher>; -// Not really worth it anymore on new CPUs...? fn fast_block_index(c: &mut Criterion) { pub fn fast_impl(h: u64, num_blocks: usize) -> usize { // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ @@ -26,75 +40,138 @@ fn fast_block_index(c: &mut Criterion) { }); } -fn standard_filter_construction(c: &mut Criterion) { - use lsm_tree::table::filter::standard_bloom::Builder; - +fn burr_filter_construction(c: &mut Criterion) { let mut rng = rand::rng(); - c.bench_function("standard bloom filter add key, 1M", |b| { - let mut filter = Builder::with_fp_rate(1_000_000, 0.01); - - b.iter(|| { - let mut key = [0; 16]; - rng.fill(&mut key[..]); - - filter.set_with_hash(Builder::get_hash(&key)); + for n in [100_000_usize, 1_000_000] { + let label = format!("burr filter build, {n} keys @ FPR=1%"); + c.bench_function(&label, |b| { + // Pre-hash a key universe so the bench measures BuRR build cost, + // not RNG. 
+ let mut keys = Vec::with_capacity(n); + for _ in 0..n { + let mut key = [0_u8; 16]; + rng.fill(&mut key[..]); + keys.push(hash64(&key)); + } + + b.iter(|| { + let params = BurrParams::with_fp_rate(n, 0.01).expect("params"); + let builder = BurrBuilder::new(params, Hasher::default()).expect("builder"); + let filter: BurrFilter = builder.build_from_hashes(&keys).expect("build"); + std::hint::black_box(filter.layer_count()); + }); }); - }); - - c.bench_function("standard bloom filter add key, 10M", |b| { - let mut filter = Builder::with_fp_rate(10_000_000, 0.01); - - b.iter(|| { - let mut key = [0; 16]; - rng.fill(&mut key[..]); - - filter.set_with_hash(Builder::get_hash(&key)); - }); - }); + } } -fn standard_filter_contains(c: &mut Criterion) { - use lsm_tree::table::filter::standard_bloom::Builder; - - let keys = (0..100_000u128) +fn burr_filter_contains(c: &mut Criterion) { + let keys: Vec<Vec<u8>> = (0..100_000_u128) .map(|x| x.to_be_bytes().to_vec()) - .collect::<Vec<_>>(); - - // 3 representative FPR levels (low / medium / high precision). - // Reduced from 5 to cut benchmark count without losing coverage. - for fpr in [0.01, 0.001, 0.0001] { - // Bloom filter sized larger than the key set to trigger CPU cache misses, - // but not so large that setup time dominates (was 100M, now 1M). - let n = 1_000_000; + .collect(); - let mut filter = Builder::with_fp_rate(n, fpr); + for fpr in [0.01_f32, 0.001, 0.0001] { + let n = 1_000_000_usize; - for key in &keys { - filter.set_with_hash(Builder::get_hash(key)); + let hashes: Vec<u64> = keys.iter().map(|k| hash64(k)).collect(); + // Pad to n with random hashes so the filter is sized realistically. 
+ let mut rng = rand::rng(); + let mut padded = hashes.clone(); + while padded.len() < n { + padded.push(rng.random::<u64>()); } - let mut rng = rand::rng(); + let params = BurrParams::with_fp_rate(n, fpr).expect("params"); + let builder = BurrBuilder::new(params, Hasher::default()).expect("builder"); + let filter = builder.build_from_hashes(&padded).expect("build"); + let filter_bytes = filter.to_wire_bytes(); - let filter_bytes = filter.build(); + // Long-lived reader matches the table read path (FilterBlock + // pins the parsed view); construct ONCE outside b.iter to + // measure steady-state probe latency, not parse+probe. + let reader = BurrFilterReader::new(&filter_bytes).unwrap(); + c.bench_function( + &format!( + "burr filter contains (probe-only), true positive (FPR={}%)", + fpr * 100.0 + ), + |b| { + b.iter(|| { + use rand::seq::IndexedRandom; + let sample = keys.choose(&mut rng).unwrap(); + let hash = hash64(sample); + assert!(reader.contains_hash(hash)); + }); + }, + ); + // Separate decode+probe bench so the cost of parsing the wire + // header is also visible — e.g. for callers that don't pin a + // long-lived reader. 
+ c.bench_function( &format!( - "standard bloom filter contains key, true positive ({}%)", - fpr * 100.0, + "burr filter contains (decode+probe), true positive (FPR={}%)", + fpr * 100.0 ), |b| { b.iter(|| { use rand::seq::IndexedRandom; - use lsm_tree::table::filter::standard_bloom::StandardBloomFilterReader as Reader; + let reader = BurrFilterReader::new(&filter_bytes).unwrap(); + let sample = keys.choose(&mut rng).unwrap(); + let hash = hash64(sample); + assert!(reader.contains_hash(hash)); + }); + }, + ); + } +} - // NOTE: To make the costs more realistic, we - // pretend we are reading the filter straight from the block - let filter = Reader::new(&filter_bytes).unwrap(); +/// Standard (single-layer) Ribbon contains_in bench — apples-to-apples +/// against BuRR's contains_hash so the BuRR multi-layer overhead vs +/// pure Ribbon stays visible. +fn ribbon_filter_contains(c: &mut Criterion) { + use lsm_tree::table::filter::ribbon::{Mode, Params, RibbonBuilder}; + + let keys: Vec<u64> = (0..100_000_u64).collect(); + + for fpr in [0.01_f32, 0.001, 0.0001] { + let n = 1_000_000_usize; + // r = ceil(-log2(fpr)) — matches what BuRR picks internally. + #[expect( + clippy::cast_possible_truncation, + reason = "r is derived from bounded FPR inputs for this benchmark" + )] + #[expect( + clippy::cast_sign_loss, + reason = "(-log2(fpr)).ceil() is non-negative for fpr in (0, 1)" + )] + let r = (-fpr.log2()).ceil() as usize; + let params = Params::new(n, 64, r, Mode::Standard) + .expect("ribbon params") + .with_seed(0); + let builder = RibbonBuilder::new(params, Hasher::default()).expect("builder"); + // Pad the key set to n with random fillers so the Ribbon load + // factor matches the BuRR bench above. Without padding the + // Ribbon body would be ~10% loaded vs BuRR's ~70-90%, skewing + // probe-latency conclusions. 
+ let mut rng = rand::rng(); + let mut padded = keys.clone(); + while padded.len() < n { + padded.push(rng.random::<u64>()); + } + let filter = builder.build(&padded).expect("build"); + let mut scratch = filter.new_scratch(); + c.bench_function( + &format!( + "standard ribbon contains, true positive (FPR={}%)", + fpr * 100.0 + ), + |b| { + b.iter(|| { + use rand::seq::IndexedRandom; let sample = keys.choose(&mut rng).unwrap(); - let hash = Builder::get_hash(sample); - assert!(filter.contains_hash(hash)); + assert!(filter.contains_in(sample, &mut scratch)); }); }, ); @@ -104,7 +181,8 @@ fn standard_filter_contains(c: &mut Criterion) { criterion_group!( benches, fast_block_index, - standard_filter_construction, - standard_filter_contains, + burr_filter_construction, + burr_filter_contains, + ribbon_filter_contains, ); criterion_main!(benches); diff --git a/benches/index_block.rs b/benches/index_block.rs index 490644ef9..19fe2cb3a 100644 --- a/benches/index_block.rs +++ b/benches/index_block.rs @@ -8,10 +8,8 @@ use lsm_tree::{ Cache, Checksum, DefaultUserComparator, DescriptorTable, InternalValue, SeqNo, SharedComparator, TableId, ValueType, fs::StdFs, - table::{ - BlockHandle, BlockOffset, IndexBlock, KeyedBlockHandle, Table, Writer, - filter::standard_bloom::Builder as BloomBuilder, - }, + hash::hash64, + table::{BlockHandle, BlockOffset, IndexBlock, KeyedBlockHandle, Table, Writer}, }; use std::sync::Arc; use tempfile::TempDir; @@ -130,7 +128,7 @@ fn build_table_for_point_read(restart_interval: u8) -> BenchTable { BenchTable { _dir: dir, - key_hash: BloomBuilder::get_hash(&key), + key_hash: hash64(&key), key, table, } diff --git a/logo.png b/logo.png deleted file mode 100644 index d5873dd9a..000000000 Binary files a/logo.png and /dev/null differ diff --git a/src/abstract_tree.rs b/src/abstract_tree.rs index fee5562aa..ba9708877 100644 --- a/src/abstract_tree.rs +++ b/src/abstract_tree.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 
2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SeqNo, TableId, Tree, diff --git a/src/active_tombstone_set.rs b/src/active_tombstone_set.rs index 5f06c2e11..878f48ccc 100644 --- a/src/active_tombstone_set.rs +++ b/src/active_tombstone_set.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Active tombstone sets for tracking range tombstones during iteration. //! diff --git a/src/any_tree.rs b/src/any_tree.rs index 7e3259df9..55b388cf5 100644 --- a/src/any_tree.rs +++ b/src/any_tree.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{BlobTree, Tree}; use enum_dispatch::enum_dispatch; diff --git a/src/blob_tree/gc.rs b/src/blob_tree/gc.rs index 96eef7a39..98ba4b177 100644 --- a/src/blob_tree/gc.rs +++ b/src/blob_tree/gc.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ blob_tree::handle::BlobIndirection, coding::Decode, compaction::stream::DroppedKvCallback, diff --git a/src/blob_tree/handle.rs b/src/blob_tree/handle.rs index 364508482..d2171801a 100644 --- a/src/blob_tree/handle.rs +++ 
b/src/blob_tree/handle.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ coding::{Decode, Encode}, diff --git a/src/blob_tree/ingest.rs b/src/blob_tree/ingest.rs index 6ee5a490b..2ea3e1a87 100644 --- a/src/blob_tree/ingest.rs +++ b/src/blob_tree/ingest.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ SeqNo, UserKey, UserValue, blob_tree::handle::BlobIndirection, file::BLOBS_FOLDER, diff --git a/src/blob_tree/mod.rs b/src/blob_tree/mod.rs index 71b2cec03..b1d929541 100644 --- a/src/blob_tree/mod.rs +++ b/src/blob_tree/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod gc; pub mod handle; @@ -784,8 +784,7 @@ impl AbstractTree for BlobTree { let miss_keys: Vec<(usize, u64)> = remaining .iter() .map(|&idx| { - let hash = - crate::table::filter::standard_bloom::Builder::get_hash(keys[idx].as_ref()); + let hash = crate::hash::hash64(keys[idx].as_ref()); (idx, hash) }) .collect(); diff --git a/src/cache.rs b/src/cache.rs index bb0969c6b..7d82ac524 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 
2026-present, Structured World Foundation use crate::table::block::Header; use crate::table::{Block, BlockOffset}; diff --git a/src/checksum.rs b/src/checksum.rs index 3e0f88ff6..da6c23a66 100644 --- a/src/checksum.rs +++ b/src/checksum.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ChecksumType { diff --git a/src/coding.rs b/src/coding.rs index 862347ae7..3388afa65 100644 --- a/src/coding.rs +++ b/src/coding.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use std::io::{Read, Write}; diff --git a/src/compaction/drop_range.rs b/src/compaction/drop_range.rs index abd899c6b..cf577afb6 100644 --- a/src/compaction/drop_range.rs +++ b/src/compaction/drop_range.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Choice, CompactionStrategy}; use crate::compaction::state::CompactionState; diff --git a/src/compaction/fifo.rs b/src/compaction/fifo.rs index b1b53e5aa..8bb6f033e 100644 --- a/src/compaction/fifo.rs +++ b/src/compaction/fifo.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World 
Foundation use super::{Choice, CompactionStrategy}; use crate::{ diff --git a/src/compaction/flavour.rs b/src/compaction/flavour.rs index 0b82237f7..6edf7eeac 100644 --- a/src/compaction/flavour.rs +++ b/src/compaction/flavour.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::blob_tree::FragmentationMap; use crate::blob_tree::handle::BlobIndirection; diff --git a/src/compaction/leveled/mod.rs b/src/compaction/leveled/mod.rs index 09631d2e1..f84c1cfd0 100644 --- a/src/compaction/leveled/mod.rs +++ b/src/compaction/leveled/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[cfg(test)] #[allow( diff --git a/src/compaction/maintenance.rs b/src/compaction/maintenance.rs index aa0974c7a..c009cb852 100644 --- a/src/compaction/maintenance.rs +++ b/src/compaction/maintenance.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Choice, CompactionStrategy}; use crate::{config::Config, level_manifest::LevelManifest, segment::Segment, HashSet, SegmentId}; diff --git a/src/compaction/major.rs b/src/compaction/major.rs index 8a5241fce..70caf7358 100644 --- a/src/compaction/major.rs +++ b/src/compaction/major.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 
2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Choice, CompactionStrategy, Input as CompactionInput}; use crate::{ diff --git a/src/compaction/mod.rs b/src/compaction/mod.rs index f83481b0f..3e3364231 100644 --- a/src/compaction/mod.rs +++ b/src/compaction/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Contains compaction strategies diff --git a/src/compaction/movedown.rs b/src/compaction/movedown.rs index cfbf071c2..9539f87a5 100644 --- a/src/compaction/movedown.rs +++ b/src/compaction/movedown.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Choice, CompactionStrategy, Input}; use crate::{Config, compaction::state::CompactionState, table::Table, version::Version}; diff --git a/src/compaction/pulldown.rs b/src/compaction/pulldown.rs index 135f642ae..016661170 100644 --- a/src/compaction/pulldown.rs +++ b/src/compaction/pulldown.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Choice, CompactionStrategy}; use crate::{ diff --git a/src/compaction/state/hidden_set.rs b/src/compaction/state/hidden_set.rs index 6b3d55739..b6a545ca0 100644 --- a/src/compaction/state/hidden_set.rs +++ b/src/compaction/state/hidden_set.rs @@ -1,6 +1,6 @@ +// 
SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::TableId; diff --git a/src/compaction/state/mod.rs b/src/compaction/state/mod.rs index a7e70b627..1173a424e 100644 --- a/src/compaction/state/mod.rs +++ b/src/compaction/state/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub mod hidden_set; diff --git a/src/compaction/stream.rs b/src/compaction/stream.rs index e23e76531..f8578d6a1 100644 --- a/src/compaction/stream.rs +++ b/src/compaction/stream.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{InternalValue, SeqNo, UserKey, UserValue, ValueType, merge_operator::MergeOperator}; use std::{collections::VecDeque, iter::Peekable, sync::Arc}; diff --git a/src/compaction/tiered/mod.rs b/src/compaction/tiered/mod.rs index 627d0d284..c8b2ca68a 100644 --- a/src/compaction/tiered/mod.rs +++ b/src/compaction/tiered/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Choice, CompactionStrategy, Input as CompactionInput}; use crate::{ diff --git a/src/compaction/worker.rs b/src/compaction/worker.rs index c00d41d90..18762b3e8 
100644 --- a/src/compaction/worker.rs +++ b/src/compaction/worker.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{CompactionAction, CompactionResult, CompactionStrategy, Input as CompactionPayload}; use crate::{ diff --git a/src/comparator.rs b/src/comparator.rs index a396e3fbe..01e1feb5a 100644 --- a/src/comparator.rs +++ b/src/comparator.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use std::sync::Arc; diff --git a/src/compression/mod.rs b/src/compression/mod.rs index 8bc37238a..119f9adda 100644 --- a/src/compression/mod.rs +++ b/src/compression/mod.rs @@ -1,9 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[cfg(feature = "zstd")] -mod zstd_pure; +mod zstd_backend; use crate::coding::{Decode, Encode}; use byteorder::{ReadBytesExt, WriteBytesExt}; @@ -50,7 +50,7 @@ pub trait CompressionProvider { /// The active zstd backend (pure Rust via `structured-zstd`). #[cfg(feature = "zstd")] -pub type ZstdBackend = zstd_pure::ZstdPureProvider; +pub type ZstdBackend = zstd_backend::ZstdProvider; /// Pre-trained zstd dictionary for improved compression of small blocks. 
/// diff --git a/src/compression/zstd_pure.rs b/src/compression/zstd_backend.rs similarity index 94% rename from src/compression/zstd_pure.rs rename to src/compression/zstd_backend.rs index 6727c4cd9..1fdaefa1a 100644 --- a/src/compression/zstd_pure.rs +++ b/src/compression/zstd_backend.rs @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, Structured World Foundation -// This source code is licensed under the Apache 2.0 License -// (found in the LICENSE-APACHE file in the repository) //! Pure Rust zstd backend via the `structured-zstd` crate. //! @@ -292,9 +291,9 @@ fn do_decompress_with_dict( } /// Pure Rust zstd backend. -pub struct ZstdPureProvider; +pub struct ZstdProvider; -impl CompressionProvider for ZstdPureProvider { +impl CompressionProvider for ZstdProvider { fn compress(data: &[u8], level: i32) -> crate::Result<Vec<u8>> { let compressed = structured_zstd::encoding::compress_to_vec( std::io::Cursor::new(data), @@ -581,7 +580,7 @@ mod tests { #[test] fn decompress_with_dict_returns_correct_plaintext() { let dict = ZstdDictionary::new(DICT); - let result = ZstdPureProvider::decompress_with_dict(COMPRESSED, &dict, PLAINTEXT.len() + 1) + let result = ZstdProvider::decompress_with_dict(COMPRESSED, &dict, PLAINTEXT.len() + 1) .expect("decompression should succeed"); assert_eq!( result, PLAINTEXT, @@ -595,9 +594,8 @@ mod tests { // Call three times to exercise the TLS caching path (second and third // calls must reuse the cached FrameDecoder without re-parsing the dict). for _ in 0..3 { - let result = - ZstdPureProvider::decompress_with_dict(COMPRESSED, &dict, PLAINTEXT.len() + 1) - .expect("decompression should succeed on every call"); + let result = ZstdProvider::decompress_with_dict(COMPRESSED, &dict, PLAINTEXT.len() + 1) + .expect("decompression should succeed on every call"); assert_eq!(result, PLAINTEXT); } } @@ -609,7 +607,7 @@ mod tests { // capacity guard added to `decode_all_to_vec`). 
let dict = ZstdDictionary::new(DICT); let too_small = PLAINTEXT.len() / 2; - let result = ZstdPureProvider::decompress_with_dict(COMPRESSED, &dict, too_small); + let result = ZstdProvider::decompress_with_dict(COMPRESSED, &dict, too_small); assert!( matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })), "expected DecompressedSizeTooLarge but got {result:?}", @@ -625,7 +623,7 @@ mod tests { // same pure backend. let dict = ZstdDictionary::new(DICT); - let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, DICT) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, DICT) .expect("compression with dict should succeed"); // The output must be a non-empty zstd frame. @@ -635,7 +633,7 @@ mod tests { ); let decompressed = - ZstdPureProvider::decompress_with_dict(&compressed, &dict, PLAINTEXT.len() + 1) + ZstdProvider::decompress_with_dict(&compressed, &dict, PLAINTEXT.len() + 1) .expect("decompression with dict should succeed"); assert_eq!( @@ -649,7 +647,7 @@ mod tests { // zstd frames always start with the little-endian magic number 0xFD2FB528 // (bytes: 0x28, 0xB5, 0x2F, 0xFD). A mismatched magic means the frame is // corrupt or the output is not a valid zstd frame. 
- let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, DICT) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, DICT) .expect("compression should succeed"); assert!( @@ -667,10 +665,10 @@ mod tests { for level in [1, 3, 9, 19] { let compressed = - ZstdPureProvider::compress_with_dict(PLAINTEXT, level, DICT).expect("compress"); + ZstdProvider::compress_with_dict(PLAINTEXT, level, DICT).expect("compress"); let decompressed = - ZstdPureProvider::decompress_with_dict(&compressed, &dict, PLAINTEXT.len() + 1) + ZstdProvider::decompress_with_dict(&compressed, &dict, PLAINTEXT.len() + 1) .expect("decompress"); assert_eq!( @@ -685,7 +683,7 @@ mod tests { // An empty dictionary slice must return an error because there is no // content to use as LZ77 history. Both the finalized-format path and // the raw-content path reject empty input. - let result = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, b""); + let result = ZstdProvider::compress_with_dict(PLAINTEXT, 3, b""); assert!( result.is_err(), "expected an error for empty dictionary, got Ok" @@ -699,11 +697,11 @@ mod tests { let raw_content_dict = b"this is raw content dictionary data for matching"; let dict = ZstdDictionary::new(raw_content_dict); - let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, raw_content_dict) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, raw_content_dict) .expect("compression with raw content dict should succeed"); let decompressed = - ZstdPureProvider::decompress_with_dict(&compressed, &dict, PLAINTEXT.len() + 1) + ZstdProvider::decompress_with_dict(&compressed, &dict, PLAINTEXT.len() + 1) .expect("decompression with raw content dict should succeed"); assert_eq!( @@ -717,10 +715,10 @@ mod tests { // Edge case: compressing an empty payload with a dictionary must round-trip. 
let dict = ZstdDictionary::new(DICT); - let compressed = ZstdPureProvider::compress_with_dict(&[], 3, DICT) + let compressed = ZstdProvider::compress_with_dict(&[], 3, DICT) .expect("compression of empty payload should succeed"); - let decompressed = ZstdPureProvider::decompress_with_dict(&compressed, &dict, 1) + let decompressed = ZstdProvider::decompress_with_dict(&compressed, &dict, 1) .expect("decompression of empty payload should succeed"); assert!( @@ -739,10 +737,10 @@ mod tests { let raw_dict = b"raw content dictionary for empty payload smoke test"; let dict = ZstdDictionary::new(raw_dict); - let compressed = ZstdPureProvider::compress_with_dict(&[], 3, raw_dict) + let compressed = ZstdProvider::compress_with_dict(&[], 3, raw_dict) .expect("compression of empty payload with raw-content dict should succeed"); - let decompressed = ZstdPureProvider::decompress_with_dict(&compressed, &dict, 1).expect( + let decompressed = ZstdProvider::decompress_with_dict(&compressed, &dict, 1).expect( "decompression of empty payload with raw-content dict (capacity=1) should succeed", ); @@ -762,10 +760,10 @@ mod tests { let raw_dict = b"raw content dictionary for empty payload exact-capacity test"; let dict = ZstdDictionary::new(raw_dict); - let compressed = ZstdPureProvider::compress_with_dict(&[], 3, raw_dict) + let compressed = ZstdProvider::compress_with_dict(&[], 3, raw_dict) .expect("compression of empty payload with raw-content dict should succeed"); - let decompressed = ZstdPureProvider::decompress_with_dict(&compressed, &dict, 0).expect( + let decompressed = ZstdProvider::decompress_with_dict(&compressed, &dict, 0).expect( "decompression of empty payload with raw-content dict (capacity=0) should succeed", ); @@ -784,14 +782,14 @@ mod tests { // the decode_raw_content_bounded loop capacity guard. 
let raw_dict = b"this is raw content dictionary data for matching"; - let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) .expect("compression with raw content dict should succeed"); let dict = ZstdDictionary::new(raw_dict); // Capacity set to half the plaintext length — frame decompresses to // more than this limit so the guard must fire. let too_small = PLAINTEXT.len() / 2; - let result = ZstdPureProvider::decompress_with_dict(&compressed, &dict, too_small); + let result = ZstdProvider::decompress_with_dict(&compressed, &dict, too_small); assert!( matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })), @@ -807,11 +805,11 @@ mod tests { // first block for a zero-capacity output buffer. let raw_dict = b"raw content dict for zero-capacity test"; - let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) .expect("compression should succeed"); let dict = ZstdDictionary::new(raw_dict); - let result = ZstdPureProvider::decompress_with_dict(&compressed, &dict, 0); + let result = ZstdProvider::decompress_with_dict(&compressed, &dict, 0); assert!( matches!(result, Err(crate::Error::DecompressedSizeTooLarge { .. })), @@ -960,10 +958,10 @@ mod tests { // failure on the finalized-dict path. Exercises the Io error branch in // do_decompress_with_dict when decode_all_to_vec fails. 
let dict = ZstdDictionary::new(DICT); - let mut frame = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, DICT) - .expect("compression must succeed"); + let mut frame = + ZstdProvider::compress_with_dict(PLAINTEXT, 3, DICT).expect("compression must succeed"); frame.pop(); // truncate last byte → corrupt frame - let result = ZstdPureProvider::decompress_with_dict(&frame, &dict, 1024); + let result = ZstdProvider::decompress_with_dict(&frame, &dict, 1024); assert!( matches!(result, Err(crate::Error::Io(_))), "corrupt frame must return Err(Io(_)) on finalized dict path; got {result:?}", @@ -977,10 +975,10 @@ mod tests { // branch in do_decompress_with_dict. let raw_dict = b"some raw content dictionary bytes for testing corruption"; let dict = ZstdDictionary::new(raw_dict); - let mut frame = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) + let mut frame = ZstdProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) .expect("compression must succeed"); frame.pop(); // truncate last byte → corrupt frame - let result = ZstdPureProvider::decompress_with_dict(&frame, &dict, 1024); + let result = ZstdProvider::decompress_with_dict(&frame, &dict, 1024); assert!( matches!(result, Err(crate::Error::Io(_))), "corrupt frame must return Err(Io(_)) on raw-content dict path; got {result:?}", @@ -1047,7 +1045,7 @@ mod tests { // pre-check in do_decompress_with_dict returns early first (frames // produced by compress_with_dict include the frame content size). let raw_dict = b"raw content dict for remaining-zero test"; - let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) .expect("compression should succeed"); let mut cursor = std::io::Cursor::new(compressed.as_slice()); @@ -1071,7 +1069,7 @@ mod tests { // This path is unreachable through the high-level API for the same // reason as the test above. 
let raw_dict = b"raw content dict for can-exceeds-capacity test"; - let compressed = ZstdPureProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) + let compressed = ZstdProvider::compress_with_dict(PLAINTEXT, 3, raw_dict) .expect("compression should succeed"); let mut cursor = std::io::Cursor::new(compressed.as_slice()); diff --git a/src/config/block_size.rs b/src/config/block_size.rs index e402a6f45..195cae52e 100644 --- a/src/config/block_size.rs +++ b/src/config/block_size.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Block size policy #[derive(Debug, Clone, Eq, PartialEq)] diff --git a/src/config/compression.rs b/src/config/compression.rs index 61dc017e6..4c362b511 100644 --- a/src/config/compression.rs +++ b/src/config/compression.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::CompressionType; diff --git a/src/config/filter.rs b/src/config/filter.rs index 0136de725..17a51993e 100644 --- a/src/config/filter.rs +++ b/src/config/filter.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub use crate::table::filter::BloomConstructionPolicy; diff --git a/src/config/hash_ratio.rs b/src/config/hash_ratio.rs index b0fd623f1..3716daff7 100644 --- a/src/config/hash_ratio.rs +++ b/src/config/hash_ratio.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: 
Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Hash ratio policy #[derive(Debug, Clone, PartialEq)] diff --git a/src/config/mod.rs b/src/config/mod.rs index 2d811304e..0aa274583 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod block_size; mod compression; diff --git a/src/config/pinning.rs b/src/config/pinning.rs index e5de3fc37..e5502ffe5 100644 --- a/src/config/pinning.rs +++ b/src/config/pinning.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Pinning policy #[derive(Debug, Clone, Eq, PartialEq)] diff --git a/src/config/restart_interval.rs b/src/config/restart_interval.rs index fe9d412ff..33703efb8 100644 --- a/src/config/restart_interval.rs +++ b/src/config/restart_interval.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Restart interval policy #[derive(Debug, Clone, Eq, PartialEq)] diff --git a/src/descriptor_table.rs b/src/descriptor_table.rs index 903365687..14e717f5c 100644 --- a/src/descriptor_table.rs +++ b/src/descriptor_table.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright 
(c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{GlobalTableId, fs::FsFile}; use quick_cache::{UnitWeighter, sync::Cache as QuickCache}; diff --git a/src/double_ended_peekable.rs b/src/double_ended_peekable.rs index 0185b7ce6..00a17feeb 100644 --- a/src/double_ended_peekable.rs +++ b/src/double_ended_peekable.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! A fork of //! to allow accessing the inner type diff --git a/src/encryption.rs b/src/encryption.rs index 5ab034d40..e2dfbe448 100644 --- a/src/encryption.rs +++ b/src/encryption.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Block-level encryption at rest. //! 
diff --git a/src/error.rs b/src/error.rs index 13ff6b6cc..edc1580b3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{Checksum, CompressionType}; diff --git a/src/file.rs b/src/file.rs index e4905e8ed..142a3d2fc 100644 --- a/src/file.rs +++ b/src/file.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ Slice, diff --git a/src/file_accessor.rs b/src/file_accessor.rs index 318c37901..64097ea50 100644 --- a/src/file_accessor.rs +++ b/src/file_accessor.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::GlobalTableId; use crate::descriptor_table::DescriptorTable; diff --git a/src/format_version.rs b/src/format_version.rs index b4c5db4f3..28e820ff8 100644 --- a/src/format_version.rs +++ b/src/format_version.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Disk format version #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -16,6 +16,19 @@ pub enum FormatVersion { /// Version for range-tombstone SST semantics V4, + + /// `BuRR` (Bumped Ribbon Retrieval) filter wire 
format. Filter + /// blocks are no longer Bloom-encoded; the `filter_type` byte + + /// per-layer header layout is documented in + /// `src/table/filter/ribbon/burr/wire.rs`. + /// + /// V3/V4 ↔ V5 incompatibility is enforced primarily by the manifest + /// version gate at `Tree::open` (returns `InvalidVersion` for + /// anything other than V5). If the manifest is bypassed and a + /// pre-V5 filter block reaches the decoder, the `BuRR` magic + the + /// `filter_type=2` byte plus `format_version=1` inside the `BuRR` + /// header will reject the older Bloom-shaped payload. + V5, } impl std::fmt::Display for FormatVersion { @@ -31,6 +44,7 @@ impl From<FormatVersion> for u8 { FormatVersion::V2 => 2, FormatVersion::V3 => 3, FormatVersion::V4 => 4, + FormatVersion::V5 => 5, } } } @@ -44,6 +58,7 @@ impl TryFrom<u8> for FormatVersion { 2 => Ok(Self::V2), 3 => Ok(Self::V3), 4 => Ok(Self::V4), + 5 => Ok(Self::V5), _ => Err(()), } } diff --git a/src/fs/io_uring_fs.rs b/src/fs/io_uring_fs.rs index 74c4419f6..9bfcbe860 100644 --- a/src/fs/io_uring_fs.rs +++ b/src/fs/io_uring_fs.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! `io_uring`-backed [`Fs`] implementation for high-throughput I/O on Linux. //! diff --git a/src/fs/mem_fs.rs b/src/fs/mem_fs.rs index bf3996b99..c69b21ecb 100644 --- a/src/fs/mem_fs.rs +++ b/src/fs/mem_fs.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! In-memory [`Fs`] implementation for testing and ephemeral trees. //! 
diff --git a/src/fs/mod.rs b/src/fs/mod.rs index ab743b548..e5b69eef7 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Pluggable filesystem abstraction for I/O backends. //! diff --git a/src/fs/std_fs.rs b/src/fs/std_fs.rs index f537586d2..7881ffde6 100644 --- a/src/fs/std_fs.rs +++ b/src/fs/std_fs.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Fs, FsDirEntry, FsFile, FsMetadata, FsOpenOptions}; use std::fs::{File, OpenOptions}; diff --git a/src/hash.rs b/src/hash.rs index 5d5d17168..4a000483b 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,9 +1,16 @@ +//! Stable hash functions used across the filter and table subsystems. +//! +//! `hash64` is the canonical 64-bit key hash piped into `BuRR` filters; it +//! is xxh3-based and deterministic across runs / processes / hosts. + /// Generates a 64-bit hash using xxh3. +#[must_use] pub fn hash64(bytes: &[u8]) -> u64 { xxhash_rust::xxh3::xxh3_64(bytes) } /// Generates a 128-bit hash using xxh3. +#[must_use] pub fn hash128(bytes: &[u8]) -> u128 { xxhash_rust::xxh3::xxh3_128(bytes) } diff --git a/src/heap.rs b/src/heap.rs index 786dbb380..bdefd52c9 100644 --- a/src/heap.rs +++ b/src/heap.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! 
Custom merge heap backed by a sorted vector. //! diff --git a/src/ingestion.rs b/src/ingestion.rs index 96dc79d79..b78c96caa 100644 --- a/src/ingestion.rs +++ b/src/ingestion.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ AnyTree, UserKey, UserValue, blob_tree::ingest::BlobIngestion, tree::ingest::Ingestion, diff --git a/src/key.rs b/src/key.rs index 193ea6f4d..5048e4792 100644 --- a/src/key.rs +++ b/src/key.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{SeqNo, UserKey, ValueType, comparator::UserComparator}; use std::cmp::Reverse; diff --git a/src/key_range.rs b/src/key_range.rs index d59ca0278..3f8bda960 100644 --- a/src/key_range.rs +++ b/src/key_range.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{Slice, UserKey}; use std::ops::Bound; diff --git a/src/lib.rs b/src/lib.rs index 516010133..db7ba50b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,43 +1,59 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation -//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs). +//! 
Embedded LSM-tree storage engine. //! -//! ##### NOTE +//! Provides keyed point reads, prefix and range scans, MVCC snapshots, block +//! and file-descriptor caching, and a configurable compaction subsystem. No +//! write-ahead log — durability is the caller's responsibility (`flush_active_memtable` +//! forces persistence when needed). //! -//! > This crate only provides a primitive LSM-tree, not a full storage engine. -//! > You probably want to use instead. -//! > For example, it does not ship with a write-ahead log, so writes are not -//! > persisted until manually flushing the memtable. +//! ## Highlights //! -//! ##### About +//! - **AMQ filter**: `BuRR` (Bumped Ribbon Retrieval, Walzer & Dillinger 2022) for +//! per-key and per-prefix membership checks. ~30% smaller filter blocks than a +//! same-FPR Bloom filter, or ~10× tighter FPR at the same memory budget. +//! - **Compression**: pure-Rust zstd (incl. dictionary mode), LZ4, or none — +//! per-table and per-level policy. +//! - **Encryption at rest**: AES-256-GCM block encryption with a caller-supplied +//! key. +//! - **Range tombstones**: `delete_range` / `delete_prefix` (SST-encoded; the +//! feature was added in disk format V4 and remains supported in the current +//! V5 format — V5's breaking change is the filter wire format, not the +//! tombstone encoding). +//! - **Merge operators**: commutative-merge LSM operations with lazy resolution. +//! - **K/V separation (`BlobTree`)**: large-value workloads with automatic GC. +//! - **Pluggable `Fs`**: standard, in-memory, `io_uring`, or custom backends. +//! - **MVCC**: snapshot reads at a chosen `SeqNo`, custom `UserComparator`. +//! - **Concurrency**: thread-safe `BTreeMap`-like API. //! -//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API. +//! Keys: up to 65,535 bytes (`u16` length field). Values: up to 4,294,967,295 +//! bytes (`u32` length field, `2³² − 1`). Larger keys and values +//! carry a proportional performance cost. 
//! -//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table) -//! on disk and perform fast lookup queries. -//! Instead of updating a disk-based data structure in-place, -//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`). -//! Data is then flushed to disk-resident table files when the write buffer reaches some threshold. +//! ## Quick start //! -//! Amassing many tables on disk will degrade read performance and waste disk space, so tables -//! can be periodically merged into larger tables in a process called `Compaction`. -//! Different compaction strategies have different advantages and drawbacks, and should be chosen based -//! on the workload characteristics. +//! ```no_run +//! use lsm_tree::{AbstractTree, Config, SequenceNumberCounter}; //! -//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT -//! is very fast (_O(1)_ complexity). +//! let folder = tempfile::tempdir().unwrap(); +//! let seqno = SequenceNumberCounter::default(); +//! let tree = Config::new(&folder, seqno.clone(), SequenceNumberCounter::default()) +//! .open() +//! .unwrap(); //! -//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage -//! engine, larger keys and values have a bigger performance impact. - -#![doc( - html_logo_url = "https://raw.githubusercontent.com/structured-world/coordinode-lsm-tree/main/logo.png" -)] -#![doc( - html_favicon_url = "https://raw.githubusercontent.com/structured-world/coordinode-lsm-tree/main/logo.png" -)] +//! tree.insert("key", "value", seqno.next()); +//! let value = tree.get("key", lsm_tree::SeqNo::MAX).unwrap(); +//! assert_eq!(value.map(|v| v.to_vec()), Some(b"value".to_vec())); +//! ``` +//! +//! ## On-disk format +//! +//! Current version: **V5**. V5 introduces a wire-format break for filter +//! 
blocks (`BuRR` replaces Bloom); V3 and V4 databases are not readable by +//! this version and vice versa. The manifest version gate rejects pre-V5 +//! databases at `Tree::open` time. #![deny(clippy::all, missing_docs, clippy::cargo)] #![deny(clippy::unwrap_used)] #![deny(clippy::indexing_slicing)] @@ -110,7 +126,7 @@ pub mod file; /// Pluggable filesystem abstraction for I/O backends. pub mod fs; -mod hash; +pub mod hash; mod heap; mod ingestion; mod iter_guard; diff --git a/src/manifest.rs b/src/manifest.rs index a1db61da1..a9151eb56 100644 --- a/src/manifest.rs +++ b/src/manifest.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ FormatVersion, TreeType, @@ -156,7 +156,7 @@ mod tests { let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); writer.start("format_version")?; - writer.write_u8(FormatVersion::V4.into())?; + writer.write_u8(FormatVersion::V5.into())?; writer.start("tree_type")?; writer.write_u8(TreeType::Standard.into())?; @@ -242,7 +242,7 @@ mod tests { let mut writer = sfa::Writer::from_writer(std::io::BufWriter::new(file)); writer.start("format_version")?; - writer.write_u8(FormatVersion::V4.into())?; + writer.write_u8(FormatVersion::V5.into())?; writer.start("tree_type")?; writer.write_u8(TreeType::Standard.into())?; writer.start("level_count")?; @@ -280,7 +280,7 @@ mod tests { let manifest = decode_manifest(&path, &fs)?; assert_eq!(manifest.comparator_name, "default"); assert_eq!(manifest.level_count, 7); - assert!(matches!(manifest.version, FormatVersion::V4)); + assert!(matches!(manifest.version, FormatVersion::V5)); assert!(matches!(manifest.tree_type, TreeType::Standard)); Ok(()) } diff --git a/src/memtable/arena.rs b/src/memtable/arena.rs index bdbc07902..34e553c39 100644 --- 
a/src/memtable/arena.rs +++ b/src/memtable/arena.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Multi-block bump-allocating arena for skiplist node storage. //! diff --git a/src/memtable/interval_tree.rs b/src/memtable/interval_tree.rs index bdc92d15c..ce20f483e 100644 --- a/src/memtable/interval_tree.rs +++ b/src/memtable/interval_tree.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! AVL-balanced interval tree for efficient range tombstone queries in memtables. //! diff --git a/src/memtable/mod.rs b/src/memtable/mod.rs index c1d49ac6a..e1890308c 100644 --- a/src/memtable/mod.rs +++ b/src/memtable/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub mod arena; pub mod interval_tree; diff --git a/src/memtable/skiplist.rs b/src/memtable/skiplist.rs index 3463964a5..fc4e2b27d 100644 --- a/src/memtable/skiplist.rs +++ b/src/memtable/skiplist.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Arena-based concurrent skiplist for memtable storage. //! 
diff --git a/src/memtable/value_store.rs b/src/memtable/value_store.rs index 78e02f1e4..21010fec9 100644 --- a/src/memtable/value_store.rs +++ b/src/memtable/value_store.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Lock-free segmented value storage for the memtable skiplist. //! diff --git a/src/merge.rs b/src/merge.rs index 9944872f4..f640af900 100644 --- a/src/merge.rs +++ b/src/merge.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::InternalValue; use crate::comparator::SharedComparator; diff --git a/src/merge_operator.rs b/src/merge_operator.rs index c87cb58ca..33107aae4 100644 --- a/src/merge_operator.rs +++ b/src/merge_operator.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::UserValue; use std::panic::RefUnwindSafe; diff --git a/src/metrics.rs b/src/metrics.rs index d79c363bb..43bb8fd73 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use std::sync::atomic::Ordering::Relaxed; use std::sync::atomic::{AtomicU64, AtomicUsize}; diff --git 
a/src/mvcc_stream.rs b/src/mvcc_stream.rs index 1130b55cb..e26ef2e6d 100644 --- a/src/mvcc_stream.rs +++ b/src/mvcc_stream.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::double_ended_peekable::{DoubleEndedPeekable, DoubleEndedPeekableExt}; use crate::merge_operator::MergeOperator; diff --git a/src/path.rs b/src/path.rs index 18740f5df..eb48508cd 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use std::path::{Path, PathBuf}; diff --git a/src/pinnable_slice.rs b/src/pinnable_slice.rs index dbf7c76d1..6d6fbb79e 100644 --- a/src/pinnable_slice.rs +++ b/src/pinnable_slice.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Zero-copy value reference that keeps the decompressed block buffer alive. //! diff --git a/src/prefix.rs b/src/prefix.rs index 1c622f4c6..e6febf89c 100644 --- a/src/prefix.rs +++ b/src/prefix.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Extracts prefixes from keys for prefix bloom filter indexing. 
/// @@ -109,15 +109,13 @@ pub fn compute_prefix_hash( extractor: Option<&std::sync::Arc>, prefix_bytes: &[u8], ) -> Option { - use crate::table::filter::standard_bloom::Builder; - if prefix_bytes.is_empty() { return None; } extractor .filter(|e| e.is_valid_scan_boundary(prefix_bytes)) - .map(|_| Builder::get_hash(prefix_bytes)) + .map(|_| crate::hash::hash64(prefix_bytes)) } #[cfg(test)] diff --git a/src/range.rs b/src/range.rs index 50335ac7c..23cae986f 100644 --- a/src/range.rs +++ b/src/range.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ BoxedIterator, InternalValue, diff --git a/src/range_tombstone.rs b/src/range_tombstone.rs index 8bf8aa925..203e63221 100644 --- a/src/range_tombstone.rs +++ b/src/range_tombstone.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{SeqNo, UserKey, comparator::UserComparator}; use std::cmp::Reverse; diff --git a/src/range_tombstone_filter.rs b/src/range_tombstone_filter.rs index 09b4af829..f198aaf38 100644 --- a/src/range_tombstone_filter.rs +++ b/src/range_tombstone_filter.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Bidirectional range tombstone filter for iteration. //! 
diff --git a/src/run_reader.rs b/src/run_reader.rs index 63fe2b9c7..0881f278c 100644 --- a/src/run_reader.rs +++ b/src/run_reader.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{BoxedIterator, InternalValue, Table, UserKey, version::Run}; use std::{ diff --git a/src/run_scanner.rs b/src/run_scanner.rs index 0de423b30..6846b3238 100644 --- a/src/run_scanner.rs +++ b/src/run_scanner.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{InternalValue, Table, table::Scanner, version::Run}; use std::sync::Arc; diff --git a/src/seqno.rs b/src/seqno.rs index fc1d8cc9a..5c5395a46 100644 --- a/src/seqno.rs +++ b/src/seqno.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::SeqNo; use std::{ diff --git a/src/slice/mod.rs b/src/slice/mod.rs index 2f82f7e3b..c3ccc4483 100644 --- a/src/slice/mod.rs +++ b/src/slice/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation // Using tokio bytes #[cfg(feature = "bytes_1")] diff --git a/src/slice/slice_bytes/mod.rs b/src/slice/slice_bytes/mod.rs index a229e3adb..e5dab8638 
100644 --- a/src/slice/slice_bytes/mod.rs +++ b/src/slice/slice_bytes/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use bytes::Bytes; diff --git a/src/slice/slice_default/mod.rs b/src/slice/slice_default/mod.rs index c294a983c..77c524895 100644 --- a/src/slice/slice_default/mod.rs +++ b/src/slice/slice_default/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use byteview::ByteView; diff --git a/src/slice_windows.rs b/src/slice_windows.rs index a1f2e8bce..59c15c23b 100644 --- a/src/slice_windows.rs +++ b/src/slice_windows.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub trait GrowingWindowsExt { fn growing_windows<'a>(&'a self) -> impl Iterator diff --git a/src/stop_signal.rs b/src/stop_signal.rs index 5eaa4c91a..a5cd06f80 100644 --- a/src/stop_signal.rs +++ b/src/stop_signal.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use std::sync::{Arc, atomic::AtomicBool}; diff --git a/src/table/block/binary_index/builder.rs b/src/table/block/binary_index/builder.rs index dcd0d6475..1b6eb6f8d 100644 --- 
a/src/table/block/binary_index/builder.rs +++ b/src/table/block/binary_index/builder.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use byteorder::{LittleEndian, WriteBytesExt}; diff --git a/src/table/block/binary_index/mod.rs b/src/table/block/binary_index/mod.rs index bdd12251d..df401c523 100644 --- a/src/table/block/binary_index/mod.rs +++ b/src/table/block/binary_index/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod builder; mod reader; diff --git a/src/table/block/binary_index/reader.rs b/src/table/block/binary_index/reader.rs index 09fa6c060..91b51eee6 100644 --- a/src/table/block/binary_index/reader.rs +++ b/src/table/block/binary_index/reader.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use byteorder::{LittleEndian, ReadBytesExt}; diff --git a/src/table/block/decoder.rs b/src/table/block/decoder.rs index 916f647b1..d2c4e7ad8 100644 --- a/src/table/block/decoder.rs +++ b/src/table/block/decoder.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{TRAILER_START_MARKER, binary_index::Reader as 
BinaryIndexReader}; use crate::{ diff --git a/src/table/block/encoder.rs b/src/table/block/encoder.rs index 212dc848e..04a3beca3 100644 --- a/src/table/block/encoder.rs +++ b/src/table/block/encoder.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{ super::{ diff --git a/src/table/block/hash_index/builder.rs b/src/table/block/hash_index/builder.rs index 09aa54465..d18b46ea4 100644 --- a/src/table/block/hash_index/builder.rs +++ b/src/table/block/hash_index/builder.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{MARKER_CONFLICT, MARKER_FREE, calculate_bucket_position}; use byteorder::WriteBytesExt; diff --git a/src/table/block/hash_index/mod.rs b/src/table/block/hash_index/mod.rs index 0e49816ac..9e115b8ff 100644 --- a/src/table/block/hash_index/mod.rs +++ b/src/table/block/hash_index/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! The hash index is a compact (typically <=1 byte per KV) index //! embeddeded into a block to speed up point reads. 
diff --git a/src/table/block/hash_index/reader.rs b/src/table/block/hash_index/reader.rs index 7b36ffbcd..c2c12a1e3 100644 --- a/src/table/block/hash_index/reader.rs +++ b/src/table/block/hash_index/reader.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{MARKER_CONFLICT, calculate_bucket_position}; diff --git a/src/table/block/header.rs b/src/table/block/header.rs index 42e34b8f1..3628de6ab 100644 --- a/src/table/block/header.rs +++ b/src/table/block/header.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::Checksum; use crate::checksum::ChecksummedWriter; diff --git a/src/table/block/mod.rs b/src/table/block/mod.rs index 6284c7a51..6faf6493f 100644 --- a/src/table/block/mod.rs +++ b/src/table/block/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub(crate) mod binary_index; // Crate-internal: Decoder, Decodable, ParsedItem are not part of the public API. 
diff --git a/src/table/block/offset.rs b/src/table/block/offset.rs index db7b80e81..76247846b 100644 --- a/src/table/block/offset.rs +++ b/src/table/block/offset.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation // TODO: rename FileOffset? #[derive(Copy, Clone, Default, Debug, std::hash::Hash, PartialEq, Eq, Ord, PartialOrd)] diff --git a/src/table/block/trailer.rs b/src/table/block/trailer.rs index 5b86e3f9f..f9c64dd8e 100644 --- a/src/table/block/trailer.rs +++ b/src/table/block/trailer.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{ Block, diff --git a/src/table/block/type.rs b/src/table/block/type.rs index 1a90a0104..b8d0795cc 100644 --- a/src/table/block/type.rs +++ b/src/table/block/type.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum BlockType { diff --git a/src/table/block_index/full.rs b/src/table/block_index/full.rs index 3ea106b59..19fa31ae8 100644 --- a/src/table/block_index/full.rs +++ b/src/table/block_index/full.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World 
Foundation use crate::SeqNo; use crate::comparator::SharedComparator; diff --git a/src/table/block_index/iter.rs b/src/table/block_index/iter.rs index bcf16a8fc..75f333199 100644 --- a/src/table/block_index/iter.rs +++ b/src/table/block_index/iter.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ SeqNo, diff --git a/src/table/block_index/mod.rs b/src/table/block_index/mod.rs index 60fd96af5..55e159ade 100644 --- a/src/table/block_index/mod.rs +++ b/src/table/block_index/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod full; pub mod iter; diff --git a/src/table/block_index/two_level.rs b/src/table/block_index/two_level.rs index 17628c35b..982c0a359 100644 --- a/src/table/block_index/two_level.rs +++ b/src/table/block_index/two_level.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::SeqNo; use crate::comparator::SharedComparator; diff --git a/src/table/block_index/volatile.rs b/src/table/block_index/volatile.rs index afc3e04ba..624e8c450 100644 --- a/src/table/block_index/volatile.rs +++ b/src/table/block_index/volatile.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the 
repository) +// Copyright (c) 2026-present, Structured World Foundation use super::KeyedBlockHandle; use crate::{ diff --git a/src/table/data_block/iter.rs b/src/table/data_block/iter.rs index 100ddb008..ff229988f 100644 --- a/src/table/data_block/iter.rs +++ b/src/table/data_block/iter.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ InternalValue, SeqNo, diff --git a/src/table/data_block/mod.rs b/src/table/data_block/mod.rs index 4a90f89cf..5649ede7c 100644 --- a/src/table/data_block/mod.rs +++ b/src/table/data_block/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod iter; diff --git a/src/table/filter/bit_array/builder.rs b/src/table/filter/bit_array/builder.rs deleted file mode 100644 index 9df79c28b..000000000 --- a/src/table/filter/bit_array/builder.rs +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -const BIT_MASK: u8 = 0b1000_0000_u8; - -/// Sets a bit in the byte to `true` -#[must_use] -pub fn enable_bit(byte: u8, idx: usize) -> u8 { - let bit_mask = BIT_MASK >> idx; - byte | bit_mask -} - -/// Fixed-size bit array -#[derive(Debug)] -pub struct Builder(Box<[u8]>); - -impl Builder { - #[must_use] - pub fn with_capacity(bytes: usize) -> Self { - let vec = vec![0; bytes]; - Self(vec.into_boxed_slice()) - } - - #[must_use] - pub fn from_bytes(bytes: Box<[u8]>) -> Self { - Self(bytes) - } - - #[must_use] - pub fn 
bytes(&self) -> &[u8] { - &self.0 - } - - /// Sets the i-th bit - pub fn enable_bit(&mut self, idx: usize) { - let byte_idx = idx / 8; - - #[expect(clippy::expect_used, reason = "we trust the caller")] - let byte = self.0.get_mut(byte_idx).expect("should be in bounds"); - - let bit_idx = idx % 8; - *byte = enable_bit(*byte, bit_idx); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use test_log::test; - - #[test] - fn bit_set_true() { - assert_eq!(0b0000_0010, enable_bit(0, 6)); - assert_eq!(0b1000_0000, enable_bit(0, 0)); - assert_eq!(0b0100_0000, enable_bit(0, 1)); - assert_eq!(0b0100_0110, enable_bit(0b0000_0110, 1)); - } - - #[test] - fn bit_array_builder_basic() { - let mut builder = Builder::with_capacity(1); - assert_eq!(&[0], builder.bytes()); - - builder.enable_bit(0); - assert_eq!(&[0b1000_0000], builder.bytes()); - - builder.enable_bit(7); - assert_eq!(&[0b1000_0001], builder.bytes()); - } -} diff --git a/src/table/filter/bit_array/mod.rs b/src/table/filter/bit_array/mod.rs deleted file mode 100644 index 061a2cd34..000000000 --- a/src/table/filter/bit_array/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -mod builder; -mod reader; - -pub use builder::Builder; -pub use reader::BitArrayReader; diff --git a/src/table/filter/bit_array/reader.rs b/src/table/filter/bit_array/reader.rs deleted file mode 100644 index bf2ea88d9..000000000 --- a/src/table/filter/bit_array/reader.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -const BIT_MASK: u8 = 0b1000_0000_u8; - -/// Gets a bit from the byte -fn get_bit(byte: u8, idx: usize) -> bool { - let bit_mask = BIT_MASK >> idx; - - let masked = byte & bit_mask; - masked > 0 -} - -/// Fixed-size bit array 
reader -#[derive(Debug)] -pub struct BitArrayReader<'a>(&'a [u8]); - -impl<'a> BitArrayReader<'a> { - #[must_use] - pub fn new(bytes: &'a [u8]) -> Self { - Self(bytes) - } - - #[must_use] - pub fn bytes(&self) -> &[u8] { - self.0 - } - - /// Gets the i-th bit. - #[must_use] - pub fn get(&self, idx: usize) -> bool { - let byte_idx = idx / 8; - - #[expect(clippy::expect_used, reason = "we trust the caller")] - let byte = self.0.get(byte_idx).expect("should be in bounds"); - - let bit_idx = idx % 8; - get_bit(*byte, bit_idx) - } -} diff --git a/src/table/filter/block.rs b/src/table/filter/block.rs index 2dc43b512..82243e91f 100644 --- a/src/table/filter/block.rs +++ b/src/table/filter/block.rs @@ -1,8 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation -use crate::table::{Block, filter::standard_bloom::StandardBloomFilterReader}; +use crate::table::{Block, filter::ribbon::burr::contains_hash_from_bytes}; #[derive(Clone)] pub struct FilterBlock(Block); @@ -14,7 +14,23 @@ impl FilterBlock { } pub fn maybe_contains_hash(&self, hash: u64) -> crate::Result<bool> { - Ok(StandardBloomFilterReader::new(&self.0.data)?.contains_hash(hash)) + // Empty payload is the "no filter installed" sentinel produced + // by build_burr_filter_bytes for empty key sets and by + // BurrFilter::to_wire_bytes for zero-layer filters. Probing + // such a buffer must report Ok(true) (permissive) so the + // caller falls through to the data block lookup; forwarding + // it to contains_hash_from_bytes would fail the magic check + // and surface InvalidHeader on every read of a filter-less + // partition. + if self.0.data.is_empty() { + return Ok(true); + } + // Single-pass parse + probe — no per-call heap allocation.
The + // alternative `BurrFilterReader::new(bytes)?.contains_hash(hash)` + // builds a `Vec` inside `wire::decode`; we are on + // the table read hot path (`Table::check_bloom` calls this per + // candidate table) so amortising that allocation matters. + contains_hash_from_bytes(&self.0.data, hash) } /// Returns the block size in bytes. @@ -23,3 +39,44 @@ impl FilterBlock { self.0.size() } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::Slice; + use crate::checksum::Checksum; + use crate::table::Block; + use crate::table::block::{BlockType, Header}; + + fn empty_filter_block() -> FilterBlock { + // Sentinel "no filter" payload: empty data slice. Matches what + // build_burr_filter_bytes returns for an empty key set and what + // BurrFilter::to_wire_bytes returns for a zero-layer filter. + let block = Block { + header: Header { + block_type: BlockType::Filter, + checksum: Checksum::from_raw(0), + data_length: 0, + uncompressed_length: 0, + }, + data: Slice::empty(), + }; + FilterBlock::new(block) + } + + #[test] + fn maybe_contains_hash_empty_payload_returns_true() { + // Empty payload is the sentinel for "no filter installed for this + // table" — probes must report Ok(true) (permissive) so the caller + // falls through to the actual data block lookup. Forwarding the + // empty buffer to contains_hash_from_bytes returns InvalidHeader, + // which turns every read on a filter-less partition into a hard + // error. 
+ let fb = empty_filter_block(); + let result = fb.maybe_contains_hash(0xDEAD_BEEF_CAFE_F00D); + assert!( + matches!(result, Ok(true)), + "expected Ok(true) for empty filter payload, got {result:?}", + ); + } +} diff --git a/src/table/filter/mod.rs b/src/table/filter/mod.rs index 630fca835..270a87fa0 100644 --- a/src/table/filter/mod.rs +++ b/src/table/filter/mod.rs @@ -1,12 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation -pub mod bit_array; pub mod block; -pub mod standard_bloom; +pub mod ribbon; -use standard_bloom::Builder as StandardBloomFilterBuilder; +use ribbon::burr::{BurrBuilder, BurrParams}; +use std::collections::hash_map::DefaultHasher; +use std::hash::BuildHasherDefault; + +/// Hasher type embedded in `BuRR` filters. Only its type identity is used — +/// the construction + probe paths in this crate go through +/// `BurrBuilder::build_from_hashes` / `BurrFilter::contains_hash` / +/// `BurrFilterReader::contains_hash`, all of which take pre-computed u64 +/// hashes (xxh3 via `crate::hash::hash64`) and never invoke the +/// `BuildHasher`. The type slot exists only to satisfy the vendored +/// ribbon-filter's generic `S: BuildHasher` bound. +type FilterHasher = BuildHasherDefault<DefaultHasher>; #[derive(Copy, Clone, Debug, PartialEq)] pub enum BloomConstructionPolicy { @@ -21,96 +31,229 @@ impl Default for BloomConstructionPolicy { } impl BloomConstructionPolicy { + /// Returns `true` if this policy can produce a valid filter + /// (`burr_params` would return `Some` for any non-zero `n`). False + /// means the writer should skip filter construction entirely + /// instead of buffering hashes that will later be dropped. 
#[must_use] - pub fn init(&self, n: usize) -> StandardBloomFilterBuilder { - use standard_bloom::Builder; - - match self { - Self::BitsPerKey(bpk) => Builder::with_bpk(n, *bpk), - Self::FalsePositiveRate(fpr) => Builder::with_fp_rate(n, *fpr), - } + pub fn is_active(&self) -> bool { + // Delegate to `burr_params` so this method is exactly equivalent + // to "would this policy produce a non-empty filter for n=1?". + // Anything stricter (e.g. fpr too small → r > 64) is captured + // by the params constructor's own validation. + self.burr_params(1).is_some() } - #[must_use] - pub fn is_active(&self) -> bool { + /// Build `BurrParams` for the given key count under this policy. + /// + /// Returns `None` if `n == 0` or the policy translates to an invalid + /// `BurrParams` (e.g. `bpk > 64` or `fpr` outside `(0,1)`). Callers + /// should treat `None` as "skip filter construction for this block". + pub(crate) fn burr_params(self, n: usize) -> Option<BurrParams> { + if n == 0 { + return None; + } match self { - Self::BitsPerKey(bpk) => *bpk > 0.0, - Self::FalsePositiveRate(fpr) => *fpr > 0.0, + Self::BitsPerKey(bpk) => BurrParams::with_bpk(n, bpk).ok(), + Self::FalsePositiveRate(fpr) => BurrParams::with_fp_rate(n, fpr).ok(), } } /// Returns the estimated filter size in bytes. + /// + /// Returns `0` if the policy is inactive for the given `n` + /// (`burr_params` would return `None`). Otherwise estimates the + /// `BuRR` body size as `n * r * 1.05 / 8` — `r` is the fingerprint + /// width chosen by the params constructor, `1.05` is a flat 5% + /// overhead for layer thresholds + last-layer enlargement. 
#[must_use] + #[expect( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "estimation, precision loss is acceptable" + )] pub fn estimated_filter_size(&self, n: usize) -> usize { - if n == 0 { + // Delegate to burr_params so the estimate is 0 exactly when the + // builder would also return empty — keeps memory accounting in + // sync with build behavior. + let Some(params) = self.burr_params(n) else { return 0; - } - - #[expect( - clippy::cast_precision_loss, - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "truncation and precision loss are fine because this is an estimation" - )] - match self { - Self::BitsPerKey(bpk) => (*bpk * (n as f32)) as usize / 8, - Self::FalsePositiveRate(fpr) => { - let m = StandardBloomFilterBuilder::calculate_m(n, *fpr); - let bpk = (m / n) as f32; - (bpk * (n as f32)) as usize / 8 - } - } + }; + let r_bits = f32::from(params.r); + ((n as f32) * r_bits * 1.05 / 8.0) as usize } } -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -enum FilterType { - StandardBloom, - BlockedBloom, +/// Build a `BuRR` filter block payload from pre-hashed keys under the given +/// policy. Returns the serialized wire bytes the +/// [`block::FilterBlock`] reader can parse. +/// +/// Returns an empty `Vec` if `hashes` is empty or the policy parameters +/// are invalid for `n = hashes.len()` — callers should treat that as +/// "no filter for this block". +/// +/// Consumes `hashes` so the writer's accumulated `bloom_hash_buffer` can +/// be `mem::take`n straight in without a `to_vec()` copy at the boundary. 
+pub(crate) fn build_burr_filter_bytes( + policy: BloomConstructionPolicy, + hashes: Vec<u64>, +) -> crate::Result<Vec<u8>> { + if hashes.is_empty() { + return Ok(Vec::new()); + } + let Some(params) = policy.burr_params(hashes.len()) else { + return Ok(Vec::new()); + }; + let build_hasher = FilterHasher::default(); + let builder = BurrBuilder::new(params, build_hasher).map_err(|e| { + log::error!("BuRR builder init failed: {e:?}"); + crate::Error::Unrecoverable + })?; + let filter = builder.build_from_hashes_owned(hashes).map_err(|e| { + log::error!("BuRR build_from_hashes failed: {e:?}"); + crate::Error::Unrecoverable + })?; + Ok(filter.to_wire_bytes()) } -impl TryFrom<u8> for FilterType { - type Error = crate::Error; +#[cfg(test)] +#[expect(clippy::expect_used, reason = "test code")] +#[expect(clippy::unwrap_used, reason = "test code")] +mod tests { + use super::*; + use test_log::test; + + #[test] + fn burr_estimated_size_bpk() { + let policy = BloomConstructionPolicy::BitsPerKey(10.0); + let n = 1_000_000; + let estimated_size = policy.estimated_filter_size(n); + // 10 bits/key × 1M keys × 1.05 overhead / 8 ≈ 1.31 MB + assert!(estimated_size > 1_200_000); + assert!(estimated_size < 1_400_000); + } - fn try_from(value: u8) -> Result<Self, Self::Error> { - match value { - 0 => Ok(Self::StandardBloom), - 1 => Ok(Self::BlockedBloom), - _ => Err(crate::Error::InvalidTag(("FilterType", value))), - } + #[test] + fn burr_estimated_size_fpr() { + let policy = BloomConstructionPolicy::FalsePositiveRate(0.01); + let n = 1_000_000; + let estimated_size = policy.estimated_filter_size(n); + // ceil(-log2(0.01)) = 7 bits/key → 7M bits × 1.05 / 8 ≈ 918 KB + assert!(estimated_size > 800_000); + assert!(estimated_size < 1_000_000); } -} -impl From<FilterType> for u8 { - fn from(value: FilterType) -> Self { - match value { - FilterType::StandardBloom => 0, - FilterType::BlockedBloom => 1, + #[test] + fn build_burr_filter_bytes_empty_returns_empty() { + let policy = BloomConstructionPolicy::BitsPerKey(10.0); + let bytes = 
build_burr_filter_bytes(policy, Vec::new()).unwrap(); + assert!(bytes.is_empty()); + } + + #[test] + fn build_burr_filter_bytes_round_trips_via_reader() { + use crate::table::filter::ribbon::burr::BurrFilterReader; + let policy = BloomConstructionPolicy::FalsePositiveRate(0.01); + let hashes: Vec<u64> = (0..1_000_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let bytes = build_burr_filter_bytes(policy, hashes.clone()).unwrap(); + assert!(!bytes.is_empty()); + let reader = BurrFilterReader::new(&bytes).expect("reader"); + for h in &hashes { + assert!(reader.contains_hash(*h), "inserted hash {h} not found"); } } } #[cfg(test)] -mod tests { +#[expect(clippy::expect_used, reason = "test code")] +#[expect(clippy::unwrap_used, reason = "test code")] +mod extra_tests { use super::*; use test_log::test; #[test] - fn bloom_estimated_size_bpk() { + fn policy_default_is_bits_per_key_10() { + let policy = BloomConstructionPolicy::default(); + assert_eq!(policy, BloomConstructionPolicy::BitsPerKey(10.0)); + } + + #[test] + fn is_active_false_for_bpk_below_one() { + assert!(!BloomConstructionPolicy::BitsPerKey(0.5).is_active()); + assert!(!BloomConstructionPolicy::BitsPerKey(0.0).is_active()); + } + + #[test] + fn is_active_false_for_bpk_above_64() { + assert!(!BloomConstructionPolicy::BitsPerKey(70.0).is_active()); + } + + #[test] + fn is_active_true_for_valid_bpk() { + assert!(BloomConstructionPolicy::BitsPerKey(10.0).is_active()); + assert!(BloomConstructionPolicy::BitsPerKey(1.0).is_active()); + assert!(BloomConstructionPolicy::BitsPerKey(64.0).is_active()); + } + + #[test] + fn is_active_false_for_fpr_out_of_range() { + assert!(!BloomConstructionPolicy::FalsePositiveRate(0.0).is_active()); + assert!(!BloomConstructionPolicy::FalsePositiveRate(-0.1).is_active()); + assert!(!BloomConstructionPolicy::FalsePositiveRate(1.0).is_active()); + assert!(!BloomConstructionPolicy::FalsePositiveRate(1.5).is_active()); + // Too tight — would map to r > 64. 
+ assert!(!BloomConstructionPolicy::FalsePositiveRate(1.0e-25_f32).is_active()); + } + + #[test] + fn is_active_true_for_valid_fpr() { + assert!(BloomConstructionPolicy::FalsePositiveRate(0.01).is_active()); + assert!(BloomConstructionPolicy::FalsePositiveRate(0.0001).is_active()); + assert!(BloomConstructionPolicy::FalsePositiveRate(0.5).is_active()); + } + + #[test] + fn estimated_size_zero_n_returns_zero() { let policy = BloomConstructionPolicy::BitsPerKey(10.0); - let n = 1_000_000; - let estimated_size = policy.estimated_filter_size(n); - // For 1 million keys and 10 bits per key, the size should be around 1.25 MB - assert_eq!(estimated_size, 1_250_000); + assert_eq!(policy.estimated_filter_size(0), 0); + let policy_fpr = BloomConstructionPolicy::FalsePositiveRate(0.01); + assert_eq!(policy_fpr.estimated_filter_size(0), 0); } #[test] - fn bloom_estimated_size_fpr() { + fn burr_params_returns_none_for_n_zero() { + let policy = BloomConstructionPolicy::BitsPerKey(10.0); + assert!(policy.burr_params(0).is_none()); + } + + #[test] + fn burr_params_returns_some_for_valid_inputs() { + let policy = BloomConstructionPolicy::BitsPerKey(10.0); + let params = policy.burr_params(100).expect("valid"); + assert_eq!(params.n, 100); + assert_eq!(params.r, 10); + } + + #[test] + fn burr_params_fpr_variant() { let policy = BloomConstructionPolicy::FalsePositiveRate(0.01); - let n = 1_000_000; - let estimated_size = policy.estimated_filter_size(n); - // For 1 million keys and 1% false positive rate, the size should be around 1.2 MB - assert!(estimated_size < 1_300_000); - assert!(estimated_size > 1_100_000); + let params = policy.burr_params(100).expect("valid"); + assert_eq!(params.n, 100); + // r = ceil(-log2(0.01)) = 7 + assert_eq!(params.r, 7); + } + + #[test] + fn build_burr_filter_bytes_invalid_policy_returns_empty() { + // Policy too tight → burr_params returns None → empty bytes. 
+ let policy = BloomConstructionPolicy::FalsePositiveRate(1.0e-25_f32); + let hashes: Vec<u64> = (0..10) + .map(|i: u64| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let bytes = build_burr_filter_bytes(policy, hashes).unwrap(); + assert!(bytes.is_empty()); } } diff --git a/src/table/filter/ribbon/_vendored/LICENSE-APACHE b/src/table/filter/ribbon/_vendored/LICENSE-APACHE new file mode 100644 index 000000000..509af29dc --- /dev/null +++ b/src/table/filter/ribbon/_vendored/LICENSE-APACHE @@ -0,0 +1,173 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/src/table/filter/ribbon/_vendored/LICENSE-MIT b/src/table/filter/ribbon/_vendored/LICENSE-MIT new file mode 100644 index 000000000..2616ab09d --- /dev/null +++ b/src/table/filter/ribbon/_vendored/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) William Rågstad and ribbon-filter contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/src/table/filter/ribbon/builder.rs b/src/table/filter/ribbon/builder.rs new file mode 100644 index 000000000..bde28e17a --- /dev/null +++ b/src/table/filter/ribbon/builder.rs @@ -0,0 +1,468 @@ +use std::hash::BuildHasher; + +use super::error::{BuildError, ConstructionFailure}; +use super::filter::RibbonFilter; +use super::hashing::{ + SplitMix64, StandardEquation, derive_attempt_seed, for_each_set_bit_u128_parts, + standard_equation_from_hash, standard_equation_w64, xor_words, +}; +use super::params::{Mode, Params}; + +#[derive(Debug, Clone)] +pub struct Scratch { + pub(crate) fingerprint: Vec, + pub(crate) acc: Vec, +} + +impl Scratch { + pub(crate) fn new(stride_words: usize) -> Self { + Self { + fingerprint: vec![0; stride_words], + acc: vec![0; stride_words], + } + } + + pub(crate) fn reset(&mut self) { + self.fingerprint.fill(0); + self.acc.fill(0); + } +} + +#[derive(Debug, Clone)] +pub struct RibbonBuilder { + params: Params, + build_hasher: S, +} + +impl RibbonBuilder +where + S: BuildHasher + Clone, +{ + pub fn new(params: Params, build_hasher: S) -> Result { + params.validate().map_err(BuildError::InvalidParams)?; + Ok(Self { + params, + build_hasher, + }) + } + + pub fn params(&self) -> Params { + self.params + } + + pub fn hasher(&self) -> &S { + &self.build_hasher + } + + /// Build a Ribbon filter using a CALLER-PROVIDED seed verbatim — no + /// `derive_attempt_seed` mixing on top. This is the entry point BuRR + /// uses to keep the threshold-decision seed and the actual + /// construction seed identical, so the per-block bump decisions made + /// from precomputed equations agree with the equations the ribbon + /// stores for its kept keys. + /// + /// No retry budget: a single attempt with the given seed. Caller is + /// responsible for sizing `m` (via `Params::m`) generously enough that + /// the banded solver succeeds with the keys provided. 
+ pub(crate) fn build_with_seed_verbatim( + &self, + keys: &[K], + seed: u64, + m: usize, + ) -> Result, BuildError> { + self.params.validate().map_err(BuildError::InvalidParams)?; + self.build_once(keys, m, seed) + .map_err(|failure| BuildError::ConstructionFailed { + final_m: m, + attempts: 1, + last_failure: failure, + }) + } + + /// Build a Ribbon filter from already-hashed keys (each `u64` is + /// treated as the value `BuildHasher::hash_one(key)` would have + /// produced). Verbatim seed, no retry. + /// + /// Used by BuRR when the LSM has already computed a stable u64 + /// hash for each key (via `crate::hash::hash64` / xxh3) — running + /// the BuildHasher again would just double-hash the same bytes. + pub(crate) fn build_with_seed_verbatim_from_hashes( + &self, + hashes: &[u64], + seed: u64, + m: usize, + ) -> Result, BuildError> { + self.params.validate().map_err(BuildError::InvalidParams)?; + self.build_once_from_hashes(hashes, m, seed) + .map_err(|failure| BuildError::ConstructionFailed { + final_m: m, + attempts: 1, + last_failure: failure, + }) + } + + pub fn build(&self, keys: &[K]) -> Result, BuildError> { + self.params.validate().map_err(BuildError::InvalidParams)?; + + let mut attempts = 0usize; + let mut current_m = self.params.m; + let mut last_failure = None; + + for grow_step in 0..=self.params.grow_limit { + for retry_step in 0..self.params.retry_limit { + attempts += 1; + let attempt_index = ((grow_step as u64) << 32) | retry_step as u64; + let seed = derive_attempt_seed(self.params.seed, attempt_index); + + match self.build_once(keys, current_m, seed) { + Ok(filter) => return Ok(filter), + Err(err) => last_failure = Some(err), + } + + if matches!(self.params.mode, Mode::Homogeneous) { + break; + } + } + + if matches!(self.params.mode, Mode::Homogeneous) { + break; + } + + if grow_step < self.params.grow_limit { + let w = self.params.w; + // Unchecked multiplication can wrap in release builds for + // caller-supplied `m` near usize::MAX, 
leaving `current_m` + // smaller than `w` and breaking later invariants. Fail + // construction explicitly when the grown size would + // overflow. + let Some(grown) = current_m.checked_mul(w + 1).map(|raw| raw.div_ceil(w)) else { + return Err(BuildError::ConstructionFailed { + final_m: current_m, + attempts, + last_failure: last_failure.unwrap_or( + ConstructionFailure::InconsistentEquation { + key_index: 0, + row_index: 0, + }, + ), + }); + }; + current_m = grown; + debug_assert!(current_m >= self.params.w); + } + } + + Err(BuildError::ConstructionFailed { + final_m: current_m, + attempts, + last_failure: last_failure.unwrap_or(ConstructionFailure::InconsistentEquation { + key_index: 0, + row_index: 0, + }), + }) + } + + fn build_once( + &self, + keys: &[K], + m: usize, + seed: u64, + ) -> Result, ConstructionFailure> { + debug_assert!(m >= self.params.w); + + let stride_words = self.params.fingerprint_words(); + // `m * stride_words` would overflow `usize` if `m` is set + // unreasonably large; allocate via the checked product and bail + // before the vec! call panics. 
+ let total_words = m + .checked_mul(stride_words) + .ok_or(ConstructionFailure::StorageLengthOverflow { m, stride_words })?; + let fp_last_mask = self.params.fingerprint_last_word_mask(); + let mut occupied = vec![false; m]; + let mut coeff_lo = vec![0u64; m]; + let mut coeff_hi = vec![0u64; m]; + let mut rhs = vec![0u64; total_words]; + + let mut key_fp = vec![0u64; stride_words]; + + for (key_index, key) in keys.iter().enumerate() { + key_fp.fill(0); + let equation = standard_equation_w64( + &self.build_hasher, + key, + seed, + &Params { m, ..self.params }, + &mut key_fp, + ); + + let mut i = equation.start; + let mut c_lo = equation.coeff_lo; + let mut c_hi = equation.coeff_hi; + let mut b = key_fp.clone(); + + if i >= m { + return Err(ConstructionFailure::OutOfBounds { + key_index: Some(key_index), + row_index: i, + m, + }); + } + + loop { + if !occupied[i] { + occupied[i] = true; + coeff_lo[i] = c_lo; + coeff_hi[i] = c_hi; + rhs[i * stride_words..(i + 1) * stride_words].copy_from_slice(&b); + break; + } + + c_lo ^= coeff_lo[i]; + c_hi ^= coeff_hi[i]; + xor_words(&mut b, &rhs[i * stride_words..(i + 1) * stride_words]); + + if c_lo == 0 && c_hi == 0 { + if b.iter().all(|&x| x == 0) { + break; + } + return Err(ConstructionFailure::InconsistentEquation { + key_index, + row_index: i, + }); + } + + let shift = if c_lo != 0 { + c_lo.trailing_zeros() as usize + } else { + 64 + c_hi.trailing_zeros() as usize + }; + i += shift; + if i >= m { + return Err(ConstructionFailure::OutOfBounds { + key_index: Some(key_index), + row_index: i, + m, + }); + } + if shift >= 64 { + c_lo = c_hi >> (shift - 64); + c_hi = 0; + } else if shift > 0 { + c_lo = (c_lo >> shift) | (c_hi << (64 - shift)); + c_hi >>= shift; + } + } + } + + let mut z = vec![0u64; total_words]; + if matches!(self.params.mode, Mode::Homogeneous) { + let mut rng = SplitMix64::new(seed ^ 0xD1B5_4A32_D192_ED03); + for (i, is_occupied) in occupied.iter().enumerate().take(m) { + if *is_occupied { + continue; + } + + 
let row_start = i * stride_words; + let row_end = row_start + stride_words; + for word in &mut z[row_start..row_end] { + *word = rng.next_u64(); + } + z[row_end - 1] &= fp_last_mask; + } + } + + for i in (0..m).rev() { + if !occupied[i] { + continue; + } + + let row_start = i * stride_words; + let row_end = row_start + stride_words; + + z[row_start..row_end].copy_from_slice(&rhs[row_start..row_end]); + + let upper_lo = coeff_lo[i] & !1u64; + let upper_hi = coeff_hi[i]; + let mut row_offsets = Vec::with_capacity(self.params.w.saturating_sub(1)); + for_each_set_bit_u128_parts(upper_lo, upper_hi, |offset| { + row_offsets.push(offset); + }); + + for offset in row_offsets { + let row_index = i + offset; + if row_index >= m { + return Err(ConstructionFailure::OutOfBounds { + key_index: None, + row_index, + m, + }); + } + let other_start = row_index * stride_words; + let (left, right) = z.split_at_mut(other_start); + let row = &mut left[row_start..row_end]; + let other = &right[..stride_words]; + xor_words(row, other); + } + + z[row_end - 1] &= fp_last_mask; + } + let mut built_params = self.params; + built_params.m = m; + built_params.seed = seed; + + Ok(RibbonFilter::new( + built_params, + self.build_hasher.clone(), + z, + )) + } + + /// Variant of [`Self::build_once`] that takes pre-computed key hashes + /// instead of `Hash` keys. Otherwise identical algorithm. + /// + /// Used by BuRR through `build_with_seed_verbatim_from_hashes` so the + /// LSM-side stable u64 hash (xxh3 / `crate::hash::hash64`) flows + /// straight into the banded solver without re-hashing through the + /// `BuildHasher`. 
+ fn build_once_from_hashes( + &self, + hashes: &[u64], + m: usize, + seed: u64, + ) -> Result, ConstructionFailure> { + debug_assert!(m >= self.params.w); + + let stride_words = self.params.fingerprint_words(); + let total_words = m + .checked_mul(stride_words) + .ok_or(ConstructionFailure::StorageLengthOverflow { m, stride_words })?; + let fp_last_mask = self.params.fingerprint_last_word_mask(); + let mut occupied = vec![false; m]; + let mut coeff_lo = vec![0u64; m]; + let mut coeff_hi = vec![0u64; m]; + let mut rhs = vec![0u64; total_words]; + + let mut key_fp = vec![0u64; stride_words]; + let layer_params = Params { m, ..self.params }; + + for (key_index, hash) in hashes.iter().enumerate() { + key_fp.fill(0); + let equation: StandardEquation = + standard_equation_from_hash(*hash, seed, &layer_params, &mut key_fp); + + let mut i = equation.start; + let mut c_lo = equation.coeff_lo; + let mut c_hi = equation.coeff_hi; + let mut b = key_fp.clone(); + + if i >= m { + return Err(ConstructionFailure::OutOfBounds { + key_index: Some(key_index), + row_index: i, + m, + }); + } + + loop { + if !occupied[i] { + occupied[i] = true; + coeff_lo[i] = c_lo; + coeff_hi[i] = c_hi; + rhs[i * stride_words..(i + 1) * stride_words].copy_from_slice(&b); + break; + } + + c_lo ^= coeff_lo[i]; + c_hi ^= coeff_hi[i]; + xor_words(&mut b, &rhs[i * stride_words..(i + 1) * stride_words]); + + if c_lo == 0 && c_hi == 0 { + if b.iter().all(|&x| x == 0) { + break; + } + return Err(ConstructionFailure::InconsistentEquation { + key_index, + row_index: i, + }); + } + + let shift = if c_lo != 0 { + c_lo.trailing_zeros() as usize + } else { + 64 + c_hi.trailing_zeros() as usize + }; + i += shift; + if i >= m { + return Err(ConstructionFailure::OutOfBounds { + key_index: Some(key_index), + row_index: i, + m, + }); + } + if shift >= 64 { + c_lo = c_hi >> (shift - 64); + c_hi = 0; + } else if shift > 0 { + c_lo = (c_lo >> shift) | (c_hi << (64 - shift)); + c_hi >>= shift; + } + } + } + + let mut z = 
vec![0u64; total_words]; + if matches!(self.params.mode, Mode::Homogeneous) { + let mut rng = SplitMix64::new(seed ^ 0xD1B5_4A32_D192_ED03); + for (i, is_occupied) in occupied.iter().enumerate().take(m) { + if *is_occupied { + continue; + } + let row_start = i * stride_words; + let row_end = row_start + stride_words; + for word in &mut z[row_start..row_end] { + *word = rng.next_u64(); + } + z[row_end - 1] &= fp_last_mask; + } + } + + for i in (0..m).rev() { + if !occupied[i] { + continue; + } + let row_start = i * stride_words; + let row_end = row_start + stride_words; + z[row_start..row_end].copy_from_slice(&rhs[row_start..row_end]); + let upper_lo = coeff_lo[i] & !1u64; + let upper_hi = coeff_hi[i]; + let mut row_offsets = Vec::with_capacity(self.params.w.saturating_sub(1)); + for_each_set_bit_u128_parts(upper_lo, upper_hi, |offset| { + row_offsets.push(offset); + }); + for offset in row_offsets { + let row_index = i + offset; + if row_index >= m { + return Err(ConstructionFailure::OutOfBounds { + key_index: None, + row_index, + m, + }); + } + let other_start = row_index * stride_words; + let (left, right) = z.split_at_mut(other_start); + let row = &mut left[row_start..row_end]; + let other = &right[..stride_words]; + xor_words(row, other); + } + z[row_end - 1] &= fp_last_mask; + } + let mut built_params = self.params; + built_params.m = m; + built_params.seed = seed; + + Ok(RibbonFilter::new( + built_params, + self.build_hasher.clone(), + z, + )) + } +} diff --git a/src/table/filter/ribbon/burr/builder.rs b/src/table/filter/ribbon/burr/builder.rs new file mode 100644 index 000000000..88dfe48c1 --- /dev/null +++ b/src/table/filter/ribbon/burr/builder.rs @@ -0,0 +1,367 @@ +use std::hash::{BuildHasher, Hash}; + +use super::super::builder::RibbonBuilder; +use super::super::hashing::{StandardEquation, standard_equation_w64}; +use super::super::params::{Mode, Params}; +use super::error::BurrBuildError; +use super::filter::{BurrFilter, BurrLayer}; +use 
super::params::BurrParams; +use super::threshold::{compute_thresholds, partition_keys_by_threshold}; + +/// Builds a BuRR filter from a key set. +/// +/// # Construction sketch +/// +/// For each layer (0 to `max_layers - 1`): +/// 1. Hash every input key with the layer's derived seed to produce a +/// `StandardEquation` (gives `start = block_idx * b + offset`). +/// 2. Run [`compute_thresholds`] over those equations to pick per-block +/// threshold `τ_i`. A key with `offset < τ_i` is KEPT in this layer; +/// a key with `offset >= τ_i` is BUMPED to the next layer. +/// 3. Partition keys into `kept` and `bumped` via +/// [`partition_keys_by_threshold`]. +/// 4. Build a vendored Standard Ribbon over `kept` in one attempt under +/// the layer seed — the threshold scheme caps per-block load to ~90%, +/// so the build is expected to succeed; a failure is returned as +/// `RibbonLayerFailed` and is never retried with a different seed. +/// 5. Push `BurrLayer { m, seed, thresholds, ribbon }` onto the layer stack. +/// 6. Continue with `remaining = bumped`. +/// +/// The last layer cannot bump (there is no next layer), so it forces +/// `thresholds[..] = b` (accept everything) and is sized with an enlarged +/// `m` (`max(2 * layer_m, 4 * b)`). There is no retry or grow budget: a +/// residual that still does not fit surfaces as `RibbonLayerFailed`.
+pub struct BurrBuilder { + params: BurrParams, + hasher: S, +} + +impl core::fmt::Debug for BurrBuilder { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("BurrBuilder") + .field("params", &self.params) + .finish() + } +} + +impl BurrBuilder +where + S: BuildHasher + Clone, +{ + pub fn new(params: BurrParams, hasher: S) -> Result { + if params.n == 0 { + return Err(BurrBuildError::InvalidParams("n must be > 0")); + } + if !(1..=64).contains(&params.r) { + return Err(BurrBuildError::InvalidParams("r must be in 1..=64")); + } + // w == 64 is a hard invariant: the BuRR probe path + // (BurrFilter::contains_hash, wire::contains_hash) iterates set + // bits of coeff_lo (u64) ONLY and asserts coeff_hi == 0 via + // debug_assert. A w > 64 build would silently produce filters + // that the probe path misses bits on → false negatives. The + // BurrParams constructors (with_fp_rate, with_bpk) pin w to + // 64; this check defends against callers that build params by + // hand or via deserialisation. + if params.w != 64 { + return Err(BurrBuildError::InvalidParams("w must be exactly 64")); + } + if params.b == 0 { + return Err(BurrBuildError::InvalidParams("b must be > 0")); + } + // `BurrParams::layer_m` rounds the per-layer slot count up to a + // multiple of `b` and floors at `b`, so its result is always + // `>= b`. But the vendored Ribbon `Params::new(m, w=64, ...)` + // also requires `m >= w`. If `b < w` (= 64), `layer_m` can hand + // Ribbon an `m` between `b` and `w-1`, which Ribbon rejects + // with a "vendored ribbon param error" that hides the real + // invariant. Enforce `b >= w` here so the floor on `layer_m` + // is at least `w` and Ribbon never sees an undersized layer.
+ if params.b < params.w { + return Err(BurrBuildError::InvalidParams("b must be >= w")); + } + if params.max_layers == 0 { + return Err(BurrBuildError::InvalidParams("max_layers must be > 0")); + } + Ok(Self { params, hasher }) + } + + /// Build from pre-computed u64 key hashes (e.g. xxh3 outputs from + /// `crate::hash::hash64`). Bypasses `BuildHasher::hash_one` — the + /// `S` parameter is only used as the type slot for the eventual + /// `BurrFilter` return value (which carries it for API + /// compatibility with the key-based `contains_in`); no key → + /// hash work happens here. + /// + /// This is the entry point the LSM filter writer uses: it has + /// already hashed every key with xxh3 for filter-block indexing + /// and pipes those u64s directly into BuRR. + pub fn build_from_hashes(&self, hashes: &[u64]) -> Result, BurrBuildError> { + // Borrowed-slice variant — copies the input into the per-layer + // working buffer. Use [`Self::build_from_hashes_owned`] to move + // an existing `Vec` instead, avoiding the up-front clone + // on large filter partitions. + self.build_from_hashes_owned(hashes.to_vec()) + } + + /// Same as [`Self::build_from_hashes`] but consumes a caller-owned + /// `Vec` directly, saving the up-front `to_vec()` clone. The + /// filter writer uses this on its accumulated `bloom_hash_buffer` + /// so per-partition construction doesn't pay the copy cost twice + /// (once here, once during the per-layer recursion). + pub fn build_from_hashes_owned( + &self, + hashes: Vec, + ) -> Result, BurrBuildError> { + // Empty input would produce a zero-layer filter that + // `to_wire_bytes` correctly serialises as an empty Vec, but + // `BurrFilterReader::new` rejects num_layers == 0 — so the + // build → to_wire_bytes → read round-trip breaks for empty + // input. Reject up front so callers see the error at build + // time rather than at the first read. 
+ if hashes.is_empty() { + return Err(BurrBuildError::InvalidParams("key set must be non-empty")); + } + let mut remaining: Vec = hashes; + let mut layers: Vec> = Vec::with_capacity(usize::from(self.params.max_layers)); + + for layer_idx in 0..self.params.max_layers { + if remaining.is_empty() { + break; + } + + let is_last_layer = layer_idx + 1 == self.params.max_layers; + let layer_seed = derive_layer_seed(self.params.seed, layer_idx); + let layer_input = remaining.len(); + + let m_target = self.params.layer_m(layer_input); + let m = if is_last_layer { + let doubled = m_target.saturating_mul(2); + doubled.max(usize::from(self.params.b) * 4) + } else { + m_target + }; + + let layer_w = usize::from(self.params.w); + let layer_r = usize::from(self.params.r); + let equation_params = Params::new(m, layer_w, layer_r, Mode::Standard) + .map_err(static_param_err)? + .with_seed(layer_seed); + + // Compute equations directly from hashes — skip hash_one. + let stride = layer_r.div_ceil(64); + let mut fp_throwaway = vec![0_u64; stride]; + let mut equations: Vec = Vec::with_capacity(remaining.len()); + for hash in &remaining { + fp_throwaway.fill(0); + let eq = super::super::hashing::standard_equation_from_hash( + *hash, + layer_seed, + &equation_params, + &mut fp_throwaway, + ); + equations.push(eq); + } + + let thresholds = if is_last_layer { + let block_count = m.div_ceil(usize::from(self.params.b)); + vec![self.params.b; block_count] + } else { + compute_thresholds(&equations, m, self.params.b) + }; + + let (kept, bumped) = + partition_keys_by_threshold(&remaining, &equations, &thresholds, self.params.b); + + let ribbon_builder = + RibbonBuilder::new(equation_params, self.hasher.clone()).map_err(|e| { + BurrBuildError::RibbonLayerFailed { + layer_index: usize::from(layer_idx), + ribbon_error: e, + } + })?; + + let ribbon = ribbon_builder + .build_with_seed_verbatim_from_hashes(&kept, layer_seed, m) + .map_err(|e| BurrBuildError::RibbonLayerFailed { + layer_index: 
usize::from(layer_idx), + ribbon_error: e, + })?; + + layers.push(BurrLayer { + m, + seed: layer_seed, + thresholds, + ribbon, + }); + + remaining = bumped; + } + + if !remaining.is_empty() { + return Err(BurrBuildError::LayerExhaustion { + layers_attempted: usize::from(self.params.max_layers), + remaining_keys: remaining.len(), + }); + } + + Ok(BurrFilter::from_layers( + self.params, + self.hasher.clone(), + layers, + )) + } + + pub fn build(&self, keys: &[K]) -> Result, BurrBuildError> { + // Same empty-input rejection as `build_from_hashes_owned` — + // builder is the right place to surface "no keys, no filter" + // rather than letting the read path fail later. + if keys.is_empty() { + return Err(BurrBuildError::InvalidParams("key set must be non-empty")); + } + let mut remaining: Vec = keys.to_vec(); + let mut layers: Vec> = Vec::with_capacity(usize::from(self.params.max_layers)); + + for layer_idx in 0..self.params.max_layers { + if remaining.is_empty() { + break; + } + + let is_last_layer = layer_idx + 1 == self.params.max_layers; + let layer_seed = derive_layer_seed(self.params.seed, layer_idx); + let layer_input = remaining.len(); + + // Last layer: enlarge m to guarantee success even at full load + // (no next layer to absorb spillover). + let m_target = self.params.layer_m(layer_input); + let m = if is_last_layer { + let doubled = m_target.saturating_mul(2); + doubled.max(usize::from(self.params.b) * 4) + } else { + m_target + }; + + // Build a Params instance reflecting THIS layer's slot count, + // seed, and (later) retry budget — used both for equation + // computation and for the inner RibbonBuilder. + let layer_w = usize::from(self.params.w); + let layer_r = usize::from(self.params.r); + let equation_params = Params::new(m, layer_w, layer_r, Mode::Standard) + .map_err(static_param_err)? + .with_seed(layer_seed); + + // (1) Equations for every key in `remaining` under this + // layer's seed/m/w/r. 
+ let equations = + compute_layer_equations(&self.hasher, &remaining, &equation_params, layer_r); + + // (2) Decide per-block thresholds. Last layer uses + // all-accepting thresholds: `b` everywhere. + let thresholds = if is_last_layer { + let block_count = m.div_ceil(usize::from(self.params.b)); + vec![self.params.b; block_count] + } else { + compute_thresholds(&equations, m, self.params.b) + }; + + // (3) Partition into kept / bumped. + let (kept, bumped) = + partition_keys_by_threshold(&remaining, &equations, &thresholds, self.params.b); + + // (4) Build Ribbon for kept. Use `build_with_seed_verbatim` + // so the construction seed matches `layer_seed` exactly — + // otherwise the vendored `RibbonBuilder.build` would mix it + // through `derive_attempt_seed`, which would make the + // ribbon's internal `start` values disagree with the start + // values we used for threshold decisions (= correctness bug + // surfaced as wire-format probe misses). + // + // No retry budget: the threshold scheme caps per-block load + // at ~90%, so single-attempt construction succeeds in + // practice. If it doesn't (parameter mistuning), the + // resulting `RibbonLayerFailed` is the diagnostic — we + // don't silently retry with a different seed because that + // would invalidate the thresholds we just computed. + let ribbon_builder = + RibbonBuilder::new(equation_params, self.hasher.clone()).map_err(|e| { + BurrBuildError::RibbonLayerFailed { + layer_index: usize::from(layer_idx), + ribbon_error: e, + } + })?; + + let ribbon = ribbon_builder + .build_with_seed_verbatim(&kept, layer_seed, m) + .map_err(|e| BurrBuildError::RibbonLayerFailed { + layer_index: usize::from(layer_idx), + ribbon_error: e, + })?; + + layers.push(BurrLayer { + m, + seed: layer_seed, + thresholds, + ribbon, + }); + + // (5) Recurse with bumped keys. 
+ remaining = bumped; + } + + if !remaining.is_empty() { + return Err(BurrBuildError::LayerExhaustion { + layers_attempted: usize::from(self.params.max_layers), + remaining_keys: remaining.len(), + }); + } + + Ok(BurrFilter::from_layers( + self.params, + self.hasher.clone(), + layers, + )) + } +} + +/// Compute the equation each key would generate under the given params. +/// +/// The fingerprint side-output is discarded — we only need `start` for +/// the threshold decision. The ribbon build that follows will recompute +/// equations (incl. fingerprints) using the same hasher + seed, so the +/// `start` values agree by construction. +fn compute_layer_equations( + hasher: &S, + keys: &[K], + params: &Params, + r: usize, +) -> Vec +where + K: Hash, + S: BuildHasher, +{ + let stride = r.div_ceil(64); + let mut fp_throwaway = vec![0_u64; stride]; + let mut out = Vec::with_capacity(keys.len()); + for key in keys { + fp_throwaway.fill(0); + let eq = standard_equation_w64(hasher, key, params.seed, params, &mut fp_throwaway); + out.push(eq); + } + out +} + +/// Derive a per-layer seed from the root seed. +/// +/// Each layer must hash to a different `(start, band, fp)` distribution +/// so that keys bumped from layer i get a fresh slot-space allocation at +/// layer i+1. Splitmix64 mixes the layer index into the root seed. 
+pub(crate) fn derive_layer_seed(root: u64, layer_idx: u8) -> u64 { + let mut z = root.wrapping_add(u64::from(layer_idx).wrapping_mul(0x9E37_79B9_7F4A_7C15)); + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) +} + +fn static_param_err(_e: super::super::error::ParamError) -> BurrBuildError { + BurrBuildError::InvalidParams("vendored ribbon param error during burr build") +} diff --git a/src/table/filter/ribbon/burr/error.rs b/src/table/filter/ribbon/burr/error.rs new file mode 100644 index 000000000..25c8a7b28 --- /dev/null +++ b/src/table/filter/ribbon/burr/error.rs @@ -0,0 +1,162 @@ +use core::fmt; + +use super::super::error::BuildError as RibbonBuildError; + +/// Errors that can occur while building a BuRR filter. +#[derive(Debug)] +pub enum BurrBuildError { + /// Configuration error: parameters rejected during `BurrParams::new` or + /// during derivation (e.g. zero keys, illegal FPR). + InvalidParams(&'static str), + /// Construction reached the maximum allowed layer count without + /// absorbing all keys. In a correctly-tuned BuRR this is impossible + /// (the last layer has full capacity by construction); reaching this + /// indicates a parameter mistuning bug. + LayerExhaustion { + layers_attempted: usize, + remaining_keys: usize, + }, + /// An underlying Ribbon layer failed to build despite the threshold + /// guarantee that its key population should fit. Also indicates a + /// parameter mistuning bug — included for completeness so failures + /// surface with diagnostic context.
+ RibbonLayerFailed { + layer_index: usize, + ribbon_error: RibbonBuildError, + }, +} + +impl fmt::Display for BurrBuildError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidParams(msg) => write!(f, "BuRR invalid params: {msg}"), + Self::LayerExhaustion { + layers_attempted, + remaining_keys, + } => write!( + f, + "BuRR exhausted {layers_attempted} layers with {remaining_keys} keys still bumped (parameter mistuning)", + ), + Self::RibbonLayerFailed { + layer_index, + ribbon_error, + } => write!( + f, + "BuRR layer {layer_index} ribbon build failed: {ribbon_error:?}", + ), + } + } +} + +impl std::error::Error for BurrBuildError {} + +/// Detailed construction failure for diagnostics. +#[derive(Debug, Clone)] +pub enum BurrConstructionFailure { + /// A specific block's chosen threshold could not absorb its key + /// population — i.e. even at threshold=0 (all keys bumped) the + /// remaining slot count was somehow exceeded. Should be impossible + /// given correct accounting; surfaced as a sentinel. 
+ BlockOverflow { + layer_index: usize, + block_index: usize, + }, +} + +impl fmt::Display for BurrConstructionFailure { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::BlockOverflow { + layer_index, + block_index, + } => write!( + f, + "BuRR layer {layer_index} block {block_index} could not absorb its keys at any threshold", + ), + } + } +} + +impl std::error::Error for BurrConstructionFailure {} + +#[cfg(test)] +mod tests { + use super::*; + use test_log::test; + + #[test] + fn invalid_params_display() { + let err = BurrBuildError::InvalidParams("n must be > 0"); + let s = format!("{err}"); + assert!(s.contains("invalid params"), "got: {s}"); + assert!(s.contains("n must be > 0"), "got: {s}"); + } + + #[test] + fn layer_exhaustion_display() { + let err = BurrBuildError::LayerExhaustion { + layers_attempted: 4, + remaining_keys: 17, + }; + let s = format!("{err}"); + assert!(s.contains("4 layers"), "got: {s}"); + assert!(s.contains("17 keys"), "got: {s}"); + } + + #[test] + fn ribbon_layer_failed_display() { + let ribbon_err = + RibbonBuildError::InvalidParams(super::super::super::error::ParamError::ZeroM); + let err = BurrBuildError::RibbonLayerFailed { + layer_index: 2, + ribbon_error: ribbon_err, + }; + let s = format!("{err}"); + assert!(s.contains("layer 2"), "got: {s}"); + assert!(s.contains("ribbon build failed"), "got: {s}"); + } + + #[test] + fn burr_build_error_implements_std_error() { + let err = BurrBuildError::InvalidParams("x"); + let _: &dyn std::error::Error = &err; + } + + #[test] + fn block_overflow_display() { + let err = BurrConstructionFailure::BlockOverflow { + layer_index: 1, + block_index: 42, + }; + let s = format!("{err}"); + assert!(s.contains("layer 1"), "got: {s}"); + assert!(s.contains("block 42"), "got: {s}"); + } + + #[test] + fn construction_failure_implements_std_error() { + let err = BurrConstructionFailure::BlockOverflow { + layer_index: 0, + block_index: 0, + }; + let _: &dyn 
std::error::Error = &err; + } + + #[test] + fn construction_failure_is_clone_and_debug() { + let err = BurrConstructionFailure::BlockOverflow { + layer_index: 1, + block_index: 2, + }; + let cloned = err.clone(); + assert!(matches!( + cloned, + BurrConstructionFailure::BlockOverflow { + layer_index: 1, + block_index: 2 + } + )); + let debug = format!("{err:?}"); + assert!(debug.contains("BlockOverflow"), "got: {debug}"); + } +} diff --git a/src/table/filter/ribbon/burr/filter.rs b/src/table/filter/ribbon/burr/filter.rs new file mode 100644 index 000000000..79bc3a068 --- /dev/null +++ b/src/table/filter/ribbon/burr/filter.rs @@ -0,0 +1,355 @@ +use std::hash::{BuildHasher, Hash}; + +use super::super::builder::Scratch; +use super::super::filter::RibbonFilter; +use super::super::hashing::{standard_equation_from_hash, standard_equation_w64}; +use super::super::params::{Mode, Params}; +use super::params::BurrParams; +use super::threshold::is_bumped; + +/// One layer of a built BuRR filter. +pub(crate) struct BurrLayer { + /// Slot count for this layer (== ribbon's m). Kept here so we don't + /// have to reach into ribbon.params() on every probe. + pub(crate) m: usize, + /// Per-layer hash seed (derived from `BurrParams::seed` via the + /// builder's layer-seed function). Stored so the probe path can + /// recompute the equation under the same seed used at build time. + pub(crate) seed: u64, + /// Per-block thresholds for this layer: `thresholds[block_idx]` is + /// the exclusive upper bound on `offset_in_block` values KEPT here. + /// A key whose `offset_in_block >= thresholds[block_idx]` is BUMPED + /// to the next layer at probe time (same decision the builder made). + /// Length = `m.div_ceil(b)`. + pub(crate) thresholds: Vec, + /// The vendored Ribbon filter holding this layer's KEPT keys.
+ pub(crate) ribbon: RibbonFilter, +} + +impl core::fmt::Debug for BurrLayer { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("BurrLayer") + .field("m", &self.m) + .field("ribbon", &"") + .finish() + } +} + +/// A built, queryable BuRR filter. +/// +/// Layers are tried in order on each probe: layer 0 first, then layer 1 +/// (the bumped-from-layer-0 set), etc. The first layer that did not bump +/// the key gives the verdict (match or absent); later layers are not +/// consulted. False positives carry the FPR ≈ 2⁻ʳ of the Ribbon layers. +/// +/// The probe path is allocation-free after the initial `new_scratch` call +/// (one `Scratch` fits every layer — all layers share the same `r`, hence +/// the same stride). +pub struct BurrFilter { + params: BurrParams, + /// Hasher used by the probe-time equation re-compute for the per- + /// layer bump-check. All `BurrLayer::ribbon`s were given clones of + /// this same hasher at build time, so hashes agree at the boundary + /// (`BuildHasher::hash_one` is deterministic for a given hasher + /// state). + hasher: S, + layers: Vec>, +} + +impl BurrFilter +where + S: BuildHasher + Clone, +{ + pub(crate) fn from_layers(params: BurrParams, hasher: S, layers: Vec>) -> Self { + Self { + params, + hasher, + layers, + } + } + + /// Returns the layer count after construction. Useful for diagnostics + /// and tests; a healthy BuRR build usually settles in 1-2 layers. + #[must_use] + pub fn layer_count(&self) -> usize { + self.layers.len() + } + + /// Borrowed access to the underlying layer descriptors. Used by the + /// wire-format encoder; `pub(crate)` so it doesn't leak into the + /// public API. + #[must_use] + pub(crate) fn layers_inner(&self) -> &[BurrLayer] { + &self.layers + } + + /// Serialize this filter into the BuRR wire format. The result can + /// be later parsed by [`BurrFilterReader::new`]. + /// + /// Returns an empty `Vec` for a filter with zero layers (defensive: the + /// builders reject empty key sets, so they never produce one).
The decoder rejects + /// `num_layers == 0` as a malformed header (correctly — a zero- + /// layer filter cannot answer any membership query), so emitting + /// the header anyway would yield a wire payload that no reader + /// can ingest. Empty wire bytes are the canonical "no filter for + /// this block" signal, identical to what + /// `build_burr_filter_bytes(_, &[])` returns up at the writer + /// boundary. + #[must_use] + pub fn to_wire_bytes(&self) -> Vec { + if self.layers.is_empty() { + return Vec::new(); + } + super::wire::encode(self) + } + + /// Returns the parameters this filter was built with. + #[must_use] + pub fn params(&self) -> BurrParams { + self.params + } + + /// Returns a fresh `Scratch` sized for the largest layer's stride. + #[must_use] + pub fn new_scratch(&self) -> Scratch { + // All layers share the same r (fingerprint stride), so any + // layer's scratch is interchangeable. + match self.layers.first() { + Some(layer) => layer.ribbon.new_scratch(), + None => Scratch::new(0), + } + } + + /// Returns `true` if the key may be present. + /// + /// MUST be paired with [`BurrBuilder::build`] (the key-based build + /// path): the probe hashes `key` via the filter's `BuildHasher` and + /// looks the hash up under the same hashing the builder used. + /// Calling `contains(&k)` on a filter built via + /// [`BurrBuilder::build_from_hashes`] is NOT valid — those filters + /// were built from caller-supplied u64 hashes that, in general, do + /// not equal `BuildHasher::hash_one(&k)`, and the probe will report + /// inserted keys as absent (false negative). Use + /// [`Self::contains_hash`] with the same u64 hashing the builder + /// used in that case. + pub fn contains(&self, key: &Q) -> bool { + let mut scratch = self.new_scratch(); + self.contains_in(key, &mut scratch) + } + + /// Probe with a pre-computed u64 hash (e.g. xxh3 output from + /// `crate::hash::hash64`). 
Equivalent to `contains` when the caller + /// has already hashed the key — avoids re-running the + /// `BuildHasher` on the hot path. + /// + /// MUST be paired with [`BurrBuilder::build_from_hashes`]: a filter + /// built via `build(keys)` (which hashes with `BuildHasher`) is NOT + /// queryable by `contains_hash(h)` unless `h` is the + /// `BuildHasher::hash_one(key)` value. The on-disk LSM filter + /// always uses the hash-based build + probe pair so the two stay + /// consistent. + /// + /// Note on probe-mode tagging: a previous reviewer asked whether + /// the construction mode (keyed vs hashed) should be encoded in + /// the type so that mismatched pairs fail fast at runtime. The + /// trade-off was considered and rejected: the in-tree LSM caller + /// uses [`BurrBuilder::build_from_hashes`] + `contains_hash` + /// exclusively (see the table filter pipeline), so the keyed API + /// has a single, well-defined call site (this crate's own tests + /// plus external callers who explicitly opt in). Adding a runtime + /// mode tag would cost an extra branch on every probe; splitting + /// into two filter types would bifurcate every consumer (writer, + /// reader, builder, wire codec) for no in-tree benefit. The + /// doc-comment contract above is the canonical guarantee. + #[inline] + pub fn contains_hash(&self, hash: u64) -> bool { + // BurrParams::with_fp_rate / with_bpk both clamp r to 1..=64, so + // stride is always 1. Single u64 buffer for fingerprint, scalar + // u64 accumulator. The debug_assert pins the invariant — if the + // format ever grows to r > 64 the probe path must be updated + // at the same time. 
+ debug_assert!(self.params.r <= 64, "BuRR params pin r <= 64"); + let mut fingerprint_buf = [0_u64; 1]; + for layer in &self.layers { + let layer_params = match Params::new( + layer.m, + usize::from(self.params.w), + usize::from(self.params.r), + Mode::Standard, + ) { + Ok(p) => p.with_seed(layer.seed), + // In-memory filter: layer params were valid at build + // time, so this is unreachable. Fail closed defensively. + Err(_) => return true, + }; + + fingerprint_buf[0] = 0; + let equation = + standard_equation_from_hash(hash, layer.seed, &layer_params, &mut fingerprint_buf); + let fingerprint = fingerprint_buf[0]; + + if is_bumped(&equation, &layer.thresholds, self.params.b) { + continue; + } + + // GF(2) XOR-reduce. start ∈ [0, m-w] and every set bit offset + // ∈ [0, w-1], so row_index ∈ [0, m-1] is always in-bounds + // (proven; no per-row bounds check in the inner loop). + let z_words = layer.ribbon.z_raw_words(); + let mut acc: u64 = 0; + let mut lo = equation.coeff_lo; + while lo != 0 { + let offset = lo.trailing_zeros() as usize; + acc ^= z_words[equation.start + offset]; + lo &= lo - 1; + } + debug_assert_eq!( + equation.coeff_hi, 0, + "BuRR builds with w <= 64; coeff_hi must be 0", + ); + + return acc == fingerprint; + } + false + } + + /// Allocation-free probe using a caller-provided scratch. + /// + /// Same pairing contract as [`Self::contains`]: only valid when + /// the filter was built via [`BurrBuilder::build`] (key-based + /// path). A filter built via [`BurrBuilder::build_from_hashes`] + /// must be probed with [`Self::contains_hash`] instead, otherwise + /// the probe reports inserted keys as absent. + /// + /// Walks layers descend-only: for each layer, recompute the equation + /// under that layer's seed+m and check the per-block threshold. If + /// the key would have been BUMPED at construction time + /// (`offset >= thresholds[block]`), continue to the next layer. 
Else + /// delegate to the layer's `RibbonFilter::contains_in` — which + /// re-derives the same equation internally and runs the GF(2) XOR- + /// reduce against the stored solution. + /// + /// The double equation work per kept-layer is the MVP cost + /// (correctness first); a follow-up can expose a `contains_with_eq` + /// path on `RibbonFilter` that reuses our pre-computed equation. + pub fn contains_in(&self, key: &Q, scratch: &mut Scratch) -> bool { + // Stack-sized throwaway fingerprint buffer reused across layers. + // `BurrParams::with_*` clamp `r` to 1..=64 so `stride` is 1; the + // assert pins the invariant. + debug_assert!(self.params.r <= 64, "BuRR params pin r <= 64"); + let mut fp_throwaway = [0_u64; 1]; + for layer in &self.layers { + // Build a Params reflecting this layer's m/w/r/seed so the + // equation-computation matches what the builder did. + let layer_params = match Params::new( + layer.m, + usize::from(self.params.w), + usize::from(self.params.r), + Mode::Standard, + ) { + Ok(p) => p.with_seed(layer.seed), + // Unreachable for built filters; fail closed defensively + // so a future param-validation regression yields a + // false positive (caller does an index lookup) rather + // than a false negative. + Err(_) => return true, + }; + + // Re-hash to learn this layer's `start` and decide bump. + // Throwaway fingerprint; the real probe uses `scratch` + // inside `ribbon.contains_in`. The hasher is the one + // BurrFilter holds — all layers' RibbonFilters were given + // clones of THIS hasher at build time, so hashes agree by + // construction (BuildHasher is deterministic). + fp_throwaway[0] = 0; + let equation = standard_equation_w64( + &self.hasher, + key, + layer.seed, + &layer_params, + &mut fp_throwaway, + ); + + if is_bumped(&equation, &layer.thresholds, self.params.b) { + // Bumped at build time → not in this layer's ribbon; + // continue to the next layer. + continue; + } + + // Kept at this layer → ribbon authoritatively decides. 
+ return layer.ribbon.contains_in(key, scratch); + } + // Walked all layers without finding a non-bumped layer — would + // only happen if the input was never inserted in any layer + // (i.e. a non-member key whose hash always lands at a bumped + // offset). Definite-not-present. + false + } +} + +impl core::fmt::Debug for BurrFilter { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("BurrFilter") + .field("params", &self.params) + .field("layer_count", &self.layers.len()) + .finish() + } +} + +/// Wire-format reader for a BuRR filter loaded from a serialized buffer. +/// +/// This is the type the LSM filter framework consumes: it holds a borrowed +/// slice of the on-disk filter block, parses the BuRR header, and answers +/// `contains_hash` lookups. Wire format documented in +/// [`super::wire`] — intentionally distinct from the vendored +/// `ribbon-serde` repr (that one is for in-memory snapshots). +#[derive(Debug)] +pub struct BurrFilterReader<'a> { + decoded: super::wire::DecodedFilter<'a>, +} + +/// Single-pass parse + probe over a wire-format BuRR filter buffer. +/// +/// This is the preferred entry point for the LSM table read path: it +/// parses the header and walks per-layer payloads in place without +/// allocating an intermediate `BurrFilterReader` (the +/// `Vec` inside is the only heap allocation a fresh reader +/// would do). Use this when the wire buffer is already in the block +/// cache and you only need a one-shot membership check. +/// +/// Behaviour matches `BurrFilterReader::new(bytes)?.contains_hash(hash)` +/// modulo allocation: on a structurally invalid header returns +/// `Err(InvalidHeader)`; on payload-level corruption (truncated z +/// slice past header-validated lengths) fails closed with `Ok(true)` +/// so the caller falls through to a real index lookup rather than +/// reporting a false negative.
+#[inline] +pub fn contains_hash_from_bytes(bytes: &[u8], hash: u64) -> crate::Result { + super::wire::contains_hash_from_bytes(bytes, hash) +} + +impl<'a> BurrFilterReader<'a> { + /// Parse a serialized BuRR filter slice. Returns an error if the + /// magic bytes don't match, the version is unrecognised, or the + /// buffer is truncated. + pub fn new(bytes: &'a [u8]) -> crate::Result { + let decoded = super::wire::decode(bytes)?; + Ok(Self { decoded }) + } + + /// Number of layers in the decoded filter. + #[must_use] + pub fn layer_count(&self) -> usize { + self.decoded.layers.len() + } + + /// Probe with a pre-computed key hash. Used by the LSM filter + /// framework's `block::FilterBlock` — the table read path already + /// computes a u64 hash for block indexing, and the filter consumes + /// that same hash directly (no re-hash via `BuildHasher`). + #[inline] + #[must_use] + pub fn contains_hash(&self, hash: u64) -> bool { + super::wire::contains_hash(&self.decoded, hash) + } +} diff --git a/src/table/filter/ribbon/burr/mod.rs b/src/table/filter/ribbon/burr/mod.rs new file mode 100644 index 000000000..e0d0e8ce6 --- /dev/null +++ b/src/table/filter/ribbon/burr/mod.rs @@ -0,0 +1,84 @@ +// BuRR (Bumped Ribbon Retrieval) — Walzer & Dillinger 2022, arXiv:2109.01892. +// +// Built on top of the vendored Ribbon primitives in `super::`. The Ribbon +// layer provides the GF(2) banded solver and packed `r`-bit fingerprint +// storage; BuRR adds: +// * a per-block THRESHOLD scheme that deterministically decides which +// keys are "bumped" out of a layer (rather than failing the whole +// construction); +// * MULTI-LAYER composition — bumped keys are passed to a smaller +// secondary BuRR layer, recursively, until the residual fits; +// * a BUMP-AWARE probe path — at each layer, check the block's +// threshold against the key's offset-in-block; if bumped, walk to the +// next layer; otherwise probe the Ribbon body and compare fingerprints. 
+// +// # Why BuRR over Standard Ribbon +// +// Standard Ribbon has a probabilistic construction failure mode: rare +// "inconsistent equation" or "out-of-bounds" terminations require seed +// retries. BuRR replaces retries with bumping — failure-prone keys go to +// the next layer instead of aborting the build. Memory overhead vs the +// information-theoretic lower bound: +// +// bloom (current default): ~45% +// BinaryFuse8: ~36% +// Standard Ribbon (vendored): ~14% +// BuRR (this module): ~1% +// +// # Architecture +// +// BurrParams +// ├─ key count `n` +// ├─ fingerprint bits `r` (derived from FPR, FPR ≈ 2^-r) +// ├─ band width `w` (= 64; single-word band) +// ├─ block size `b` (rows per block, typically 64 = `w`) +// └─ max layer count (typically 3; last layer always succeeds) +// +// BurrBuilder +// ├─ compute per-key (start, block_idx, offset_in_block, band, fp) +// ├─ per-block threshold selection — bucket offsets, pick the largest +// │ threshold τ such that {keys with offset < τ} fits the block's +// │ ribbon capacity +// ├─ partition: kept ↦ ribbon build for this layer, bumped ↦ next layer +// └─ recurse until bumped set is small enough to fit at full capacity +// (last layer trivially succeeds) +// +// BurrFilter +// ├─ per-layer: (thresholds: Vec, ribbon: RibbonFilter) +// ├─ probe walks layers: re-hash with layer's seed, check threshold, +// │ either descend or run RibbonFilter::contains_in +// └─ wire format: serialised as `MAGIC | filter_type=Burr | header | +// per-layer (thresholds bytes + ribbon z bits)` +// +// The wire format is intentionally NOT compatible with the upstream +// ribbon-filter crate's serde-based repr — that one is meant for in-memory +// snapshot/restore, while ours is the on-disk SST filter block format +// used by the LSM. Both can coexist if/when the ribbon module is extracted +// into a standalone crate. 
+ +// The parent `ribbon::` module-level `#![allow(...)]` covers vendored +// upstream code (clippy::indexing_slicing, clippy::expect_used, +// clippy::unwrap_used, etc.) and currently leaks into this BuRR +// submodule because crate-attribute allow propagates to children. +// Re-denying here would require migrating ~30 internal indexing / +// expect sites in builder.rs / wire.rs / threshold.rs / filter.rs to +// `.get(...).ok_or(...)?` or `#[expect(..., reason)]` per use site — +// a sizeable but tractable refactor that's tracked as a follow-up +// rather than bundled into this PR. New BuRR code added in this PR +// uses `#[expect(..., reason)]` per use site for any new +// suppressions (see params.rs / wire.rs). + +pub mod builder; +pub mod error; +pub mod filter; +pub mod params; +pub(crate) mod threshold; +pub(crate) mod wire; + +pub use builder::BurrBuilder; +pub use error::{BurrBuildError, BurrConstructionFailure}; +pub use filter::{BurrFilter, BurrFilterReader, contains_hash_from_bytes}; +pub use params::BurrParams; + +#[cfg(test)] +mod tests; diff --git a/src/table/filter/ribbon/burr/params.rs b/src/table/filter/ribbon/burr/params.rs new file mode 100644 index 000000000..2a4f6dc0a --- /dev/null +++ b/src/table/filter/ribbon/burr/params.rs @@ -0,0 +1,163 @@ +use super::error::BurrBuildError; + +/// Configuration for a BuRR filter. +/// +/// Construction strategy: +/// * `n` keys are expected; +/// * each layer i has `m_i ≈ n_i * (1 + per_layer_overhead)` slots, where +/// `n_i` is the bumped-from-previous-layer key count (n_0 = n); +/// * blocks of size `b` within each layer drive the threshold scheme; +/// * up to `max_layers` are built — the last layer uses overhead high +/// enough to absorb its residual at threshold = b (no bumping). +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct BurrParams { + /// Total expected key count (= layer 0 input size). + pub n: usize, + /// Fingerprint width in bits. FPR ≈ 2⁻ʳ. 
Must be in `1..=64` so that + /// the fingerprint fits in a single `u64` lane (the vendored Ribbon + /// `w=64` band assumes single-word `b` vectors). + pub r: u8, + /// Band width — fixed at 64 to match the vendored Ribbon + /// `standard_equation_w64` solver. + pub w: u8, + /// Block size (rows per block, drives the per-block threshold byte). + /// Default 64; must be ≤ 255 so the threshold fits one byte. + pub b: u8, + /// Maximum layer count. Last layer is sized for guaranteed success + /// (no bumping); typical values 3–4. + pub max_layers: u8, + /// Per-layer construction overhead expressed as a fractional + /// multiplier added to the key count: `m_i = ceil(n_i * (1 + + /// per_layer_overhead))`. Higher overhead → fewer keys bumped → fewer + /// layers needed but more memory. + pub per_layer_overhead: f32, + /// Root hash seed (combined with per-layer offsets to derive each + /// layer's seed). Stored in the wire format header so probe-side + /// re-derives the same seeds. + pub seed: u64, +} + +impl BurrParams { + /// Default block size — chosen to match the band width so each block + /// covers exactly one full band span; matches the BuRR paper's + /// `b = w` recommendation for the homogeneous-threshold variant. + pub const DEFAULT_B: u8 = 64; + + /// Default max layer count. 4 is enough for arbitrarily large n: each + /// layer absorbs ~95% of incoming keys, so 4 layers reach ≈ 0.05⁴ ≈ + /// 6 × 10⁻⁶ of n. The last layer is sized for guaranteed success. + pub const DEFAULT_MAX_LAYERS: u8 = 4; + + /// Per-layer overhead. With `b = 64`, overhead ≈ 5% leaves margin for + /// the threshold scheme without overshooting the ~1% target overhead + /// vs the information-theoretic minimum. + pub const DEFAULT_PER_LAYER_OVERHEAD: f32 = 0.05; + + /// Construct params for `n` keys at a given false-positive rate. 
+ pub fn with_fp_rate(n: usize, fpr: f32) -> Result { + if n == 0 { + return Err(BurrBuildError::InvalidParams("n must be > 0")); + } + if !(0.0 < fpr && fpr < 1.0) { + return Err(BurrBuildError::InvalidParams("fpr must be in (0.0, 1.0)")); + } + let r_f = (-fpr.log2()).ceil(); + if !r_f.is_finite() || r_f < 1.0 || r_f > 64.0 { + return Err(BurrBuildError::InvalidParams( + "computed r out of supported range [1, 64]", + )); + } + #[expect( + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "r_f is validated to 1.0..=64.0 above" + )] + let r = r_f as u8; + Ok(Self { + n, + r, + w: 64, + b: Self::DEFAULT_B, + max_layers: Self::DEFAULT_MAX_LAYERS, + per_layer_overhead: Self::DEFAULT_PER_LAYER_OVERHEAD, + seed: 0, + }) + } + + /// Construct params for `n` keys at a given bits-per-key target. + /// Maps `bpk` directly to fingerprint width `r` since BuRR's effective + /// storage is essentially `r` bits per key plus ~1% threshold metadata. + pub fn with_bpk(n: usize, bpk: f32) -> Result { + if n == 0 { + return Err(BurrBuildError::InvalidParams("n must be > 0")); + } + if !(1.0..=64.0).contains(&bpk) { + return Err(BurrBuildError::InvalidParams("bpk must be in [1.0, 64.0]")); + } + #[expect( + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "bpk is validated to 1.0..=64.0 above, then clamped to the same range" + )] + let r = bpk.round().clamp(1.0, 64.0) as u8; + Ok(Self { + n, + r, + w: 64, + b: Self::DEFAULT_B, + max_layers: Self::DEFAULT_MAX_LAYERS, + per_layer_overhead: Self::DEFAULT_PER_LAYER_OVERHEAD, + seed: 0, + }) + } + + /// Override the construction seed (deterministic builds). + #[must_use] + pub fn with_seed(mut self, seed: u64) -> Self { + self.seed = seed; + self + } + + /// Compute slot count `m` for a layer receiving `layer_input_keys`. + /// + /// For non-final layers: `m = ceil(input * (1 + overhead))` rounded up + /// to a multiple of `b`. 
For the final layer, the caller is expected + /// to bump the overhead so that no keys spill over (handled by the + /// builder, not by this helper). + /// + /// Floor: the result is always `>= b`. The vendored Ribbon solver + /// (`Params::new(m, w=64, ...)`) additionally requires `m >= w`, so + /// the floor is only sufficient when `b >= w`. `BurrBuilder::new` + /// rejects params with `b < w` up-front to guarantee that + /// invariant; this helper itself does not re-check. + #[must_use] + pub fn layer_m(&self, layer_input_keys: usize) -> usize { + // BurrBuilder::new rejects params with b == 0; this assert pins + // the invariant so any future code path that constructs a + // BurrParams directly without going through the builder will + // panic loudly rather than divide by zero in the div_ceil below. + assert!(self.b > 0, "BurrParams.b must be > 0"); + let overhead = f64::from(self.per_layer_overhead); + #[expect( + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::cast_precision_loss, + reason = "layer_input_keys is bounded by the filter capacity; ceil result fits usize on supported platforms" + )] + let raw = ((layer_input_keys as f64) * (1.0 + overhead)).ceil() as usize; + let raw = raw.max(usize::from(self.b)); // ≥ one block + // Round UP to a multiple of b (so block_count = m / b is exact). + // `raw.div_ceil(b) * b` can wrap on extreme inputs in release builds, + // yielding a SMALLER `m` that violates the caller's invariant. Saturate + // to the largest multiple of `b` that fits in `usize` instead — the + // caller will then fail allocation explicitly via + // `ConstructionFailure::StorageLengthOverflow` rather than silently + // building an undersized filter. 
+ let b = usize::from(self.b); + let blocks = raw.div_ceil(b); + match blocks.checked_mul(b) { + Some(m) => m, + None => usize::MAX - (usize::MAX % b), + } + } +} diff --git a/src/table/filter/ribbon/burr/tests.rs b/src/table/filter/ribbon/burr/tests.rs new file mode 100644 index 000000000..ca60233a6 --- /dev/null +++ b/src/table/filter/ribbon/burr/tests.rs @@ -0,0 +1,1003 @@ +//! Unit + correctness tests for the BuRR filter. +//! +//! Covers: construction round-trip, membership invariants (FN-free for +//! inserted keys), FPR envelope at multiple targets, wire-format +//! encoder/decoder round-trips, wire-format rejection of bad magic / +//! version / filter_type / truncated headers, build determinism for +//! fixed seed, and scratch-reuse equivalence. +//! +//! End-to-end coverage through the table writer + reader path lives in +//! `tests/burr_filter_end_to_end.rs`. + +use std::collections::hash_map::DefaultHasher; +use std::hash::BuildHasherDefault; + +use super::{BurrBuilder, BurrParams}; + +type DefaultBuildHasher = BuildHasherDefault; + +#[test] +fn burr_builds_and_reports_inserted_keys_present() { + let n = 1_000_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("valid params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder.build(&keys).expect("build"); + + // Every inserted key must report as present (no false negatives). + for key in &keys { + assert!( + filter.contains(key), + "inserted key {key} reported absent — BuRR must be FN-free", + ); + } +} + +#[test] +fn burr_fpr_at_one_percent_is_within_envelope() { + // Build with FPR=0.01 over a moderate key set, probe with disjoint + // non-keys, measure realised FPR. Allow up to 5% to give the small + // sample size some slack. 
+ let n = 1_000_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("valid params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder.build(&keys).expect("build"); + + let probe_count = 10_000_usize; + let mut false_positives = 0_usize; + for key in (n as u64)..(n as u64 + probe_count as u64) { + if filter.contains(&key) { + false_positives += 1; + } + } + #[expect( + clippy::cast_precision_loss, + reason = "test code: precision loss acceptable in rate calculations" + )] + let fpr = false_positives as f64 / probe_count as f64; + assert!( + fpr < 0.05, + "realised FPR {fpr} too high (wanted ≤ 5% envelope around 1% target)", + ); +} + +#[test] +fn burr_wire_format_round_trips() { + // Build a BuRR, serialize to wire bytes, parse via + // BurrFilterReader, and verify contains_hash answers match + // BurrFilter::contains for every inserted key. + use super::filter::BurrFilterReader; + use std::hash::BuildHasher; + + let n = 500_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("valid params"); + let hasher = DefaultBuildHasher::default(); + let builder = BurrBuilder::new(params, hasher.clone()).expect("builder"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder.build(&keys).expect("build"); + + let bytes = filter.to_wire_bytes(); + assert!(bytes.len() > 20, "wire buffer too small ({})", bytes.len()); + + let reader = BurrFilterReader::new(&bytes).expect("parse"); + assert_eq!( + reader.layer_count(), + filter.layer_count(), + "decoded layer count must match", + ); + + // The reader's contains_hash takes a pre-computed u64. We must + // use the SAME hasher state the BurrFilter was built with so the + // base_hash matches. BuildHasher::hash_one is the convention used + // by both sides. 
+ for key in &keys { + let h = hasher.hash_one(key); + assert!( + reader.contains_hash(h), + "inserted key {key} not found in decoded reader (hash {h})", + ); + } +} + +#[test] +fn burr_build_from_hashes_and_contains_hash_round_trip() { + // The hash-based build + probe pair is what the LSM filter writer + // and reader use. Insert n xxh3-hashed u64s and verify + // contains_hash reports each as present. + let n = 500_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("valid params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder + .build_from_hashes(&hashes) + .expect("build_from_hashes"); + + for h in &hashes { + assert!( + filter.contains_hash(*h), + "inserted hash {h} reported absent", + ); + } + + // FPR sanity: probe disjoint non-key hashes, must be ≤ 5%. + let probe_count = 10_000_usize; + let mut false_positives = 0_usize; + for i in (n as u64)..(n as u64 + probe_count as u64) { + let h = crate::hash::hash64(&i.to_le_bytes()); + if filter.contains_hash(h) { + false_positives += 1; + } + } + #[expect( + clippy::cast_precision_loss, + reason = "test code: precision loss acceptable in rate calculations" + )] + let fpr = false_positives as f64 / probe_count as f64; + assert!(fpr < 0.05, "realised FPR {fpr} too high"); +} + +#[test] +fn burr_hash_build_wire_format_round_trips() { + // Build via build_from_hashes, serialize, decode via reader, + // contains_hash must match. 
+ use super::filter::BurrFilterReader; + + let n = 500_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("valid params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + + let bytes = filter.to_wire_bytes(); + let reader = BurrFilterReader::new(&bytes).expect("decode"); + + for h in &hashes { + assert!( + reader.contains_hash(*h), + "inserted hash {h} not found via wire-format reader", + ); + } +} + +#[test] +fn burr_wire_rejects_bad_magic() { + use super::filter::BurrFilterReader; + // Build a valid wire payload first, then flip the first magic byte. + // This asserts the magic check actually triggers — a buffer of + // arbitrary zeros could also fail later in decode (e.g. on the + // version byte) and mask whether the magic check fires at all. + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + bytes[0] ^= 0xFF; + let err = BurrFilterReader::new(&bytes).expect_err("bad magic should fail decode"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter")), + "expected InvalidHeader(\"BurrFilter\"), got: {err:?}", + ); +} + +#[test] +fn burr_single_key_round_trips() { + // Smallest possible filter. Last-layer enlargement must accommodate + // n=1 without LayerExhaustion. Hash-based + key-based both work. 
+ let params = BurrParams::with_fp_rate(1, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let key_hash = crate::hash::hash64(b"only-one"); + let filter = builder + .build_from_hashes(&[key_hash]) + .expect("build_from_hashes for n=1"); + assert!(filter.contains_hash(key_hash)); + let bytes = filter.to_wire_bytes(); + let reader = super::filter::BurrFilterReader::new(&bytes).expect("decode"); + assert!(reader.contains_hash(key_hash)); +} + +#[test] +fn burr_build_is_deterministic_for_fixed_seed() { + // Same params + same input → same wire bytes. Wire format must not + // depend on hash-map iteration order or any other non-deterministic + // source. Anyone shipping BuRR filter blocks across hosts relies on + // this. + let params = BurrParams::with_fp_rate(200, 0.01).expect("params"); + let hashes: Vec = (0..200_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let bytes_a = { + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + builder + .build_from_hashes(&hashes) + .expect("build") + .to_wire_bytes() + }; + let bytes_b = { + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + builder + .build_from_hashes(&hashes) + .expect("build") + .to_wire_bytes() + }; + assert_eq!(bytes_a, bytes_b); +} + +#[test] +fn burr_wire_rejects_short_buffer() { + // Anything below the fixed header length must be rejected without + // panic. Important for hardening against truncated on-disk blocks. + use super::filter::BurrFilterReader; + let short = vec![0_u8; 4]; + let err = BurrFilterReader::new(&short).expect_err("short buffer must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter")), + "expected InvalidHeader(\"BurrFilter\"), got: {err:?}", + ); +} + +#[test] +fn burr_wire_rejects_unknown_version() { + // Build a real filter, mutate the version byte, decode must fail. 
+ use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // version byte sits at offset MAGIC_LEN + 1 (after filter_type). + let version_offset = crate::file::MAGIC_BYTES.len() + 1; + bytes[version_offset] = 0xFE; + let err = BurrFilterReader::new(&bytes).expect_err("bad version must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter version")), + "expected InvalidHeader(\"BurrFilter version\"), got: {err:?}", + ); +} + +#[test] +fn burr_wire_rejects_unknown_filter_type() { + use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + let filter_type_offset = crate::file::MAGIC_BYTES.len(); + bytes[filter_type_offset] = 0xAA; + let err = BurrFilterReader::new(&bytes).expect_err("unknown filter_type must error"); + assert!( + matches!(err, crate::Error::InvalidTag(("FilterType", 0xAA))), + "expected InvalidTag((\"FilterType\", 0xAA)), got: {err:?}", + ); +} + +#[test] +fn burr_negative_keys_obey_fpr_envelope_at_low_target() { + // Tight FPR (0.001) over moderate n. Realised FPR over disjoint + // probes must stay within a safety envelope around the target. 
+ let n = 2_000_usize; + let params = BurrParams::with_fp_rate(n, 0.001).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + + let probe_count = 20_000_usize; + let mut false_positives = 0_usize; + for i in (n as u64)..(n as u64 + probe_count as u64) { + let h = crate::hash::hash64(&i.to_le_bytes()); + if filter.contains_hash(h) { + false_positives += 1; + } + } + #[expect( + clippy::cast_precision_loss, + reason = "test code: precision loss acceptable in rate calculations" + )] + let fpr = false_positives as f64 / probe_count as f64; + // BuRR at FPR=0.001 typically realises ≤ 0.5%. Allow envelope. + assert!(fpr < 0.01, "realised FPR {fpr} > 1% envelope around 0.1%"); +} + +#[test] +fn burr_negative_keys_obey_fpr_envelope_at_very_low_target() { + // Tightest FPR target documented in the issue acceptance criteria + // (0.0001). At r ≈ 14 the realised FPR over a 50k disjoint-probe + // sample should be well below the 1‰ ceiling we accept here. + let n = 5_000_usize; + let params = BurrParams::with_fp_rate(n, 0.0001).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + + let probe_count = 50_000_usize; + let mut false_positives = 0_usize; + for i in (n as u64)..(n as u64 + probe_count as u64) { + let h = crate::hash::hash64(&i.to_le_bytes()); + if filter.contains_hash(h) { + false_positives += 1; + } + } + #[expect( + clippy::cast_precision_loss, + reason = "test code: precision loss acceptable in rate calculations" + )] + let fpr = false_positives as f64 / probe_count as f64; + // BuRR at FPR=0.0001 typically realises ≤ 0.05%. 
Allow 1‰ envelope + // (10× slack) so the test isn't a coin-flip on small probe samples. + assert!( + fpr < 0.001, + "realised FPR {fpr} > 0.1% envelope around 0.01% target", + ); +} + +#[test] +fn burr_contains_in_matches_contains_with_external_scratch() { + // The allocation-free probe path (contains_in with caller scratch) + // must agree with the convenience contains for every key in the set. + let n = 300_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder.build(&keys).expect("build"); + let mut scratch = filter.new_scratch(); + for key in &keys { + let via_contains = filter.contains(key); + let via_contains_in = filter.contains_in(key, &mut scratch); + assert_eq!( + via_contains, via_contains_in, + "probe paths disagree on {key}" + ); + assert!(via_contains, "inserted key {key} not present"); + } +} + +#[test] +fn burr_wire_rejects_zero_b() { + use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // b byte sits at offset MAGIC_LEN + 2 + 2 (magic + filter_type + + // version + r + w, then b). 
+ let b_offset = crate::file::MAGIC_BYTES.len() + 4; + bytes[b_offset] = 0; + let err = BurrFilterReader::new(&bytes).expect_err("b == 0 must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter params")), + "expected InvalidHeader(\"BurrFilter params\"), got: {err:?}", + ); +} + +#[test] +fn burr_wire_rejects_zero_num_layers() { + use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // num_layers byte: MAGIC_LEN + filter_type + version + r + w + b + let num_layers_offset = crate::file::MAGIC_BYTES.len() + 5; + bytes[num_layers_offset] = 0; + let err = BurrFilterReader::new(&bytes).expect_err("num_layers == 0 must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter params")), + "expected InvalidHeader(\"BurrFilter params\"), got: {err:?}", + ); +} + +#[test] +fn burr_wire_rejects_out_of_range_r() { + use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // r byte: MAGIC_LEN + filter_type + version + let r_offset = crate::file::MAGIC_BYTES.len() + 2; + bytes[r_offset] = 0; // r==0 invalid + let err = BurrFilterReader::new(&bytes).expect_err("r == 0 must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter params")), + "expected InvalidHeader(\"BurrFilter params\"), got: {err:?}", + ); + + let mut bytes2 = 
filter.to_wire_bytes(); + bytes2[r_offset] = 65; // r>64 invalid + let err2 = BurrFilterReader::new(&bytes2).expect_err("r == 65 must error"); + assert!( + matches!(err2, crate::Error::InvalidHeader("BurrFilter params")), + "expected InvalidHeader(\"BurrFilter params\"), got: {err2:?}", + ); +} + +#[test] +fn burr_wire_rejects_corrupted_num_blocks() { + use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // First layer header begins at HEADER_LEN. num_blocks is the + // second u32 (offset +4 from layer header start). Tamper to a + // value that disagrees with `m`. + let layer_header_start = crate::file::MAGIC_BYTES.len() + 6 + 8; + let num_blocks_offset = layer_header_start + 4; + bytes[num_blocks_offset] = bytes[num_blocks_offset].wrapping_add(1); + let err = BurrFilterReader::new(&bytes).expect_err("mismatched num_blocks must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter layer payload")), + "expected InvalidHeader(\"BurrFilter layer payload\"), got: {err:?}", + ); +} + +#[test] +fn burr_wire_rejects_corrupted_z_byte_len() { + use super::filter::BurrFilterReader; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // z_byte_len is the third u32 of the layer header. 
+ let layer_header_start = crate::file::MAGIC_BYTES.len() + 6 + 8; + let z_byte_len_offset = layer_header_start + 8; + bytes[z_byte_len_offset] = bytes[z_byte_len_offset].wrapping_add(8); + let err = BurrFilterReader::new(&bytes).expect_err("mismatched z_byte_len must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter layer payload")), + "expected InvalidHeader(\"BurrFilter layer payload\"), got: {err:?}", + ); +} + +#[test] +fn burr_settles_in_few_layers() { + let n = 5_000_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("valid params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder.build(&keys).expect("build"); + + // BuRR's design target is 1-3 layers for well-tuned parameters. + // Each layer absorbs ~90% of incoming keys; 3 layers reach ≈ + // 0.1³ ≈ 0.1% residual. The last layer absorbs the rest at full + // load. + let layer_count = filter.layer_count(); + assert!( + (1..=4).contains(&layer_count), + "layer count {layer_count} outside expected 1..=4 range", + ); +} + +#[test] +fn burr_params_with_fp_rate_rejects_n_zero() { + let err = BurrParams::with_fp_rate(0, 0.01).expect_err("n=0 must error"); + let msg = format!("{err}"); + assert!(msg.contains("n must be > 0"), "got: {msg}"); +} + +#[test] +fn burr_params_with_fp_rate_rejects_zero_fpr() { + let err = BurrParams::with_fp_rate(100, 0.0).expect_err("fpr=0 must error"); + let msg = format!("{err}"); + assert!(msg.contains("fpr"), "got: {msg}"); +} + +#[test] +fn burr_params_with_fp_rate_rejects_one_fpr() { + let err = BurrParams::with_fp_rate(100, 1.0).expect_err("fpr=1 must error"); + let msg = format!("{err}"); + assert!(msg.contains("fpr"), "got: {msg}"); +} + +#[test] +fn burr_params_with_fp_rate_rejects_negative_fpr() { + let err = BurrParams::with_fp_rate(100, -0.1).expect_err("negative fpr must error"); + let _ = format!("{err}"); +} + +#[test] +fn 
burr_params_with_fp_rate_rejects_too_tight_fpr() { + // fpr <= 2^-64 → r > 64 → reject. Use 1e-25 (well past 2^-64). + let err = BurrParams::with_fp_rate(100, 1.0e-25_f32).expect_err("too tight must error"); + let _ = format!("{err}"); +} + +#[test] +fn burr_params_with_bpk_rejects_n_zero() { + let err = BurrParams::with_bpk(0, 10.0).expect_err("n=0 must error"); + let _ = format!("{err}"); +} + +#[test] +fn burr_params_with_bpk_rejects_below_one() { + let err = BurrParams::with_bpk(100, 0.5).expect_err("bpk < 1 must error"); + let _ = format!("{err}"); +} + +#[test] +fn burr_params_with_bpk_rejects_above_64() { + let err = BurrParams::with_bpk(100, 70.0).expect_err("bpk > 64 must error"); + let _ = format!("{err}"); +} + +#[test] +fn burr_params_with_seed_sets_seed_field() { + let params = BurrParams::with_fp_rate(100, 0.01) + .unwrap() + .with_seed(0xDEAD_BEEF); + assert_eq!(params.seed, 0xDEAD_BEEF); +} + +#[test] +fn burr_builder_rejects_n_zero() { + let mut params = BurrParams::with_fp_rate(100, 0.01).unwrap(); + params.n = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()) + .expect_err("builder must reject n=0"); + let msg = format!("{err}"); + assert!(msg.contains("n must be > 0"), "got: {msg}"); +} + +#[test] +fn burr_builder_rejects_zero_r() { + let mut params = BurrParams::with_fp_rate(100, 0.01).unwrap(); + params.r = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()) + .expect_err("builder must reject r=0"); + let msg = format!("{err}"); + assert!(msg.contains("r must be in 1..=64"), "got: {msg}"); +} + +#[test] +fn burr_builder_rejects_zero_b() { + let mut params = BurrParams::with_fp_rate(100, 0.01).unwrap(); + params.b = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()) + .expect_err("builder must reject b=0"); + let msg = format!("{err}"); + assert!(msg.contains("b must be > 0"), "got: {msg}"); +} + +#[test] +fn burr_builder_rejects_zero_max_layers() { + let mut params = 
BurrParams::with_fp_rate(100, 0.01).unwrap(); + params.max_layers = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()) + .expect_err("builder must reject max_layers=0"); + let msg = format!("{err}"); + assert!(msg.contains("max_layers"), "got: {msg}"); +} + +#[test] +fn burr_layer_count_for_tiny_input_is_at_most_one() { + // Tiny inputs settle in a single layer — the last-layer + // enlargement absorbs the residual without bumping. (Empty input + // is now rejected by the builder; see + // burr_builder_rejects_empty_input_via_build_from_hashes.) + let params = BurrParams::with_fp_rate(100, 0.01).unwrap(); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).unwrap(); + let hashes: Vec = (0..4_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).unwrap(); + assert!(filter.layer_count() <= 1, "tiny input should fit one layer"); +} + +#[test] +fn burr_filter_debug_format_includes_layer_count() { + let params = BurrParams::with_fp_rate(100, 0.01).unwrap(); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).unwrap(); + let hashes: Vec = (0..100_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).unwrap(); + let debug = format!("{filter:?}"); + assert!(debug.contains("BurrFilter"), "got: {debug}"); + assert!(debug.contains("layer_count"), "got: {debug}"); +} + +#[test] +fn burr_filter_params_accessor() { + let params = BurrParams::with_fp_rate(500, 0.01).unwrap(); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).unwrap(); + let hashes: Vec = (0..500_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).unwrap(); + assert_eq!(filter.params().n, 500); + assert_eq!(filter.params().r, params.r); +} + +#[test] +fn burr_filter_contains_returns_false_for_definitely_absent() { + // n=64 small set, probe 
with hashes that almost certainly map outside. + // Just verify the absent path returns false sometimes (no false-negative + // for inserted; some false-positive is expected for non-inserted). + let n = 64_usize; + let params = BurrParams::with_fp_rate(n, 0.001).unwrap(); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).unwrap(); + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).unwrap(); + let mut false_count = 0_u32; + for i in 1000..2000_u64 { + let h = crate::hash::hash64(&i.to_le_bytes()); + if !filter.contains_hash(h) { + false_count += 1; + } + } + assert!( + false_count > 800, + "expected most non-inserted keys to report absent, got false_count={false_count}" + ); +} + +#[test] +fn contains_hash_from_bytes_round_trips_against_decoded() { + // The single-pass parse+probe entry point used by FilterBlock must + // produce the same answer as the decoded-then-probed reader for + // every inserted hash. 
+ use super::contains_hash_from_bytes; + use super::filter::BurrFilterReader; + + let n = 500_usize; + let params = BurrParams::with_fp_rate(n, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let bytes = filter.to_wire_bytes(); + let reader = BurrFilterReader::new(&bytes).expect("decoder"); + + for h in &hashes { + let single = contains_hash_from_bytes(&bytes, *h).expect("ok"); + let decoded = reader.contains_hash(*h); + assert_eq!(single, decoded, "single-pass and decoded disagree on {h}"); + assert!(single, "inserted hash {h} not present in single-pass probe"); + } + + // Also check the absent-hash path: a mismatch on negative answers + // would still pass the loop above, so iterate a disjoint probe + // corpus and assert exact equality on every probe (true OR false). 
+ for i in (n as u64)..(n as u64 + 2_000_u64) { + let h = crate::hash::hash64(&i.to_le_bytes()); + let single = contains_hash_from_bytes(&bytes, h).expect("ok"); + let decoded = reader.contains_hash(h); + assert_eq!( + single, decoded, + "single-pass and decoded disagree on absent hash {h}", + ); + } +} + +#[test] +fn contains_hash_from_bytes_rejects_short_buffer() { + use super::contains_hash_from_bytes; + let err = contains_hash_from_bytes(&[0_u8; 4], 42).expect_err("short buffer must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter")), + "expected InvalidHeader(\"BurrFilter\"), got: {err:?}", + ); +} + +#[test] +fn contains_hash_from_bytes_rejects_bad_magic() { + use super::contains_hash_from_bytes; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + bytes[0] ^= 0xFF; + let err = contains_hash_from_bytes(&bytes, 0).expect_err("bad magic must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter")), + "expected InvalidHeader(\"BurrFilter\"), got: {err:?}", + ); +} + +#[test] +fn contains_hash_from_bytes_rejects_bad_version() { + use super::contains_hash_from_bytes; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + let version_offset = crate::file::MAGIC_BYTES.len() + 1; + bytes[version_offset] = 0xFE; + let err = contains_hash_from_bytes(&bytes, 0).expect_err("bad version must error"); + assert!( + 
matches!(err, crate::Error::InvalidHeader("BurrFilter version")), + "expected InvalidHeader(\"BurrFilter version\"), got: {err:?}", + ); +} + +#[test] +fn contains_hash_from_bytes_rejects_bad_filter_type() { + use super::contains_hash_from_bytes; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + let filter_type_offset = crate::file::MAGIC_BYTES.len(); + bytes[filter_type_offset] = 0xAB; + let err = contains_hash_from_bytes(&bytes, 0).expect_err("bad filter_type must error"); + assert!( + matches!(err, crate::Error::InvalidTag(("FilterType", 0xAB))), + "expected InvalidTag((\"FilterType\", 0xAB)), got: {err:?}", + ); +} + +#[test] +fn contains_hash_from_bytes_rejects_bad_params() { + use super::contains_hash_from_bytes; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // Set num_layers = 0 → InvalidHeader("BurrFilter params"). 
+ let num_layers_offset = crate::file::MAGIC_BYTES.len() + 5; + bytes[num_layers_offset] = 0; + let err = contains_hash_from_bytes(&bytes, 0).expect_err("num_layers=0 must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter params")), + "expected InvalidHeader(\"BurrFilter params\"), got: {err:?}", + ); +} + +#[test] +fn contains_hash_from_bytes_rejects_corrupted_layer_payload() { + // Tampered num_blocks → checked-add validation in + // contains_hash_from_bytes must reject the layer header before + // reaching the slice. + use super::contains_hash_from_bytes; + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + let layer_header_start = crate::file::MAGIC_BYTES.len() + 6 + 8; + let num_blocks_offset = layer_header_start + 4; + bytes[num_blocks_offset] = bytes[num_blocks_offset].wrapping_add(1); + let err = contains_hash_from_bytes(&bytes, 0).expect_err("corrupted num_blocks must error"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter layer payload")), + "expected InvalidHeader(\"BurrFilter layer payload\"), got: {err:?}", + ); +} + +#[test] +fn contains_hash_from_bytes_returns_false_for_non_inserted() { + // Smoke for the not-present branch — exercises the per-set-bit + // loop's normal exit path (where acc != fingerprint). Also + // cross-validates the single-pass entry point against the decoded + // reader: `contains_hash_from_bytes` and + // `BurrFilterReader::contains_hash` are separate implementations, + // so a mismatch on the absent-path would silently pass an + // absent-only sanity check. 
+ use super::contains_hash_from_bytes; + use super::filter::BurrFilterReader; + let n = 200_usize; + let params = BurrParams::with_fp_rate(n, 0.001).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..n as u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let bytes = filter.to_wire_bytes(); + let reader = BurrFilterReader::new(&bytes).expect("decoder"); + + let mut absent_count = 0_u32; + for i in 10_000..11_000_u64 { + let h = crate::hash::hash64(&i.to_le_bytes()); + let single = contains_hash_from_bytes(&bytes, h).expect("ok"); + let decoded = reader.contains_hash(h); + assert_eq!( + single, decoded, + "single-pass and decoded disagree on absent hash {h}", + ); + if !single { + absent_count += 1; + } + } + assert!( + absent_count > 950, + "expected most non-inserted hashes to report absent, got absent_count={absent_count}", + ); +} + +#[test] +fn burr_wire_rejects_corrupted_m_below_w() { + // Corruption test for the Params::new gate added to decode: a + // tampered `m` that drops below `w` (64) must be rejected at + // decode time with InvalidHeader("BurrFilter layer params"), + // NOT silently fail-close in the probe path later. + use super::filter::BurrFilterReader; + // Build a single-layer filter (n = 50 → one layer). + let params = BurrParams::with_fp_rate(50, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..50_u64) + .map(|i| crate::hash::hash64(&[i as u8])) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let mut bytes = filter.to_wire_bytes(); + // m is the first u32 of the first layer header (at HEADER_LEN). 
+ let layer_header_start = crate::file::MAGIC_BYTES.len() + 6 + 8; + // Read original m to size the corrupted_z payload such that the + // num_blocks/z_byte_len cross-checks still succeed (so the test + // exercises specifically the Params::new gate, not the earlier + // length checks). + let m_corrupt: u32 = 32; // w == 64, so m=32 fails m >= w. + bytes[layer_header_start..layer_header_start + 4].copy_from_slice(&m_corrupt.to_le_bytes()); + // Recompute the cross-check fields so the test reaches Params::new. + // num_blocks = m.div_ceil(b); b defaults to 64 → num_blocks=1 + let num_blocks_corrupt: u32 = 1; + bytes[layer_header_start + 4..layer_header_start + 8] + .copy_from_slice(&num_blocks_corrupt.to_le_bytes()); + // z_byte_len = m * stride * 8; stride=1 (r=7 for fpr=0.01) → 256 + let z_byte_len_corrupt: u32 = 32 * 8; + bytes[layer_header_start + 8..layer_header_start + 12] + .copy_from_slice(&z_byte_len_corrupt.to_le_bytes()); + + let err = BurrFilterReader::new(&bytes).expect_err("m < w must reject"); + assert!( + matches!(err, crate::Error::InvalidHeader("BurrFilter layer params")), + "expected InvalidHeader(\"BurrFilter layer params\"), got: {err:?}", + ); +} + +#[test] +fn burr_builder_rejects_empty_input_via_build_from_hashes() { + let params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let err = builder + .build_from_hashes(&[]) + .expect_err("empty hash input must error"); + let msg = format!("{err}"); + assert!( + msg.contains("non-empty"), + "expected non-empty mention: {msg}" + ); +} + +#[test] +fn burr_builder_rejects_empty_input_via_build_keys() { + let params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let keys: [u64; 0] = []; + let err = builder + .build(&keys) + .expect_err("empty key input must error"); + let msg = format!("{err}"); + 
assert!( + msg.contains("non-empty"), + "expected non-empty mention: {msg}" + ); +} + +#[test] +fn burr_builder_new_rejects_zero_n() { + let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.n = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()).expect_err("n=0 must reject"); + assert!(format!("{err}").contains("n must be > 0")); +} + +#[test] +fn burr_builder_new_rejects_r_out_of_range() { + let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.r = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()).expect_err("r=0 must reject"); + assert!(format!("{err}").contains("r must be in 1..=64")); + + let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.r = 65; + let err = + BurrBuilder::new(params, DefaultBuildHasher::default()).expect_err("r=65 must reject"); + assert!(format!("{err}").contains("r must be in 1..=64")); +} + +#[test] +fn burr_builder_new_rejects_non_64_w() { + let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.w = 32; + let err = + BurrBuilder::new(params, DefaultBuildHasher::default()).expect_err("w=32 must reject"); + assert!(format!("{err}").contains("w must be exactly 64")); +} + +#[test] +fn burr_builder_new_rejects_zero_b() { + let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.b = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()).expect_err("b=0 must reject"); + assert!(format!("{err}").contains("b must be > 0")); +} + +#[test] +fn burr_builder_new_rejects_b_below_w() { + // b < w lets layer_m hand Ribbon an undersized m. Reviewer-flagged + // invariant: the builder must reject hand-built params with b < w. 
+ let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.b = 32; // < w (= 64) + let err = BurrBuilder::new(params, DefaultBuildHasher::default()).expect_err("b= w"), "got: {msg}"); +} + +#[test] +fn burr_builder_new_rejects_zero_max_layers() { + let mut params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + params.max_layers = 0; + let err = BurrBuilder::new(params, DefaultBuildHasher::default()) + .expect_err("max_layers=0 must reject"); + assert!(format!("{err}").contains("max_layers must be > 0")); +} + +#[test] +fn burr_builder_debug_includes_params() { + let params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let debug = format!("{builder:?}"); + assert!(debug.contains("BurrBuilder"), "got: {debug}"); + assert!(debug.contains("params"), "got: {debug}"); +} + +#[test] +fn burr_filter_debug_includes_layer_count() { + let params = BurrParams::with_fp_rate(100, 0.01).expect("params"); + let builder = BurrBuilder::new(params, DefaultBuildHasher::default()).expect("builder"); + let hashes: Vec = (0..100_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder.build_from_hashes(&hashes).expect("build"); + let debug = format!("{filter:?}"); + assert!(debug.contains("BurrFilter"), "got: {debug}"); + assert!(debug.contains("layer_count"), "got: {debug}"); +} diff --git a/src/table/filter/ribbon/burr/threshold.rs b/src/table/filter/ribbon/burr/threshold.rs new file mode 100644 index 000000000..c4550f679 --- /dev/null +++ b/src/table/filter/ribbon/burr/threshold.rs @@ -0,0 +1,235 @@ +//! Per-block threshold computation for BuRR. +//! +//! Given a layer's `m` row count, block size `b`, and the set of equations +//! that each key would generate (each with a `start` row index), this +//! module decides — block by block — a threshold `τ_i ∈ [0, b]` such that +//! 
//! a key with `offset_in_block < τ_i` is KEPT in this layer, while a key
//! with `offset_in_block >= τ_i` is BUMPED to the next layer.
//!
//! The threshold is chosen as the largest value for which the kept set
//! fits the block's effective capacity. In the BuRR paper this capacity
//! is derived rigorously from the Gaussian-elimination success
//! probability; the MVP here uses a conservative load factor (`CAP_NUM /
//! CAP_DEN ≈ 90% of b`) — enough to bring construction failure below the
//! level Standard Ribbon already retries through, while leaving the
//! per-block analytic upgrade for a follow-up.
//!
//! All offsets in this module are measured WITHIN their block (i.e.
//! `start % b`), so a threshold of `b` means "accept everything" and `0`
//! means "bump everything".
//!
//! Every public item in this module is consumed by the BuRR builder
//! (`builder.rs`) and the probe path (`filter.rs`); the previous
//! crate-level `#![allow(dead_code)]` blanket suppression has been
//! removed so any future genuinely-dead code in here surfaces a
//! warning.

use super::super::hashing::StandardEquation;

/// Capacity numerator: per-block keep-capacity = `b * CAP_NUM / CAP_DEN`.
/// The division is integer division, so the capacity rounds DOWN (for
/// example `b = 16` yields `16 * 9 / 10 = 14`).
/// 90% load factor (CAP_NUM=9, CAP_DEN=10) — leaves ~10% margin for the
/// banded-solver's worst-case eliminations.
const CAP_NUM: usize = 9;
/// Capacity denominator; paired with [`CAP_NUM`] to express the load
/// factor as an exact integer ratio.
const CAP_DEN: usize = 10;

/// Compute per-block thresholds for a layer.
///
/// Returns `Vec<u8>` of length `block_count = ceil(m / b)`. Entry `i` is
/// the threshold for block `i` — keys whose `start` falls in row range
/// `[i*b, (i+1)*b)` and whose `offset = start - i*b` is `< thresholds[i]`
/// are KEPT; those with `offset >= thresholds[i]` are BUMPED.
///
/// `m` is the layer's slot count (must be > 0). `b` is the block size in
/// rows (must be > 0). `equations.len()` may be anything — the function
/// works whether all keys fit, some fit, or every block is overloaded.
///
/// # Algorithm
///
/// 1.
Bucket equations by `block_idx = start / b`. Per-bucket list of +/// `offset = start % b` values (capped at `b - 1`). +/// 2. Per block, target keep-capacity `cap = b * CAP_NUM / CAP_DEN`. +/// 3. If the bucket has `≤ cap` keys, threshold = `b` (accept all). +/// 4. Otherwise sort offsets ascending; the smallest `cap` offsets are +/// kept. Threshold = the `cap`-th sorted offset (0-indexed) — keys +/// with offset strictly less than this value are kept. +/// +/// Pathological case: if `cap` keys share the same offset value (rare +/// for well-distributed hashes), the threshold will be that offset, and +/// strict `<` may discard a tying key. That's safe — it just means one +/// extra key bumps. Tolerable for the MVP; the analytic per-block +/// variant from the paper handles this exactly. +#[must_use] +pub(crate) fn compute_thresholds(equations: &[StandardEquation], m: usize, b: u8) -> Vec { + debug_assert!(m > 0, "compute_thresholds requires m > 0"); + debug_assert!(b > 0, "compute_thresholds requires b > 0"); + + let b_usize = usize::from(b); + let block_count = m.div_ceil(b_usize); + let cap_per_block = (b_usize * CAP_NUM) / CAP_DEN; + + // First pass: count keys per block to size each bucket exactly. + let mut block_counts = vec![0_usize; block_count]; + for eq in equations { + let block_idx = eq.start / b_usize; + if block_idx < block_count { + block_counts[block_idx] += 1; + } + } + + // Second pass: collect offsets per block. + let mut block_offsets: Vec> = block_counts + .iter() + .map(|&n| Vec::with_capacity(n)) + .collect(); + for eq in equations { + let block_idx = eq.start / b_usize; + if block_idx < block_count { + let offset = (eq.start % b_usize) as u8; + block_offsets[block_idx].push(offset); + } + } + + // Third pass: derive each block's threshold. + let mut thresholds = vec![b; block_count]; + for (i, offsets) in block_offsets.iter_mut().enumerate() { + if offsets.len() <= cap_per_block { + // Block underloaded — accept everything. 
+ continue; + } + offsets.sort_unstable(); + // The threshold is the offset value at the `cap_per_block`-th + // position (sorted ascending). Strict `<` against this threshold + // keeps exactly the `cap_per_block` smallest-offset keys (modulo + // ties at the boundary, which lean toward bumping — safe). + thresholds[i] = offsets[cap_per_block]; + } + + thresholds +} + +/// Partition keys into (kept, bumped) according to the given thresholds. +/// +/// Inputs are parallel slices — `keys[i]` must correspond to +/// `equations[i]` (same hash, same layer seed). `thresholds` indexed by +/// `block_idx = start / b`. +/// +/// Returns `(kept, bumped)` where `kept` is built for this layer and +/// `bumped` is forwarded to the next BuRR layer. +pub(crate) fn partition_keys_by_threshold( + keys: &[K], + equations: &[StandardEquation], + thresholds: &[u8], + b: u8, +) -> (Vec, Vec) { + debug_assert_eq!(keys.len(), equations.len()); + let b_usize = usize::from(b); + + let mut kept = Vec::with_capacity(keys.len()); + let mut bumped = Vec::with_capacity(keys.len() / 10); // expect ~10% + for (key, eq) in keys.iter().zip(equations.iter()) { + let block_idx = eq.start / b_usize; + let offset = (eq.start % b_usize) as u8; + let threshold = thresholds.get(block_idx).copied().unwrap_or(0); + if offset < threshold { + kept.push(key.clone()); + } else { + bumped.push(key.clone()); + } + } + (kept, bumped) +} + +/// Predicate variant: does a single equation get bumped under the given +/// thresholds? Used by the probe path to decide which layer holds a key. 
+#[expect( + clippy::inline_always, + reason = "called per layer on the filter probe hot path; the function is ~5 instructions and \ + inlining lets LLVM fold the threshold-table indexing into the caller's layer loop" +)] +#[inline(always)] +#[must_use] +pub(crate) fn is_bumped(eq: &StandardEquation, thresholds: &[u8], b: u8) -> bool { + let b_usize = usize::from(b); + let block_idx = eq.start / b_usize; + let offset = (eq.start % b_usize) as u8; + let threshold = thresholds.get(block_idx).copied().unwrap_or(0); + offset >= threshold +} + +#[cfg(test)] +mod tests { + use super::super::super::hashing::StandardEquation; + use super::*; + + fn eq_at(start: usize) -> StandardEquation { + // coeff_lo / coeff_hi are irrelevant for threshold computation. + StandardEquation { + start, + coeff_lo: 1, + coeff_hi: 0, + } + } + + #[test] + fn empty_input_returns_full_thresholds() { + let thresholds = compute_thresholds(&[], 64, 16); + // m=64, b=16 → block_count=4; no keys → all blocks accept everything. + assert_eq!(thresholds, vec![16, 16, 16, 16]); + } + + #[test] + fn underloaded_block_keeps_threshold_at_b() { + // m=64, b=16, cap = 16 * 9 / 10 = 14. With 5 keys in block 0, + // none in others → threshold stays at b=16 everywhere. + let equations: Vec<_> = [0, 1, 2, 3, 4].iter().map(|&start| eq_at(start)).collect(); + let thresholds = compute_thresholds(&equations, 64, 16); + assert_eq!(thresholds, vec![16, 16, 16, 16]); + } + + #[test] + fn overloaded_block_lowers_threshold_to_cap_th_offset() { + // m=64, b=16, cap = 14. Pack block 0 with offsets 0..16 (16 keys + // — overload by 2). The cap-th sorted offset (14) becomes the + // threshold; offsets 0..13 are kept (14 keys), offsets 14..15 + // are bumped (2 keys). + let equations: Vec<_> = (0..16).map(eq_at).collect(); + let thresholds = compute_thresholds(&equations, 64, 16); + assert_eq!(thresholds[0], 14); + // Other blocks empty → threshold at b. 
+ assert_eq!(thresholds[1..], [16, 16, 16]); + } + + #[test] + fn partition_routes_keys_correctly() { + // Keys at starts [0..16] in block 0, threshold = 14. Keys + // 0..13 → kept (14 of them), 14..15 → bumped (2 of them). + let keys: Vec = (0..16).collect(); + let equations: Vec<_> = (0..16).map(eq_at).collect(); + let thresholds = vec![14_u8, 16, 16, 16]; + let (kept, bumped) = partition_keys_by_threshold(&keys, &equations, &thresholds, 16); + assert_eq!(kept.len(), 14); + assert_eq!(bumped, vec![14, 15]); + } + + #[test] + fn is_bumped_predicate_matches_partition() { + let equations: Vec<_> = (0..16).map(eq_at).collect(); + let thresholds = vec![14_u8, 16, 16, 16]; + for (i, eq) in equations.iter().enumerate() { + let bumped = is_bumped(eq, &thresholds, 16); + // First 14 keep; last 2 bump. + assert_eq!(bumped, i >= 14, "key {i} bumped state mismatch"); + } + } + + #[test] + fn keys_outside_block_range_get_bumped() { + // start values past m get treated as block_idx >= block_count; + // the get(block_idx) returns None → threshold defaults to 0 → + // any offset >= 0 → bumped. (This shouldn't happen for well- + // formed equations, but is a safe fallback.) + let eq = eq_at(1000); + let thresholds = vec![16_u8, 16, 16, 16]; + assert!(is_bumped(&eq, &thresholds, 16)); + } +} diff --git a/src/table/filter/ribbon/burr/wire.rs b/src/table/filter/ribbon/burr/wire.rs new file mode 100644 index 000000000..871a25f76 --- /dev/null +++ b/src/table/filter/ribbon/burr/wire.rs @@ -0,0 +1,549 @@ +//! BuRR on-disk wire format. +//! +//! # Layout +//! +//! Designed for the LSM filter block — fixed-width fields up front, then +//! per-layer variable-length payloads. All multi-byte integers are +//! little-endian. +//! +//! ```text +//! offset size field +//! ────── ──── ────────────────────────────────────────── +//! 0 6 MAGIC_BYTES (existing crate constant) +//! 6 1 filter_type = BURR_FILTER_TYPE_BYTE (2) +//! 7 1 format_version (FORMAT_VERSION = 1) +//! 
//! 8       1     r (fingerprint bits, 1..=64)
//! 9       1     w (band width, fixed at 64)
//! 10      1     b (block size)
//! 11      1     num_layers (1..=255)
//! 12      8     root_seed (u64 LE)
//! 20      —     per-layer payloads (`num_layers` entries):
//!               4     m (u32 LE) — slot count
//!               4     num_blocks (u32 LE) — = m.div_ceil(b)
//!               4     z_byte_len (u32 LE) — = m * stride_words * 8
//!               N     thresholds (num_blocks bytes)
//!               M     z storage (z_byte_len bytes, raw u64 words LE)
//! ```
//!
//! `stride_words = r.div_ceil(64)`. For the current implementation
//! `r <= 64` so `stride_words = 1` and a row of z is exactly 8 bytes.
//!
//! The per-layer seed is NOT stored — it's re-derived from
//! `root_seed + layer_index` via [`super::builder::derive_layer_seed`]
//! at parse time. Keeps the format compact and removes the temptation
//! to drift seeds across encode/decode.

use std::hash::BuildHasher;

use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::io::{Cursor, Read};

use super::super::hashing::{StandardEquation, standard_equation_from_hash};
use super::super::params::{Mode, Params};
use super::builder::derive_layer_seed;
use super::filter::BurrFilter;
use super::threshold::is_bumped;
use crate::file::MAGIC_BYTES;

/// Wire-format identifier for the BuRR filter. Distinct from the legacy
/// bloom values (0 = StandardBloom, 1 = BlockedBloom — both retired
/// alongside this rollout in task #17/#18); 2 is the new BuRR slot.
pub(crate) const BURR_FILTER_TYPE_BYTE: u8 = 2;

/// Format version. Bumped if/when the wire layout changes
/// incompatibly. Readers reject mismatched versions explicitly.
pub(crate) const FORMAT_VERSION: u8 = 1;

/// Header length in bytes (MAGIC + filter_type + version + r + w + b +
/// num_layers + root_seed) — 6 + 1 + 1 + 1 + 1 + 1 + 1 + 8 = 20.
///
/// The literal `6` in the expression counts the six one-byte fields
/// (filter_type, version, r, w, b, num_layers) and `8` is the u64
/// root_seed. The "= 20" total assumes `MAGIC_BYTES` is 6 bytes long, as
/// the layout table in the module docs states — TODO confirm against
/// `crate::file::MAGIC_BYTES`; the constant itself always tracks
/// `MAGIC_BYTES.len()`.
const HEADER_LEN: usize = MAGIC_BYTES.len() + 6 + 8;
/// Per-layer fixed header length: m + num_blocks + z_byte_len = 12.
+const LAYER_HEADER_LEN: usize = 12; + +/// Serialize a built [`BurrFilter`] into the wire format. +pub(crate) fn encode(filter: &BurrFilter) -> Vec +where + S: BuildHasher + Clone, +{ + let params = filter.params(); + let layers = filter.layers_inner(); + + // Pre-size the buffer to avoid reallocations: header + per-layer + // (fixed header + thresholds + z) for every layer. + let stride_words = usize::from(params.r).div_ceil(64); + let estimated_size: usize = HEADER_LEN + + layers + .iter() + .map(|layer| LAYER_HEADER_LEN + layer.thresholds.len() + layer.m * stride_words * 8) + .sum::(); + let mut buf = Vec::with_capacity(estimated_size); + + // Header. + buf.extend_from_slice(&MAGIC_BYTES); + #[expect(clippy::expect_used, reason = "writing to a Vec cannot fail")] + { + buf.write_u8(BURR_FILTER_TYPE_BYTE).expect("vec write"); + buf.write_u8(FORMAT_VERSION).expect("vec write"); + buf.write_u8(params.r).expect("vec write"); + buf.write_u8(params.w).expect("vec write"); + buf.write_u8(params.b).expect("vec write"); + #[expect( + clippy::cast_possible_truncation, + reason = "max_layers fits u8 by construction" + )] + let num_layers_u8 = layers.len() as u8; + buf.write_u8(num_layers_u8).expect("vec write"); + buf.write_u64::(params.seed) + .expect("vec write"); + } + + // Per-layer payloads. + for layer in layers { + let m = layer.m; + let num_blocks = layer.thresholds.len(); + // Checked multiplication: a layer larger than u32::MAX bytes + // would silently wrap with `as u32` and produce a self- + // corrupting wire format. Filter partitions are capped at ~4KB + // upstream so this is unreachable in practice; the asserts + // make that explicit and turn any future regression into a + // loud panic at write time rather than corruption at read. 
+ #[expect( + clippy::expect_used, + reason = "programmer invariant: filter partitions are capped at \ + ~4 KB upstream; an overflow here means a regression \ + slipped past the partition-size policy" + )] + let z_byte_len: usize = m + .checked_mul(stride_words) + .and_then(|v| v.checked_mul(8)) + .expect("BuRR layer z payload size overflows usize"); + #[expect( + clippy::expect_used, + reason = "programmer invariant: m bounded by partition size; \ + fits u32 by construction" + )] + let m_u32 = u32::try_from(m).expect("BuRR layer m exceeds u32::MAX"); + #[expect( + clippy::expect_used, + reason = "programmer invariant: num_blocks = m.div_ceil(b) ≤ m, \ + fits u32 by construction" + )] + let num_blocks_u32 = + u32::try_from(num_blocks).expect("BuRR layer num_blocks exceeds u32::MAX"); + #[expect( + clippy::expect_used, + reason = "programmer invariant: z_byte_len = m * stride * 8 ≤ \ + partition size in bytes; fits u32 by construction" + )] + let z_byte_len_u32 = + u32::try_from(z_byte_len).expect("BuRR layer z_byte_len exceeds u32::MAX"); + #[expect(clippy::expect_used, reason = "writing to a Vec cannot fail")] + { + buf.write_u32::(m_u32).expect("vec write"); + buf.write_u32::(num_blocks_u32) + .expect("vec write"); + buf.write_u32::(z_byte_len_u32) + .expect("vec write"); + } + buf.extend_from_slice(&layer.thresholds); + // Serialize z as little-endian u64 words. + let z_words = layer.ribbon.z_raw_words(); + debug_assert_eq!(z_words.len(), m * stride_words); + for word in z_words { + buf.extend_from_slice(&word.to_le_bytes()); + } + } + + buf +} + +/// Borrowed-slice view of one decoded layer. +/// +/// `z_bytes` stays as a borrowed slice of the wire buffer — the LSM +/// filter block is constructed afresh per `maybe_contains_hash` call +/// (the underlying `Block` is cached, but `FilterBlock` wraps it +/// freshly), so any per-layer `Vec` allocation here would happen on +/// every point read and dominate the probe path. 
The trade-off is one +/// 8-byte LE decode per matched row inside the probe loop; for `r <= +/// 64` (stride = 1) that's a single `u64::from_le_bytes` per set bit. +#[derive(Debug)] +pub(crate) struct LayerView<'a> { + pub(crate) m: usize, + pub(crate) seed: u64, + pub(crate) thresholds: &'a [u8], + pub(crate) z_bytes: &'a [u8], +} + +/// Decoded BuRR filter, holding borrowed slices into a wire-format +/// buffer. Layer payloads are zero-copy; only the small header and the +/// per-layer descriptors are eagerly parsed. +#[derive(Debug)] +pub(crate) struct DecodedFilter<'a> { + pub(crate) r: u8, + pub(crate) w: u8, + pub(crate) b: u8, + pub(crate) stride_words: usize, + pub(crate) layers: Vec>, +} + +/// Parse a wire-format BuRR filter slice. Returns an error if the magic +/// bytes don't match, the version is unrecognised, or the buffer is +/// truncated. +pub(crate) fn decode(bytes: &[u8]) -> crate::Result> { + if bytes.len() < HEADER_LEN { + return Err(crate::Error::InvalidHeader("BurrFilter")); + } + + let mut cursor = Cursor::new(bytes); + let mut magic = [0u8; MAGIC_BYTES.len()]; + cursor.read_exact(&mut magic)?; + if magic != MAGIC_BYTES { + return Err(crate::Error::InvalidHeader("BurrFilter")); + } + + let filter_type = cursor.read_u8()?; + if filter_type != BURR_FILTER_TYPE_BYTE { + return Err(crate::Error::InvalidTag(("FilterType", filter_type))); + } + let version = cursor.read_u8()?; + if version != FORMAT_VERSION { + return Err(crate::Error::InvalidHeader("BurrFilter version")); + } + + let r = cursor.read_u8()?; + let w = cursor.read_u8()?; + let b = cursor.read_u8()?; + let num_layers = cursor.read_u8()?; + let root_seed = cursor.read_u64::()?; + + // Header-field invariants. Without these checks a corrupted block + // can flow into Params::new (which would fail and silently skip the + // layer in contains_hash → false negative on read), or trigger + // divide-by-zero in is_bumped when b == 0. Fail closed at decode. 
+ if !(1..=64).contains(&r) || w != 64 || b == 0 || num_layers == 0 { + return Err(crate::Error::InvalidHeader("BurrFilter params")); + } + + let stride_words = usize::from(r).div_ceil(64); + let mut layers = Vec::with_capacity(usize::from(num_layers)); + let mut pos = HEADER_LEN; + + for layer_idx in 0..num_layers { + // On 32-bit targets `pos + LAYER_HEADER_LEN` can wrap if pos was + // advanced past a corrupted layer; compute the endpoint with + // checked_add so the bounds guard cannot succeed by wraparound. + let header_end = pos + .checked_add(LAYER_HEADER_LEN) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer header"))?; + if bytes.len() < header_end { + return Err(crate::Error::InvalidHeader("BurrFilter layer header")); + } + #[expect( + clippy::expect_used, + reason = "programmer invariant: layer header slice is exactly \ + LAYER_HEADER_LEN (12) bytes from the bounds check \ + above; the three 4-byte windows always convert." + )] + let (m_bytes, num_blocks_bytes, z_byte_len_bytes): ([u8; 4], [u8; 4], [u8; 4]) = ( + bytes[pos..pos + 4].try_into().expect("4 bytes"), + bytes[pos + 4..pos + 8].try_into().expect("4 bytes"), + bytes[pos + 8..pos + 12].try_into().expect("4 bytes"), + ); + let m = u32::from_le_bytes(m_bytes) as usize; + let num_blocks = u32::from_le_bytes(num_blocks_bytes) as usize; + let z_byte_len = u32::from_le_bytes(z_byte_len_bytes) as usize; + pos = header_end; + + // Cross-check num_blocks and z_byte_len against r/b/m before + // trusting the layer payload. Mismatches mean read_row would + // index out of bounds; we'd rather error now than panic later. 
+ if m == 0 { + return Err(crate::Error::InvalidHeader("BurrFilter layer m")); + } + let expected_blocks = m.div_ceil(usize::from(b)); + let expected_z_len = m + .checked_mul(stride_words) + .and_then(|n| n.checked_mul(8)) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer payload"))?; + if num_blocks != expected_blocks || z_byte_len != expected_z_len { + return Err(crate::Error::InvalidHeader("BurrFilter layer payload")); + } + + // Validate the per-layer params via Params::new — catches + // m < w and other Ribbon-side rejections at decode time so + // the probe path never has to fail-close on the same input. + Params::new(m, usize::from(w), usize::from(r), Mode::Standard) + .map_err(|_| crate::Error::InvalidHeader("BurrFilter layer params"))?; + + // Checked endpoint arithmetic — on 32-bit targets a corrupted + // num_blocks/z_byte_len could overflow `pos + num_blocks + z_byte_len` + // and let the original `bytes.len() < pos + …` guard succeed by + // wraparound, then panic on the slice indexing below. Compute the + // endpoints with `checked_add` and bail to InvalidHeader on any + // overflow. + let thresholds_end = pos + .checked_add(num_blocks) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer payload"))?; + let z_end = thresholds_end + .checked_add(z_byte_len) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer payload"))?; + if bytes.len() < z_end { + return Err(crate::Error::InvalidHeader("BurrFilter layer payload")); + } + let thresholds = &bytes[pos..thresholds_end]; + let z_bytes = &bytes[thresholds_end..z_end]; + pos = z_end; + + // Per-layer seed re-derived from root_seed + layer_idx to match + // what the builder used. The wire format does NOT store layer + // seeds because they're a pure function of (root_seed, + // layer_idx) — keeping it that way prevents drift. 
+ let seed = derive_layer_seed(root_seed, layer_idx); + + layers.push(LayerView { + m, + seed, + thresholds, + z_bytes, + }); + } + + Ok(DecodedFilter { + r, + w, + b, + stride_words, + layers, + }) +} + +/// Single-pass parse + probe over raw wire bytes. +/// +/// Equivalent to `decode(bytes).map(|d| contains_hash(&d, hash))` but +/// without allocating the intermediate `DecodedFilter` (and its +/// `Vec`). Used on the LSM table read hot path +/// (`FilterBlock::maybe_contains_hash`) where the wire buffer is +/// already in the block cache — re-parsing the header and walking +/// per-layer payloads in place avoids the per-probe heap allocation. +/// +/// Returns: +/// - `Ok(true)` — hash may be present (or wire is corrupted in a way +/// we cannot validate → fail-closed: caller falls through to a real +/// index lookup rather than reporting a false negative); +/// - `Ok(false)` — hash is definitely not in the inserted set; +/// - `Err(InvalidHeader)` — wire prefix is unparseable (bad magic, +/// wrong filter_type/version, truncated). Differs from the +/// fail-closed `true` path: a structurally invalid header is a real +/// error returned upstream so the table read path can surface it. +#[inline] +#[expect( + clippy::many_single_char_names, + reason = "r/w/b/m are well-known params from the BuRR/Ribbon literature; single-letter naming matches the rest of the module." 
+)] +pub(crate) fn contains_hash_from_bytes(bytes: &[u8], hash: u64) -> crate::Result { + if bytes.len() < HEADER_LEN { + return Err(crate::Error::InvalidHeader("BurrFilter")); + } + + if bytes[..MAGIC_BYTES.len()] != MAGIC_BYTES { + return Err(crate::Error::InvalidHeader("BurrFilter")); + } + let filter_type = bytes[MAGIC_BYTES.len()]; + if filter_type != BURR_FILTER_TYPE_BYTE { + return Err(crate::Error::InvalidTag(("FilterType", filter_type))); + } + let version = bytes[MAGIC_BYTES.len() + 1]; + if version != FORMAT_VERSION { + return Err(crate::Error::InvalidHeader("BurrFilter version")); + } + + let r = bytes[MAGIC_BYTES.len() + 2]; + let w = bytes[MAGIC_BYTES.len() + 3]; + let b = bytes[MAGIC_BYTES.len() + 4]; + let num_layers = bytes[MAGIC_BYTES.len() + 5]; + if !(1..=64).contains(&r) || w != 64 || b == 0 || num_layers == 0 { + return Err(crate::Error::InvalidHeader("BurrFilter params")); + } + let seed_off = MAGIC_BYTES.len() + 6; + let root_seed = u64::from_le_bytes( + bytes[seed_off..seed_off + 8] + .try_into() + .map_err(|_| crate::Error::InvalidHeader("BurrFilter"))?, + ); + + // r <= 64 → stride_words == 1. We mirror the in-memory probe + // invariants without storing stride at all; if r > 64 ever lands + // the validation above already rejected it. + let mut fingerprint_buf = [0_u64; 1]; + let mut pos = HEADER_LEN; + + for layer_idx in 0..num_layers { + // Same checked-add guard as `decode`; on 32-bit a corrupted pos + // could let unchecked `pos + LAYER_HEADER_LEN` wrap past + // `bytes.len()` and panic at the slice indexing below. 
+ let header_end = pos + .checked_add(LAYER_HEADER_LEN) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer header"))?; + if bytes.len() < header_end { + return Err(crate::Error::InvalidHeader("BurrFilter layer header")); + } + let m_bytes: [u8; 4] = bytes[pos..pos + 4] + .try_into() + .map_err(|_| crate::Error::InvalidHeader("BurrFilter"))?; + let num_blocks_bytes: [u8; 4] = bytes[pos + 4..pos + 8] + .try_into() + .map_err(|_| crate::Error::InvalidHeader("BurrFilter"))?; + let z_byte_len_bytes: [u8; 4] = bytes[pos + 8..pos + 12] + .try_into() + .map_err(|_| crate::Error::InvalidHeader("BurrFilter"))?; + let m = u32::from_le_bytes(m_bytes) as usize; + let num_blocks = u32::from_le_bytes(num_blocks_bytes) as usize; + let z_byte_len = u32::from_le_bytes(z_byte_len_bytes) as usize; + pos = header_end; + + if m == 0 { + return Err(crate::Error::InvalidHeader("BurrFilter layer m")); + } + let expected_blocks = m.div_ceil(usize::from(b)); + let expected_z_len = m + .checked_mul(8) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer payload"))?; + if num_blocks != expected_blocks || z_byte_len != expected_z_len { + return Err(crate::Error::InvalidHeader("BurrFilter layer payload")); + } + // Validate per-layer Ribbon params (m vs w etc.) at parse time + // instead of fail-closing inside the probe loop. + let layer_params_base = Params::new(m, usize::from(w), usize::from(r), Mode::Standard) + .map_err(|_| crate::Error::InvalidHeader("BurrFilter layer params"))?; + // Checked endpoints — see the same pattern in `decode`. Avoids + // wraparound on 32-bit when `pos + num_blocks + z_byte_len` + // overflows usize. 
+ let thresholds_end = pos + .checked_add(num_blocks) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer payload"))?; + let z_end = thresholds_end + .checked_add(z_byte_len) + .ok_or(crate::Error::InvalidHeader("BurrFilter layer payload"))?; + if bytes.len() < z_end { + return Err(crate::Error::InvalidHeader("BurrFilter layer payload")); + } + let thresholds = &bytes[pos..thresholds_end]; + let z = &bytes[thresholds_end..z_end]; + pos = z_end; + + let seed = derive_layer_seed(root_seed, layer_idx); + let layer_params = layer_params_base.with_seed(seed); + + fingerprint_buf[0] = 0; + let equation: StandardEquation = + standard_equation_from_hash(hash, seed, &layer_params, &mut fingerprint_buf); + let fingerprint = fingerprint_buf[0]; + + if is_bumped(&equation, thresholds, b) { + continue; + } + + // GF(2) XOR-reduce against the band rows whose coeff bit is set. + let mut acc: u64 = 0; + let mut lo = equation.coeff_lo; + while lo != 0 { + let offset = lo.trailing_zeros() as usize; + let row_byte = (equation.start + offset) * 8; + let Some(slice) = z.get(row_byte..row_byte + 8) else { + // row_byte+8 > z len: payload truncated mid-row. + // Fail closed. + return Ok(true); + }; + let Ok(arr) = <[u8; 8]>::try_from(slice) else { + return Ok(true); + }; + acc ^= u64::from_le_bytes(arr); + lo &= lo - 1; + } + debug_assert_eq!(equation.coeff_hi, 0, "w <= 64 keeps coeff_hi == 0"); + return Ok(acc == fingerprint); + } + + Ok(false) +} + +/// Probe a decoded BuRR filter with a pre-computed hash. Returns +/// `true` if the hash may correspond to an inserted key, `false` if +/// definitely-not-inserted. +/// +/// This is the hot path for the LSM filter framework: the table read +/// path already computes the key's u64 hash for hash-table indexing +/// elsewhere; the filter consumes that same hash directly instead of +/// re-hashing via a `BuildHasher`. 
+#[inline] +pub(crate) fn contains_hash(decoded: &DecodedFilter<'_>, hash: u64) -> bool { + // r is validated to 1..=64 in decode, so stride_words is always 1 + // for the currently-deployed wire format. We use a single stack u64 + // for both fingerprint and acc to keep this hot path allocation- + // free. If the format ever grows to r > 64 the assertion below + // catches the mismatch — the probe path must be updated at the + // same time. + debug_assert_eq!(decoded.stride_words, 1, "BuRR wire format pins r <= 64"); + let mut fingerprint_buf = [0_u64; 1]; + + for layer in &decoded.layers { + let layer_params = match Params::new( + layer.m, + usize::from(decoded.w), + usize::from(decoded.r), + Mode::Standard, + ) { + Ok(p) => p.with_seed(layer.seed), + // Should be unreachable because decode validates r/w/b/m. + // Fail closed — return true to make the table read path + // fall through to a real index lookup rather than report a + // false negative. + Err(_) => return true, + }; + + fingerprint_buf[0] = 0; + let equation: StandardEquation = + standard_equation_from_hash(hash, layer.seed, &layer_params, &mut fingerprint_buf); + let fingerprint = fingerprint_buf[0]; + + if is_bumped(&equation, layer.thresholds, decoded.b) { + continue; + } + + // Kept at this layer — XOR-reduce the band-rows whose coeff bit + // is set, compare against the fingerprint. start ∈ [0, m-w] and + // every set bit offset ∈ [0, w-1], so row_index ∈ [0, m-1] is + // always in-bounds (proven; no per-row bounds check in the + // loop). z_bytes is borrowed wire bytes; we decode 8 LE bytes + // → u64 per matched row inline (no per-call allocation, vs + // pre-decoding into Vec which would happen on every + // FilterBlock construction during the LSM read path). 
+ let z = layer.z_bytes; + let mut acc: u64 = 0; + let mut lo = equation.coeff_lo; + while lo != 0 { + let offset = lo.trailing_zeros() as usize; + let row_byte = (equation.start + offset) * 8; + // row_byte..row_byte+8 ⊂ z is proven by start+offset < m + // and the decode-time check that z len == m * 8. If the + // invariant ever drifts (corruption, future format change + // missed here), fail closed → return true so the table + // read path falls through to a real index lookup rather + // than producing a false negative on substituted zeros. + let Some(slice) = z.get(row_byte..row_byte + 8) else { + return true; + }; + let Ok(arr) = <[u8; 8]>::try_from(slice) else { + return true; + }; + acc ^= u64::from_le_bytes(arr); + lo &= lo - 1; + } + // coeff_hi is always 0 for w <= 64 (the case we deploy); a + // future w > 64 build path would need to extend the loop here. + debug_assert_eq!(equation.coeff_hi, 0, "w <= 64 keeps coeff_hi == 0"); + + return acc == fingerprint; + } + false +} diff --git a/src/table/filter/ribbon/error.rs b/src/table/filter/ribbon/error.rs new file mode 100644 index 000000000..f770e5a18 --- /dev/null +++ b/src/table/filter/ribbon/error.rs @@ -0,0 +1,301 @@ +use core::fmt; + +#[derive(Debug, Clone, PartialEq)] +pub enum ParamError { + ZeroM, + ZeroN, + ZeroWidth, + WidthTooLarge { w: usize, max: usize }, + ZeroFingerprintBits, + WidthExceedsM { m: usize, w: usize }, + ZeroRetryLimit, + InvalidFalsePositiveRate { fpr: f64 }, + InvalidOverhead { overhead: f64 }, +} + +impl fmt::Display for ParamError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ParamError::ZeroM => write!(f, "m must be greater than zero"), + ParamError::ZeroN => write!(f, "n must be greater than zero"), + ParamError::ZeroWidth => write!(f, "w must be greater than zero"), + ParamError::WidthTooLarge { w, max } => { + write!(f, "w ({w}) must be less than or equal to {max}") + } + ParamError::ZeroFingerprintBits => write!(f, "r must be greater 
than zero"), + ParamError::WidthExceedsM { m, w } => { + write!(f, "w ({w}) must be less than or equal to m ({m})") + } + ParamError::ZeroRetryLimit => write!(f, "retry_limit must be greater than zero"), + ParamError::InvalidFalsePositiveRate { fpr } => { + write!(f, "false positive rate must be in (0, 1), got {fpr}") + } + ParamError::InvalidOverhead { overhead } => { + write!(f, "overhead must be in [0, 10], got {overhead}") + } + } + } +} + +impl std::error::Error for ParamError {} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ConstructionFailure { + InconsistentEquation { + key_index: usize, + row_index: usize, + }, + OutOfBounds { + key_index: Option, + row_index: usize, + m: usize, + }, + /// `m * stride_words` overflowed `usize`. Caller passed an + /// unreasonably large `m` (or `r` is mistuned). Returned before any + /// storage is allocated, so this is a clean error rather than a + /// panic on the `vec!` line. + StorageLengthOverflow { + m: usize, + stride_words: usize, + }, +} + +impl fmt::Display for ConstructionFailure { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConstructionFailure::InconsistentEquation { + key_index, + row_index, + } => write!( + f, + "inconsistent equation while inserting key at index {key_index} near row {row_index}" + ), + ConstructionFailure::OutOfBounds { + key_index, + row_index, + m, + } => { + if let Some(key_index) = key_index { + write!( + f, + "row index {row_index} out of bounds for m={m} while inserting key at index {key_index}" + ) + } else { + write!( + f, + "row index {row_index} out of bounds for m={m} during back-substitution" + ) + } + } + ConstructionFailure::StorageLengthOverflow { m, stride_words } => write!( + f, + "m * stride_words overflows usize: m={m} stride_words={stride_words}", + ), + } + } +} + +impl std::error::Error for ConstructionFailure {} + +#[derive(Debug, Clone, PartialEq)] +pub enum BuildError { + InvalidParams(ParamError), + ConstructionFailed { + 
final_m: usize, + attempts: usize, + last_failure: ConstructionFailure, + }, +} + +impl fmt::Display for BuildError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BuildError::InvalidParams(err) => write!(f, "invalid parameters: {err}"), + BuildError::ConstructionFailed { + final_m, + attempts, + last_failure, + } => write!( + f, + "construction failed after {attempts} attempt(s) at m={final_m}: {last_failure}" + ), + } + } +} + +impl std::error::Error for BuildError {} + +#[derive(Debug, Clone, PartialEq)] +pub enum FilterReprError { + UnsupportedVersion { found: u8, expected: u8 }, + InvalidParams(ParamError), + StorageLengthOverflow, + InvalidStorageWords { found: usize, expected: usize }, + InvalidStorageBits { found: usize, expected: usize }, +} + +impl fmt::Display for FilterReprError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FilterReprError::UnsupportedVersion { found, expected } => write!( + f, + "unsupported RibbonFilter version {found}, expected {expected}" + ), + FilterReprError::InvalidParams(err) => { + write!( + f, + "invalid parameters in RibbonFilter representation: {err}" + ) + } + FilterReprError::StorageLengthOverflow => { + write!(f, "RibbonFilter representation storage length overflow") + } + FilterReprError::InvalidStorageWords { found, expected } => write!( + f, + "invalid RibbonFilter storage word length {found}; expected {expected}" + ), + FilterReprError::InvalidStorageBits { found, expected } => write!( + f, + "invalid RibbonFilter storage bit length {found}; expected {expected}" + ), + } + } +} + +impl std::error::Error for FilterReprError {} + +#[cfg(test)] +mod tests { + use super::{BuildError, ConstructionFailure, FilterReprError, ParamError}; + + #[test] + fn param_error_display_is_actionable() { + assert_eq!(ParamError::ZeroM.to_string(), "m must be greater than zero"); + assert_eq!( + ParamError::WidthExceedsM { m: 3, w: 4 }.to_string(), + "w (4) must be less 
than or equal to m (3)" + ); + } + + #[test] + fn param_error_display_covers_every_variant() { + // Each variant must have an actionable Display message — no + // bare debug formatting, no missing context. + assert!(ParamError::ZeroN.to_string().contains('n')); + assert!(ParamError::ZeroWidth.to_string().contains('w')); + assert!( + ParamError::WidthTooLarge { w: 65, max: 64 } + .to_string() + .contains("65") + ); + assert!(ParamError::ZeroFingerprintBits.to_string().contains('r')); + assert!( + ParamError::ZeroRetryLimit + .to_string() + .contains("retry_limit") + ); + let fpr_msg = ParamError::InvalidFalsePositiveRate { fpr: 1.5 }.to_string(); + assert!(fpr_msg.contains("1.5")); + let oh_msg = ParamError::InvalidOverhead { overhead: -1.0 }.to_string(); + assert!(oh_msg.contains("-1")); + } + + #[test] + fn build_error_display_contains_context() { + let err = BuildError::ConstructionFailed { + final_m: 19, + attempts: 6, + last_failure: ConstructionFailure::InconsistentEquation { + key_index: 7, + row_index: 2, + }, + }; + + let msg = err.to_string(); + assert!(msg.contains("6 attempt")); + assert!(msg.contains("m=19")); + assert!(msg.contains("key at index 7")); + } + + #[test] + fn build_error_invalid_params_display_chains_inner() { + let err = BuildError::InvalidParams(ParamError::ZeroM); + let msg = err.to_string(); + assert!(msg.contains("invalid parameters")); + assert!(msg.contains("m must be greater than zero")); + } + + #[test] + fn construction_failure_out_of_bounds_with_key_index() { + let err = ConstructionFailure::OutOfBounds { + key_index: Some(42), + row_index: 100, + m: 50, + }; + let msg = err.to_string(); + assert!(msg.contains("row index 100")); + assert!(msg.contains("m=50")); + assert!(msg.contains("inserting key at index 42")); + } + + #[test] + fn construction_failure_out_of_bounds_back_sub() { + // Back-substitution branch — no key_index, different phrasing. 
+ let err = ConstructionFailure::OutOfBounds { + key_index: None, + row_index: 7, + m: 4, + }; + let msg = err.to_string(); + assert!(msg.contains("row index 7")); + assert!(msg.contains("m=4")); + assert!(msg.contains("back-substitution")); + } + + #[test] + fn construction_failure_storage_length_overflow_display() { + let err = ConstructionFailure::StorageLengthOverflow { + m: usize::MAX / 2, + stride_words: 4, + }; + let msg = err.to_string(); + assert!(msg.contains("overflows usize")); + assert!(msg.contains("stride_words=4")); + } + + #[test] + fn filter_repr_error_display_covers_every_variant() { + let v = FilterReprError::UnsupportedVersion { + found: 9, + expected: 5, + }; + let msg = v.to_string(); + assert!(msg.contains("version 9")); + assert!(msg.contains("expected 5")); + + let v = FilterReprError::InvalidParams(ParamError::ZeroM); + assert!(v.to_string().contains("RibbonFilter representation")); + + assert!( + FilterReprError::StorageLengthOverflow + .to_string() + .contains("storage length overflow") + ); + + let v = FilterReprError::InvalidStorageWords { + found: 3, + expected: 7, + }; + let msg = v.to_string(); + assert!(msg.contains("word length 3")); + assert!(msg.contains("expected 7")); + + let v = FilterReprError::InvalidStorageBits { + found: 100, + expected: 200, + }; + let msg = v.to_string(); + assert!(msg.contains("bit length 100")); + assert!(msg.contains("expected 200")); + } +} diff --git a/src/table/filter/ribbon/filter.rs b/src/table/filter/ribbon/filter.rs new file mode 100644 index 000000000..88256227f --- /dev/null +++ b/src/table/filter/ribbon/filter.rs @@ -0,0 +1,162 @@ +use std::hash::{BuildHasher, Hash}; + +use super::builder::Scratch; +#[cfg(feature = "ribbon-serde")] +use super::error::FilterReprError; +use super::hashing::{for_each_set_bit_u128_parts, standard_equation_w64, xor_words}; +use super::params::Params; + +#[cfg(feature = "ribbon-serde")] +const RIBBON_FILTER_FORMAT_VERSION: u8 = 1; + +/// On-the-wire / in-memory 
snapshot of a built `RibbonFilter`. +/// +/// `z` is the band-solution matrix as a flat `Vec`. Length is +/// `params.m * params.fingerprint_words()` and the on-disk byte length +/// is `z.len() * 8`. We use a plain `Vec` rather than `BitVec` +/// because `bitvec`'s `u64: BitStore` impl is gated on +/// `target_has_atomic = "64"` — on 32-bit targets (i686, riscv32, etc.) +/// the bound fails and the crate doesn't build. Ribbon's algorithm +/// stores full `u64` words anyway; the `BitVec` wrapper was upstream +/// flavour, not a load-bearing component. +#[cfg(feature = "ribbon-serde")] +#[derive(serde::Serialize, serde::Deserialize)] +pub struct RibbonFilterRepr { + pub version: u8, + pub params: Params, + pub z: Vec, +} + +#[derive(Debug, Clone)] +pub struct RibbonFilter { + params: Params, + build_hasher: S, + z: Vec, + stride_words: usize, +} + +impl RibbonFilter +where + S: BuildHasher + Clone, +{ + pub(crate) fn new(params: Params, build_hasher: S, z: Vec) -> Self { + let stride_words = params.fingerprint_words(); + Self { + params, + build_hasher, + z, + stride_words, + } + } + + pub fn params(&self) -> Params { + self.params + } + + pub fn new_scratch(&self) -> Scratch { + Scratch::new(self.stride_words) + } + + pub fn contains(&self, key: &Q) -> bool { + let mut scratch = self.new_scratch(); + self.contains_in(key, &mut scratch) + } + + pub fn contains_in(&self, key: &Q, scratch: &mut Scratch) -> bool { + // Hard runtime check, not debug_assert: a mismatched Scratch in + // release would silently truncate via `xor_words` (shorter slice + // wins the zip) and could produce false negatives. The caller + // contract is "Scratch came from RibbonFilter::new_scratch on + // this same filter" — violating it is a programmer error worth + // panicking on in production. 
+ assert_eq!( + scratch.fingerprint.len(), + self.stride_words, + "scratch fingerprint width mismatch; use RibbonFilter::new_scratch() from this filter", + ); + assert_eq!( + scratch.acc.len(), + self.stride_words, + "scratch accumulator width mismatch; use RibbonFilter::new_scratch() from this filter", + ); + scratch.reset(); + + let equation = standard_equation_w64( + &self.build_hasher, + key, + self.params.seed, + &self.params, + &mut scratch.fingerprint, + ); + + for_each_set_bit_u128_parts(equation.coeff_lo, equation.coeff_hi, |offset| { + let row_index = equation.start + offset; + if row_index < self.params.m { + let row = self.z_row(row_index); + xor_words(&mut scratch.acc, row); + } + }); + + scratch.acc == scratch.fingerprint + } + + fn z_row(&self, row: usize) -> &[u64] { + let start = row * self.stride_words; + let end = start + self.stride_words; + &self.z[start..end] + } + + /// Borrowed access to the raw solution-matrix words. + /// + /// Length is `m * stride_words`. Each chunk of `stride_words` u64s + /// is one row's fingerprint bits in LSB-first order. Used by the + /// BuRR wire-format serializer to write the matrix as packed + /// little-endian bytes. 
+ pub(crate) fn z_raw_words(&self) -> &[u64] { + &self.z + } + + #[cfg(feature = "ribbon-serde")] + pub fn to_repr(&self) -> RibbonFilterRepr { + RibbonFilterRepr { + version: RIBBON_FILTER_FORMAT_VERSION, + params: self.params, + z: self.z.clone(), + } + } + + #[cfg(feature = "ribbon-serde")] + pub fn from_repr(repr: RibbonFilterRepr, build_hasher: S) -> Result { + if repr.version != RIBBON_FILTER_FORMAT_VERSION { + return Err(FilterReprError::UnsupportedVersion { + found: repr.version, + expected: RIBBON_FILTER_FORMAT_VERSION, + }); + } + + repr.params + .validate() + .map_err(FilterReprError::InvalidParams)?; + + let stride_words = repr.params.fingerprint_words(); + let expected_words = repr + .params + .m + .checked_mul(stride_words) + .ok_or(FilterReprError::StorageLengthOverflow)?; + + if repr.z.len() != expected_words { + return Err(FilterReprError::InvalidStorageWords { + found: repr.z.len(), + expected: expected_words, + }); + } + + Ok(Self { + params: repr.params, + build_hasher, + stride_words, + z: repr.z, + }) + } +} diff --git a/src/table/filter/ribbon/hashing.rs b/src/table/filter/ribbon/hashing.rs new file mode 100644 index 000000000..e80e36990 --- /dev/null +++ b/src/table/filter/ribbon/hashing.rs @@ -0,0 +1,140 @@ +use core::hash::Hash; +use std::hash::BuildHasher; + +use super::params::{Mode, Params}; + +const MIX_CONST: u64 = 0x9E37_79B9_7F4A_7C15; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct StandardEquation { + pub(crate) start: usize, + pub(crate) coeff_lo: u64, + pub(crate) coeff_hi: u64, +} + +#[inline] +pub(crate) fn xor_words(dst: &mut [u64], rhs: &[u64]) { + for (d, r) in dst.iter_mut().zip(rhs.iter()) { + *d ^= *r; + } +} + +#[inline] +pub(crate) fn for_each_set_bit_u128_parts(mut lo: u64, mut hi: u64, mut f: impl FnMut(usize)) { + while lo != 0 { + let bit = lo.trailing_zeros() as usize; + f(bit); + lo &= lo - 1; + } + while hi != 0 { + let bit = hi.trailing_zeros() as usize; + f(64 + bit); + hi &= hi - 1; + } +} 
+ +#[derive(Debug, Clone, Copy)] +pub(crate) struct SplitMix64 { + state: u64, +} + +impl SplitMix64 { + pub(crate) fn new(seed: u64) -> Self { + Self { state: seed } + } + + pub(crate) fn next_u64(&mut self) -> u64 { + self.state = self.state.wrapping_add(MIX_CONST); + let mut z = self.state; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + z ^ (z >> 31) + } +} + +fn fastrange_u64(x: u64, range: usize) -> usize { + ((x as u128 * range as u128) >> 64) as usize +} + +#[inline] +pub(crate) fn start_position_from_stream(next_word: u64, m: usize, w: usize) -> usize { + let start_range = m - w + 1; + // TODO: add optional boundary smash strategy here. + // TODO: add fractional-r/ICML tuned layout hooks once layout work starts. + fastrange_u64(next_word, start_range) +} + +pub(crate) fn derive_attempt_seed(base_seed: u64, attempt_index: u64) -> u64 { + let mut sm = SplitMix64::new(base_seed ^ attempt_index.wrapping_mul(MIX_CONST)); + sm.next_u64().wrapping_mul(MIX_CONST) +} + +pub(crate) fn standard_equation_w64( + build_hasher: &S, + key: &Q, + seed: u64, + params: &Params, + fingerprint: &mut [u64], +) -> StandardEquation { + let base_hash = build_hasher.hash_one(key); + standard_equation_from_hash(base_hash, seed, params, fingerprint) +} + +/// Compute the equation directly from a pre-computed key hash. +/// +/// This is the inner loop of [`standard_equation_w64`], factored out so +/// the BuRR wire-format probe path (which consumes pre-hashed inputs from +/// the LSM filter framework) can skip the `build_hasher.hash_one(key)` +/// step entirely. 
+#[expect( + clippy::inline_always, + reason = "called per layer on the BuRR filter probe hot path; inlining lets LLVM fold the \ + SplitMix stream into the caller and eliminate the &mut [u64] fingerprint pointer" +)] +#[inline(always)] +pub(crate) fn standard_equation_from_hash( + base_hash: u64, + seed: u64, + params: &Params, + fingerprint: &mut [u64], +) -> StandardEquation { + let stream_seed = (base_hash ^ seed).wrapping_mul(MIX_CONST); + let mut stream = SplitMix64::new(stream_seed); + + let start = start_position_from_stream(stream.next_u64(), params.m, params.w); + + let (coeff_lo, coeff_hi) = if params.w <= 64 { + let width_mask = if params.w == 64 { + u64::MAX + } else { + (1u64 << params.w) - 1 + }; + ((stream.next_u64() & width_mask) | 1, 0) + } else { + let lo = stream.next_u64(); + let hi_bits = params.w - 64; + let hi_mask = if hi_bits == 64 { + u64::MAX + } else { + (1u64 << hi_bits) - 1 + }; + (lo | 1, stream.next_u64() & hi_mask) + }; + + if matches!(params.mode, Mode::Homogeneous) { + fingerprint.fill(0); + } else { + for word in fingerprint.iter_mut() { + *word = stream.next_u64(); + } + if let Some(last) = fingerprint.last_mut() { + *last &= params.fingerprint_last_word_mask(); + } + } + + StandardEquation { + start, + coeff_lo, + coeff_hi, + } +} diff --git a/src/table/filter/ribbon/mod.rs b/src/table/filter/ribbon/mod.rs new file mode 100644 index 000000000..26685867e --- /dev/null +++ b/src/table/filter/ribbon/mod.rs @@ -0,0 +1,105 @@ +// Vendored from https://github.com/WilliamRagstad/ribbon-filter v0.2.0. +// Original work copyright (c) William Rågstad, available upstream under +// MIT OR Apache-2.0. Preserved upstream license texts in `_vendored/`. +// +// This in-tree copy and any modifications (module layout, integration with +// the table::filter framework, removal of the standalone `lib.rs` +// crate-level attributes, and BuRR extensions) are distributed under the +// host crate's declared license: Apache-2.0. 
The dual-licensed upstream +// permits this — Apache-2.0 alone is one of the two licenses upstream +// offers. If the ribbon module is later extracted back into a standalone +// crate the dual MIT/Apache-2.0 posture can be restored at that time. +// +// This module is the **algorithmic foundation** for the LSM filter +// subsystem. Plan: +// 1. Vendor upstream ribbon-filter (Standard + Homogeneous Ribbon over +// GF(2)) as the primitive layer — provides hashing, banded solver, +// packed storage. +// 2. Build BuRR (Bumped Ribbon Retrieval, Walzer & Dillinger 2022) on top +// of those primitives — multi-layer construction where rows that don't +// fit the primary band are "bumped" to a smaller secondary BuRR +// structure, recursively. Closes the construction-failure window +// Standard Ribbon has and pushes memory to ~1% overhead vs the +// information-theoretic minimum. +// 3. lsm-tree consumes BuRR directly (no Standard Ribbon intermediate +// state — bloom is replaced with BuRR in one step). +// 4. (later) Extract `src/table/filter/ribbon/` into a standalone crate +// (`coordinode-ribbon` or similar) bundling Standard + Homogeneous + +// BuRR variants and publish to crates.io. + +// Vendored upstream code follows its own lint conventions; the in-tree +// copy keeps them so a future extraction back into a standalone crate +// produces a clean diff against the upstream. We deliberately use a +// single crate-attribute `#![allow]` here rather than scattering +// `#[expect]` per item: minimising the diff vs upstream is the priority, +// and a future upstream refactor that removes one of the offending casts +// would otherwise yield an `unfulfilled_lint_expectations` error on the +// next sync. +// +// Lint-scope propagation: a crate-attribute `#![allow]` propagates into +// child modules, INCLUDING the first-party `burr/` submodule. 
That means +// the safety-critical lints (`expect_used`, `unwrap_used`, +// `indexing_slicing`) are currently relaxed inside `burr/` even though +// it's first-party code that would normally follow the host crate's +// stricter lint policy. Re-denying these inside `burr/` would require +// migrating ~30 existing internal sites in BuRR code to safe +// alternatives — that migration is tracked as a follow-up issue. In the +// meantime, new BuRR code uses `#[expect(..., reason)]` per use site +// for any new suppressions; the inherited blanket allow is for legacy +// sites only. +#![allow( + clippy::indexing_slicing, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::cast_precision_loss, + clippy::cast_possible_wrap, + clippy::cast_lossless, + clippy::doc_markdown, + clippy::unreadable_literal, + clippy::too_many_lines, + clippy::redundant_pub_crate, + clippy::missing_panics_doc, + clippy::missing_errors_doc, + clippy::module_name_repetitions, + clippy::missing_const_for_fn, + clippy::must_use_candidate, + clippy::return_self_not_must_use, + clippy::manual_range_contains, + clippy::use_self, + clippy::elidable_lifetime_names, + clippy::missing_fields_in_debug, + clippy::expect_used, + clippy::unwrap_used, + missing_docs +)] + +//! Ribbon filter (static approximate-membership filter over GF(2)). +//! +//! Guarantees in the currently-vendored modes (`w <= 64`): +//! - no false negatives for inserted keys after successful build, +//! - probabilistic false positives controlled by `r` fingerprint bits, +//! - deterministic behavior for fixed params, key-set, and hasher. +//! +//! `Mode::Homogeneous` is also available and uses zero right-hand-side +//! equations (smaller storage at the cost of slightly higher false-positive +//! rate at small `r`). +//! +//! See [`Params::new`] for the entry point and [`RibbonBuilder::build`] for +//! the construction call. 
+ +pub mod builder; +pub mod burr; +pub mod error; +pub mod filter; +pub mod hashing; +pub mod params; + +pub use builder::{RibbonBuilder, Scratch}; +pub use error::{BuildError, ConstructionFailure, FilterReprError, ParamError}; +pub use filter::RibbonFilter; +#[cfg(feature = "ribbon-serde")] +pub use filter::RibbonFilterRepr; +pub use params::{Mode, Params}; + +#[cfg(test)] +mod tests; diff --git a/src/table/filter/ribbon/params.rs b/src/table/filter/ribbon/params.rs new file mode 100644 index 000000000..3aae1062f --- /dev/null +++ b/src/table/filter/ribbon/params.rs @@ -0,0 +1,368 @@ +use core::fmt; + +use super::error::ParamError; + +// `ribbon-serde` is wired in Cargo.toml as `["dep:serde"]` — turning it +// on enables the cfg_attr-gated Serialize/Deserialize derives below and +// on `RibbonFilterRepr` in filter.rs. The crate does not consume the +// serde repr internally; the gate is preserved for callers that want +// an in-memory snapshot of a built filter. (bitvec was previously a +// dep here; it was dropped for 32-bit cross-arch compatibility and the +// Repr now serialises a plain `Vec<u64>`.) 
+#[cfg_attr(feature = "ribbon-serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Mode { + Standard, + Homogeneous, +} + +impl fmt::Display for Mode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Mode::Standard => write!(f, "standard"), + Mode::Homogeneous => write!(f, "homogeneous"), + } + } +} + +#[cfg_attr(feature = "ribbon-serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Params { + pub m: usize, + pub w: usize, + pub r: usize, + pub mode: Mode, + pub seed: u64, + pub retry_limit: usize, + pub grow_limit: usize, +} + +impl Params { + pub const MAX_W: usize = 128; + + pub fn new(m: usize, w: usize, r: usize, mode: Mode) -> Result { + let params = Self { + m, + w, + r, + mode, + seed: 0, + // 8 attempts: Standard Ribbon's GF(2) elimination can hit + // InconsistentEquation on the first seed/key combination + // for some inputs. The Rust std `DefaultHasher` hashes + // `u64` keys via `to_ne_bytes`, so the equation system is + // host-endianness-sensitive — a single-attempt build that + // succeeds on x86_64 (LE) may fail on powerpc64 (BE). 8 + // attempts via derived seeds makes the construction + // platform-invariant in practice without changing the + // seed-determinism contract (consumers can still pin a + // seed; the retry just iterates derived seeds within that + // seed family). + // + // BuRR's own build path bypasses retry entirely via + // `build_with_seed_verbatim`, so this default only affects + // direct `RibbonBuilder::build` consumers. 
+ retry_limit: 8, + grow_limit: 0, + }; + params.validate()?; + Ok(params) + } + + #[must_use] + pub fn with_seed(mut self, seed: u64) -> Self { + self.seed = seed; + self + } + + pub fn with_retry_limit(mut self, retry_limit: usize) -> Result { + self.retry_limit = retry_limit; + self.validate()?; + Ok(self) + } + + pub fn with_retry_policy( + mut self, + retry_limit: usize, + grow_limit: usize, + ) -> Result { + self.retry_limit = retry_limit; + self.grow_limit = grow_limit; + self.validate()?; + Ok(self) + } + + pub fn r_from_fpr(fpr: f64) -> Result { + if !(0.0 < fpr && fpr < 1.0) { + return Err(ParamError::InvalidFalsePositiveRate { fpr }); + } + let r = (-fpr.log2()).ceil() as usize; + Ok(r.max(1)) + } + + pub fn from_expected_items( + n: usize, + overhead: f64, + w: usize, + r: usize, + mode: Mode, + ) -> Result { + if n == 0 { + return Err(ParamError::ZeroN); + } + if !(0.0..=10.0).contains(&overhead) { + return Err(ParamError::InvalidOverhead { overhead }); + } + + let m = ((n as f64) * (1.0 + overhead)).ceil() as usize; + Self::new(m.max(w), w, r, mode) + } + + pub fn validate(&self) -> Result<(), ParamError> { + if self.m == 0 { + return Err(ParamError::ZeroM); + } + if self.w == 0 { + return Err(ParamError::ZeroWidth); + } + if self.w > Self::MAX_W { + return Err(ParamError::WidthTooLarge { + w: self.w, + max: Self::MAX_W, + }); + } + if self.r == 0 { + return Err(ParamError::ZeroFingerprintBits); + } + if self.retry_limit == 0 { + return Err(ParamError::ZeroRetryLimit); + } + if self.w > self.m { + return Err(ParamError::WidthExceedsM { + m: self.m, + w: self.w, + }); + } + Ok(()) + } + + pub fn start_range(&self) -> usize { + self.m - self.w + 1 + } + + pub fn fingerprint_words(&self) -> usize { + self.r.div_ceil(64) + } + + pub fn fingerprint_last_word_mask(&self) -> u64 { + let rem = self.r % 64; + if rem == 0 { + u64::MAX + } else { + (1u64 << rem) - 1 + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn ok_params() -> Params { + 
Params::new(128, 64, 8, Mode::Standard).expect("valid params") + } + + #[test] + fn mode_display_matches_expected_strings() { + assert_eq!(Mode::Standard.to_string(), "standard"); + assert_eq!(Mode::Homogeneous.to_string(), "homogeneous"); + } + + #[test] + fn new_accepts_valid_params_and_pins_retry_default() { + let p = ok_params(); + assert_eq!(p.m, 128); + assert_eq!(p.w, 64); + assert_eq!(p.r, 8); + // Default retry_limit is 8 (endian-portability hedge). + assert_eq!(p.retry_limit, 8); + assert_eq!(p.grow_limit, 0); + } + + #[test] + fn new_rejects_zero_m() { + assert_eq!( + Params::new(0, 64, 8, Mode::Standard), + Err(ParamError::ZeroM) + ); + } + + #[test] + fn new_rejects_zero_w() { + assert_eq!( + Params::new(128, 0, 8, Mode::Standard), + Err(ParamError::ZeroWidth) + ); + } + + #[test] + fn new_rejects_w_above_max() { + assert!(matches!( + Params::new(256, Params::MAX_W + 1, 8, Mode::Standard), + Err(ParamError::WidthTooLarge { .. }) + )); + } + + #[test] + fn new_rejects_zero_r() { + assert_eq!( + Params::new(128, 64, 0, Mode::Standard), + Err(ParamError::ZeroFingerprintBits) + ); + } + + #[test] + fn new_rejects_w_above_m() { + assert!(matches!( + Params::new(32, 64, 8, Mode::Standard), + Err(ParamError::WidthExceedsM { .. 
}) + )); + } + + #[test] + fn with_seed_preserves_other_fields() { + let p = ok_params().with_seed(0xDEAD_BEEF); + assert_eq!(p.seed, 0xDEAD_BEEF); + assert_eq!(p.m, 128); + assert_eq!(p.w, 64); + } + + #[test] + fn with_retry_limit_rejects_zero() { + assert_eq!( + ok_params().with_retry_limit(0), + Err(ParamError::ZeroRetryLimit) + ); + } + + #[test] + fn with_retry_limit_accepts_positive() { + let p = ok_params().with_retry_limit(3).expect("valid"); + assert_eq!(p.retry_limit, 3); + } + + #[test] + fn with_retry_policy_sets_both_fields() { + let p = ok_params().with_retry_policy(2, 5).expect("valid"); + assert_eq!(p.retry_limit, 2); + assert_eq!(p.grow_limit, 5); + } + + #[test] + fn with_retry_policy_rejects_zero_retry_limit() { + assert_eq!( + ok_params().with_retry_policy(0, 5), + Err(ParamError::ZeroRetryLimit) + ); + } + + #[test] + fn r_from_fpr_rejects_zero_and_one() { + assert!(matches!( + Params::r_from_fpr(0.0), + Err(ParamError::InvalidFalsePositiveRate { .. }) + )); + assert!(matches!( + Params::r_from_fpr(1.0), + Err(ParamError::InvalidFalsePositiveRate { .. }) + )); + assert!(matches!( + Params::r_from_fpr(-0.1), + Err(ParamError::InvalidFalsePositiveRate { .. }) + )); + } + + #[test] + fn r_from_fpr_returns_ceil_neg_log2_floored_at_one() { + // fpr = 0.5 → -log2 = 1 → r = 1 + assert_eq!(Params::r_from_fpr(0.5).unwrap(), 1); + // fpr = 0.01 → -log2 ≈ 6.64 → ceil = 7 + assert_eq!(Params::r_from_fpr(0.01).unwrap(), 7); + // fpr very close to 1.0 → -log2 ≈ 0 → max(0, 1) = 1 + assert_eq!(Params::r_from_fpr(0.999).unwrap(), 1); + } + + #[test] + fn from_expected_items_rejects_zero_n() { + assert_eq!( + Params::from_expected_items(0, 0.1, 64, 8, Mode::Standard), + Err(ParamError::ZeroN) + ); + } + + #[test] + fn from_expected_items_rejects_overhead_out_of_range() { + assert!(matches!( + Params::from_expected_items(100, -0.1, 64, 8, Mode::Standard), + Err(ParamError::InvalidOverhead { .. 
}) + )); + assert!(matches!( + Params::from_expected_items(100, 11.0, 64, 8, Mode::Standard), + Err(ParamError::InvalidOverhead { .. }) + )); + } + + #[test] + fn from_expected_items_floors_m_at_w() { + // n=1, overhead=0 → raw m = 1, floors to w = 64. + let p = Params::from_expected_items(1, 0.0, 64, 8, Mode::Standard).expect("valid"); + assert_eq!(p.m, 64); + assert_eq!(p.w, 64); + } + + #[test] + fn start_range_is_m_minus_w_plus_one() { + let p = ok_params(); + assert_eq!(p.start_range(), 128 - 64 + 1); + } + + #[test] + fn fingerprint_words_round_up_for_non_multiple_of_64() { + assert_eq!( + Params::new(128, 64, 1, Mode::Standard) + .unwrap() + .fingerprint_words(), + 1 + ); + assert_eq!( + Params::new(128, 64, 64, Mode::Standard) + .unwrap() + .fingerprint_words(), + 1 + ); + assert_eq!( + Params::new(128, 64, 65, Mode::Standard) + .unwrap() + .fingerprint_words(), + 2 + ); + assert_eq!( + Params::new(256, 64, 128, Mode::Standard) + .unwrap() + .fingerprint_words(), + 2 + ); + } + + #[test] + fn fingerprint_last_word_mask_full_when_r_multiple_of_64() { + let p = Params::new(128, 64, 64, Mode::Standard).unwrap(); + assert_eq!(p.fingerprint_last_word_mask(), u64::MAX); + } + + #[test] + fn fingerprint_last_word_mask_low_bits_when_r_not_multiple_of_64() { + let p = Params::new(128, 64, 5, Mode::Standard).unwrap(); + assert_eq!(p.fingerprint_last_word_mask(), 0b11111); + } +} diff --git a/src/table/filter/ribbon/tests.rs b/src/table/filter/ribbon/tests.rs new file mode 100644 index 000000000..38fc684c9 --- /dev/null +++ b/src/table/filter/ribbon/tests.rs @@ -0,0 +1,1061 @@ +use super::{BuildError, Mode, ParamError, Params, RibbonBuilder}; +use std::collections::hash_map::DefaultHasher; +use std::hash::BuildHasherDefault; + +use super::hashing::{standard_equation_w64, start_position_from_stream}; + +type DefaultBuildHasher = BuildHasherDefault; + +#[test] +fn params_rejects_zero_m() { + let err = Params::new(0, 4, 8, Mode::Standard).expect_err("m=0 should fail"); + 
assert_eq!(err, ParamError::ZeroM); +} + +#[test] +fn params_rejects_zero_w() { + let err = Params::new(10, 0, 8, Mode::Standard).expect_err("w=0 should fail"); + assert_eq!(err, ParamError::ZeroWidth); +} + +#[test] +fn params_rejects_zero_n_in_expected_items() { + let err = + Params::from_expected_items(0, 0.1, 4, 8, Mode::Standard).expect_err("n=0 should fail"); + assert_eq!(err, ParamError::ZeroN); +} + +#[test] +fn params_rejects_zero_r() { + let err = Params::new(10, 4, 0, Mode::Standard).expect_err("r=0 should fail"); + assert_eq!(err, ParamError::ZeroFingerprintBits); +} + +#[test] +fn params_rejects_w_greater_than_m() { + let err = Params::new(7, 8, 8, Mode::Standard).expect_err("w>m should fail"); + assert_eq!(err, ParamError::WidthExceedsM { m: 7, w: 8 }); +} + +#[test] +fn params_rejects_zero_retry_limit() { + let params = Params::new(16, 8, 8, Mode::Standard).expect("base params should be valid"); + let err = params + .with_retry_limit(0) + .expect_err("retry_limit=0 should fail"); + assert_eq!(err, ParamError::ZeroRetryLimit); +} + +#[test] +fn params_accepts_valid_values() { + let params = Params::new(16, 8, 12, Mode::Standard).expect("valid params should pass"); + assert_eq!(params.m, 16); + assert_eq!(params.w, 8); + assert_eq!(params.r, 12); +} + +#[test] +fn params_r_from_fpr_rounding_and_range() { + assert_eq!(Params::r_from_fpr(0.5).expect("valid fpr"), 1); + assert_eq!(Params::r_from_fpr(0.1).expect("valid fpr"), 4); + assert!(matches!( + Params::r_from_fpr(0.0), + Err(ParamError::InvalidFalsePositiveRate { .. 
}) + )); +} + +#[test] +fn params_from_expected_items_computes_m() { + let p = Params::from_expected_items(1000, 0.2, 16, 8, Mode::Standard) + .expect("params should be valid"); + assert_eq!(p.m, 1200); + assert_eq!(p.w, 16); + assert_eq!(p.r, 8); +} + +#[test] +fn params_from_expected_items_rejects_overhead_out_of_range() { + let err = Params::from_expected_items(1000, 10.1, 16, 8, Mode::Standard) + .expect_err("overhead > 10 should fail"); + assert!(matches!(err, ParamError::InvalidOverhead { .. })); +} + +#[test] +fn hash_pipeline_start_in_range_and_pivot_forced() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(128, 17, 13, Mode::Standard).expect("params must be valid"); + let mut fp = vec![0u64; params.fingerprint_words()]; + + let eq = standard_equation_w64(&hasher, &"hello-key", 42, ¶ms, &mut fp); + + assert!(eq.start < params.start_range()); + assert_eq!(eq.coeff_lo & 1, 1); +} + +#[test] +fn hash_pipeline_masks_fingerprint_to_r_bits() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(64, 8, 9, Mode::Standard).expect("params must be valid"); + let mut fp = vec![0u64; params.fingerprint_words()]; + + let _ = standard_equation_w64(&hasher, &12345u64, 7, ¶ms, &mut fp); + + assert_eq!(fp[0] & !params.fingerprint_last_word_mask(), 0); +} + +#[test] +fn hash_pipeline_is_deterministic_for_seed_and_key() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(96, 16, 20, Mode::Standard).expect("params must be valid"); + let mut fp_a = vec![0u64; params.fingerprint_words()]; + let mut fp_b = vec![0u64; params.fingerprint_words()]; + + let eq_a = standard_equation_w64(&hasher, &"deterministic-key", 999, ¶ms, &mut fp_a); + let eq_b = standard_equation_w64(&hasher, &"deterministic-key", 999, ¶ms, &mut fp_b); + + assert_eq!(eq_a, eq_b); + assert_eq!(fp_a, fp_b); +} + +#[test] +fn standard_builder_has_no_false_negatives_1k() { + let hasher = DefaultBuildHasher::default(); + let params = 
Params::new(3000, 16, 12, Mode::Standard).expect("params should be valid"); + let builder = RibbonBuilder::new(params.with_seed(11), hasher).expect("builder should build"); + + let keys: Vec = (0..1000).collect(); + let filter = builder.build(&keys).expect("construction should succeed"); + + for key in &keys { + assert!(filter.contains(key), "false negative for key {key}"); + } +} + +#[test] +fn standard_builder_has_no_false_negatives_10k() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(30000, 16, 10, Mode::Standard).expect("params should be valid"); + let builder = RibbonBuilder::new(params.with_seed(13), hasher).expect("builder should build"); + + let keys: Vec = (0..10000).collect(); + let filter = builder.build(&keys).expect("construction should succeed"); + + for key in &keys { + assert!(filter.contains(key), "false negative for key {key}"); + } +} + +#[test] +fn standard_builder_reports_inconsistent_equation_failure() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(16, 16, 8, Mode::Standard) + .expect("params should be valid") + .with_seed(5); + let builder = RibbonBuilder::new(params, hasher).expect("builder should build"); + + let keys: Vec = (0..200).collect(); + let result = builder.build(&keys); + + match result { + Err(BuildError::ConstructionFailed { last_failure, .. }) => { + assert!(matches!( + last_failure, + super::ConstructionFailure::InconsistentEquation { .. 
} + )); + } + Err(other) => panic!("expected construction failure, got {other}"), + Ok(_) => panic!("expected failure, got success"), + } +} + +#[test] +fn standard_builder_is_deterministic_for_same_input() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(3000, 16, 9, Mode::Standard) + .expect("params should be valid") + .with_seed(99); + + let builder_a = RibbonBuilder::new(params, hasher.clone()).expect("builder should build"); + let builder_b = RibbonBuilder::new(params, hasher).expect("builder should build"); + + let keys: Vec = (1000..2000).collect(); + let filter_a = builder_a.build(&keys).expect("first build should succeed"); + let filter_b = builder_b.build(&keys).expect("second build should succeed"); + + for probe in 990..2010u64 { + assert_eq!( + filter_a.contains(&probe), + filter_b.contains(&probe), + "non-deterministic result for key {probe}" + ); + } +} + +#[derive(Default, Clone)] +struct ConstantBuildHasher; + +impl std::hash::BuildHasher for ConstantBuildHasher { + type Hasher = ConstantHasher; + + fn build_hasher(&self) -> Self::Hasher { + ConstantHasher + } +} + +#[derive(Default, Clone)] +struct ConstantHasher; + +impl std::hash::Hasher for ConstantHasher { + fn finish(&self) -> u64 { + 0 + } + + fn write(&mut self, _bytes: &[u8]) {} +} + +#[test] +fn builder_supports_custom_buildhasher() { + let hasher = ConstantBuildHasher; + let params = Params::new(3000, 16, 9, Mode::Standard) + .expect("params should be valid") + .with_seed(88); + let builder = RibbonBuilder::new(params, hasher).expect("builder should build"); + + let keys: Vec = (0..200).collect(); + let filter = builder.build(&keys).expect("build should succeed"); + + let mut scratch = filter.new_scratch(); + for key in &keys { + assert!(filter.contains_in(key, &mut scratch)); + } +} + +#[test] +fn contains_and_contains_in_are_equivalent() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(3000, 16, 9, Mode::Standard) + .expect("params 
should be valid") + .with_seed(77); + let builder = RibbonBuilder::new(params, hasher).expect("builder should build"); + let keys: Vec = (1000..2000).collect(); + let filter = builder.build(&keys).expect("build should succeed"); + + let mut scratch = filter.new_scratch(); + for probe in 900..2100u64 { + assert_eq!( + filter.contains(&probe), + filter.contains_in(&probe, &mut scratch), + "contains mismatch at key {probe}" + ); + } +} + +#[test] +fn retry_path_is_exercised_and_eventually_succeeds() { + let hasher = DefaultBuildHasher::default(); + let keys: Vec = (0..500).collect(); + let params = Params::new(16, 16, 8, Mode::Standard) + .expect("params valid") + .with_seed(1) + .with_retry_policy(3, 0) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder valid"); + + match builder.build(&keys) { + Err(BuildError::ConstructionFailed { + final_m, + attempts, + last_failure, + }) => { + assert_eq!(final_m, 16); + assert_eq!(attempts, 3); + assert!(matches!( + last_failure, + super::ConstructionFailure::InconsistentEquation { .. } + )); + } + other => panic!("expected retry-exhausted failure, got {other:?}"), + } +} + +#[test] +fn growth_path_is_exercised_and_reports_grown_m() { + let hasher = DefaultBuildHasher::default(); + let keys: Vec = (0..500).collect(); + let params = Params::new(16, 16, 8, Mode::Standard) + .expect("params valid") + .with_seed(1) + .with_retry_policy(2, 2) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder valid"); + + match builder.build(&keys) { + Err(BuildError::ConstructionFailed { + final_m, + attempts, + last_failure, + }) => { + assert_eq!(attempts, 6); + assert_eq!(final_m, 19); + assert!(matches!( + last_failure, + super::ConstructionFailure::InconsistentEquation { .. 
} + )); + } + other => panic!("expected growth-exhausted failure, got {other:?}"), + } +} + +#[test] +fn terminal_failure_reports_attempts_and_final_m() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(16, 16, 8, Mode::Standard) + .expect("params valid") + .with_seed(1) + .with_retry_policy(2, 2) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder valid"); + let keys: Vec = (0..500).collect(); + + match builder.build(&keys) { + Err(BuildError::ConstructionFailed { + final_m, + attempts, + last_failure, + }) => { + assert_eq!(attempts, 6); + assert_eq!(final_m, 19); + assert!(matches!( + last_failure, + super::ConstructionFailure::InconsistentEquation { .. } + )); + } + other => panic!("expected terminal construction failure, got {other:?}"), + } +} + +#[test] +fn successful_build_persists_selected_attempt_seed() { + let hasher = DefaultBuildHasher::default(); + let base_seed = 123u64; + let params = Params::new(3000, 16, 9, Mode::Standard) + .expect("params valid") + .with_seed(base_seed) + .with_retry_policy(1, 0) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder valid"); + let keys: Vec = (0..1000).collect(); + + let filter = builder.build(&keys).expect("build should succeed"); + assert_eq!( + filter.params().seed, + super::hashing::derive_attempt_seed(base_seed, 0) + ); +} + +#[test] +fn homogeneous_build_succeeds_and_has_no_false_negatives() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(4000, 16, 8, Mode::Homogeneous) + .expect("params valid") + .with_seed(55); + let builder = RibbonBuilder::new(params, hasher).expect("builder valid"); + let keys: Vec = (0..1000).collect(); + + let filter = builder + .build(&keys) + .expect("homogeneous build should succeed"); + let mut scratch = filter.new_scratch(); + for key in &keys { + assert!(filter.contains_in(key, &mut scratch)); + } +} + +#[test] +fn 
homogeneous_mode_false_positive_rate_is_sane_across_seeds_and_sizes() { + let hasher = DefaultBuildHasher::default(); + let r = 8usize; + let seeds = [7u64, 77u64, 777u64]; + let sizes = [2_000usize, 8_000usize]; + let queries = 40_000usize; + + for &seed in &seeds { + for &n in &sizes { + let params = Params::new(n * 4, 16, r, Mode::Homogeneous) + .expect("params valid") + .with_seed(seed) + .with_retry_policy(2, 0) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher.clone()).expect("builder valid"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder + .build(&keys) + .expect("homogeneous build should succeed"); + + let mut scratch = filter.new_scratch(); + let mut fp = 0usize; + let query_start = 10_000_000u64 + (seed << 20) + n as u64; + for q in 0..queries { + if filter.contains_in(&(query_start + q as u64), &mut scratch) { + fp += 1; + } + } + + let observed = fp as f64 / queries as f64; + let expected = 2f64.powi(-(r as i32)); + + assert!( + observed > 0.0005, + "homogeneous fp unexpectedly near-zero seed={seed} n={n}: observed={observed}, expected~{expected}" + ); + assert!( + observed < 0.05, + "homogeneous fp unexpectedly near-trivial-high seed={seed} n={n}: observed={observed}, expected~{expected}" + ); + assert!( + observed >= expected / 8.0 && observed <= expected * 8.0, + "homogeneous fp far from expected envelope seed={seed} n={n}: observed={observed}, expected~{expected}" + ); + } + } +} + +#[test] +fn construction_failure_out_of_bounds_display_contains_context() { + let err = super::ConstructionFailure::OutOfBounds { + key_index: Some(12), + row_index: 99, + m: 80, + }; + let msg = err.to_string(); + assert!(msg.contains("row index 99")); + assert!(msg.contains("m=80")); + assert!(msg.contains("key at index 12")); +} + +#[test] +fn homogeneous_pipeline_has_zero_fingerprint() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(128, 16, 9, Mode::Homogeneous).expect("params must be 
valid"); + let mut fp = vec![0u64; params.fingerprint_words()]; + + let _ = standard_equation_w64(&hasher, &"h-key", 11, ¶ms, &mut fp); + + assert!(fp.iter().all(|&w| w == 0)); +} + +#[test] +fn width_128_pipeline_sets_bits_in_both_halves() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(400, 128, 8, Mode::Standard).expect("params valid"); + + let mut saw_hi = false; + for seed in 0..500u64 { + let mut fp = vec![0u64; params.fingerprint_words()]; + let eq = standard_equation_w64(&hasher, &"w128-key", seed, ¶ms, &mut fp); + + if eq.coeff_hi != 0 { + saw_hi = true; + break; + } + } + + assert!( + saw_hi, + "expected at least one seed with high-half coefficient bits" + ); +} + +#[test] +fn builder_supports_width_above_64() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(4000, 96, 10, Mode::Standard) + .expect("params should be valid") + .with_seed(303) + .with_retry_policy(4, 1) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder should build"); + let keys: Vec = (0..800).collect(); + let filter = builder.build(&keys).expect("construction should succeed"); + let mut scratch = filter.new_scratch(); + + for key in &keys { + assert!(filter.contains_in(key, &mut scratch)); + } +} + +#[test] +fn bitpacked_storage_maintains_membership_behavior() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(3000, 16, 12, Mode::Standard) + .expect("params should be valid") + .with_seed(1234); + let builder = RibbonBuilder::new(params, hasher).expect("builder should build"); + let keys: Vec = (0..1000).collect(); + let filter = builder.build(&keys).expect("build should succeed"); + + let mut scratch = filter.new_scratch(); + for key in &keys { + assert!(filter.contains_in(key, &mut scratch)); + } +} + +#[test] +fn compatibility_matrix_modes_widths_and_fingerprints() { + let hasher = DefaultBuildHasher::default(); + let modes = [Mode::Standard, Mode::Homogeneous]; 
+ let widths = [16usize, 80usize, 128usize]; + let rs = [8usize, 12usize]; + + for mode in modes { + for w in widths { + for r in rs { + let params = Params::new(6000, w, r, mode) + .expect("params should be valid") + .with_seed(700 + w as u64 + r as u64) + .with_retry_policy(5, 2) + .expect("retry policy valid"); + let builder = + RibbonBuilder::new(params, hasher.clone()).expect("builder should build"); + let keys: Vec = (0..1000).collect(); + let filter = builder.build(&keys).expect("construction should succeed"); + let mut scratch = filter.new_scratch(); + + for key in &keys { + assert!( + filter.contains_in(key, &mut scratch), + "false negative for mode={mode}, w={w}, r={r}, key={key}" + ); + } + } + } + } +} + +#[test] +fn start_position_hook_stays_in_bounds() { + let m = 256usize; + let w = 64usize; + let range = m - w + 1; + + for x in [0u64, 1, 7, u64::MAX / 2, u64::MAX] { + let s = start_position_from_stream(x, m, w); + assert!(s < range, "start position out of range for x={x}: {s}"); + } +} + +#[test] +fn statistical_false_positive_rates_are_within_confidence_bounds() { + let hasher = DefaultBuildHasher::default(); + let seeds = [11u64, 42u64, 777u64]; + let sizes = [1000usize, 5000usize]; + let rs = [8usize, 12usize]; + + for &seed in &seeds { + for &n in &sizes { + for &r in &rs { + let params = Params::new(n * 4, 16, r, Mode::Standard) + .expect("params should be valid") + .with_seed(seed) + .with_retry_policy(4, 1) + .expect("retry policy should be valid"); + let builder = + RibbonBuilder::new(params, hasher.clone()).expect("builder should be valid"); + let keys: Vec = (0..n as u64).collect(); + let filter = builder.build(&keys).expect("construction should succeed"); + + let queries = 20_000usize; + let query_start = 10_000_000u64 + (seed << 20) + n as u64; + let mut scratch = filter.new_scratch(); + let mut fp = 0usize; + for q in 0..queries { + if filter.contains_in(&(query_start + q as u64), &mut scratch) { + fp += 1; + } + } + + let p = 
2f64.powi(-(r as i32)); + let mean = (queries as f64) * p; + let var = (queries as f64) * p * (1.0 - p); + let sigma = var.sqrt(); + let tolerance = (8.0 * sigma).max(8.0); + let lower = (mean - tolerance).max(0.0); + let upper = mean + tolerance; + let observed = fp as f64; + + assert!( + observed >= lower && observed <= upper, + "fp out of bounds seed={seed} n={n} r={r}: observed={observed} expected~{mean} bounds=[{lower}, {upper}]" + ); + } + } + } +} + +fn lcg_next(state: &mut u64) -> u64 { + *state = state + .wrapping_mul(6364136223846793005) + .wrapping_add(1442695040888963407); + *state +} + +#[test] +fn property_no_false_negatives_across_generated_cases() { + let hasher = DefaultBuildHasher::default(); + let mut rng = 1u64; + + for case in 0..20u64 { + let seed = lcg_next(&mut rng); + let n = 200 + (lcg_next(&mut rng) % 400) as usize; + let w_choices = [16usize, 32usize, 80usize, 128usize]; + let w = w_choices[(lcg_next(&mut rng) as usize) % w_choices.len()]; + let r_choices = [8usize, 10usize, 12usize]; + let r = r_choices[(lcg_next(&mut rng) as usize) % r_choices.len()]; + let mode = if (lcg_next(&mut rng) & 1) == 0 { + Mode::Standard + } else { + Mode::Homogeneous + }; + + let params = Params::new((n * 5).max(w), w, r, mode) + .expect("params should be valid") + .with_seed(seed) + .with_retry_policy(4, 2) + .expect("retry policy should be valid"); + let builder = RibbonBuilder::new(params, hasher.clone()).expect("builder should be valid"); + let keys: Vec = (0..n as u64) + .map(|i| i.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(seed)) + .collect(); + let filter = builder + .build(&keys) + .expect("construction should succeed for generated case"); + let mut scratch = filter.new_scratch(); + + for key in &keys { + assert!( + filter.contains_in(key, &mut scratch), + "false negative for case={case}, mode={mode}, w={w}, r={r}, seed={seed}, key={key}" + ); + } + } +} + +#[test] +fn property_determinism_across_generated_cases() { + let hasher = 
DefaultBuildHasher::default(); + let mut rng = 99u64; + + for case in 0..16u64 { + let seed = lcg_next(&mut rng); + let n = 180 + (lcg_next(&mut rng) % 320) as usize; + let w_choices = [16usize, 64usize, 96usize]; + let w = w_choices[(lcg_next(&mut rng) as usize) % w_choices.len()]; + let r = if (lcg_next(&mut rng) & 1) == 0 { 8 } else { 12 }; + let mode = if (lcg_next(&mut rng) & 1) == 0 { + Mode::Standard + } else { + Mode::Homogeneous + }; + + let params = Params::new((n * 5).max(w), w, r, mode) + .expect("params should be valid") + .with_seed(seed) + .with_retry_policy(4, 2) + .expect("retry policy should be valid"); + let keys: Vec = (0..n as u64) + .map(|i| i.wrapping_mul(0xD6E8_FEB8_6659_FD93).wrapping_add(seed)) + .collect(); + + let builder_a = RibbonBuilder::new(params, hasher.clone()).expect("builder a valid"); + let builder_b = RibbonBuilder::new(params, hasher.clone()).expect("builder b valid"); + let filter_a = builder_a.build(&keys).expect("build a should succeed"); + let filter_b = builder_b.build(&keys).expect("build b should succeed"); + + let mut scratch_a = filter_a.new_scratch(); + let mut scratch_b = filter_b.new_scratch(); + for probe in 0..(n as u64 + 128) { + let q = probe.wrapping_mul(0x94D0_49BB_1331_11EB).wrapping_add(seed); + assert_eq!( + filter_a.contains_in(&q, &mut scratch_a), + filter_b.contains_in(&q, &mut scratch_b), + "determinism mismatch case={case}, mode={mode}, w={w}, r={r}, seed={seed}, q={q}" + ); + } + } +} + +fn adversarial_patterns(n: usize) -> Vec<(&'static str, Vec)> { + let ordered: Vec = (0..n as u64).collect(); + let constant_low_bits: Vec = (0..n as u64).map(|i| (i << 16) | 0xFFFF).collect(); + let stride_1024: Vec = (0..n as u64).map(|i| i * 1024).collect(); + let gray_code: Vec = (0..n as u64).map(|i| i ^ (i >> 1)).collect(); + let mul_mix_a: Vec = (0..n as u64) + .map(|i| i.wrapping_mul(0x9E37_79B9_7F4A_7C15)) + .collect(); + let mul_mix_b: Vec = (0..n as u64) + .map(|i| i.wrapping_mul(0xD6E8_FEB8_6659_FD93)) + 
.collect(); + + vec![ + ("ordered_u64", ordered), + ("constant_low_bits", constant_low_bits), + ("stride_1024", stride_1024), + ("gray_code", gray_code), + ("mul_mix_a", mul_mix_a), + ("mul_mix_b", mul_mix_b), + ] +} + +#[test] +fn adversarial_regression_corpus_has_no_false_negatives() { + let hasher = DefaultBuildHasher::default(); + let n = 2000usize; + + for (name, keys) in adversarial_patterns(n) { + let params = Params::new(8000, 16, 10, Mode::Standard) + .expect("params should be valid") + .with_seed(404) + .with_retry_policy(6, 2) + .expect("retry policy should be valid"); + let builder = RibbonBuilder::new(params, hasher.clone()).expect("builder should be valid"); + let filter = builder + .build(&keys) + .expect("construction should succeed for adversarial set"); + let mut scratch = filter.new_scratch(); + + for key in &keys { + assert!( + filter.contains_in(key, &mut scratch), + "false negative in adversarial set '{name}' for key {key}" + ); + } + } +} + +#[cfg(feature = "ribbon-serde")] +#[test] +fn serde_roundtrip_preserves_membership_behavior() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(3000, 16, 10, Mode::Standard) + .expect("params should be valid") + .with_seed(4242); + let builder = RibbonBuilder::new(params, hasher).expect("builder should be valid"); + let keys: Vec = (0..1000).collect(); + let filter = builder.build(&keys).expect("build should succeed"); + + let repr = filter.to_repr(); + let encoded = serde_json::to_string(&repr).expect("serialize should succeed"); + let decoded_repr: super::RibbonFilterRepr = + serde_json::from_str(&encoded).expect("deserialize should succeed"); + let decoded = super::RibbonFilter::from_repr(decoded_repr, DefaultBuildHasher::default()) + .expect("reconstructing filter should succeed"); + + let mut scratch_original = filter.new_scratch(); + let mut scratch_decoded = decoded.new_scratch(); + for probe in 0..1200u64 { + assert_eq!( + filter.contains_in(&probe, &mut scratch_original), 
+ decoded.contains_in(&probe, &mut scratch_decoded), + "membership mismatch for probe {probe}" + ); + } +} + +#[cfg(feature = "ribbon-serde")] +#[test] +fn serde_rejects_unknown_filter_version() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(512, 16, 8, Mode::Standard) + .expect("params should be valid") + .with_seed(9); + let builder = RibbonBuilder::new(params, hasher).expect("builder should be valid"); + let keys: Vec = (0..100).collect(); + let filter = builder.build(&keys).expect("build should succeed"); + + let mut value = serde_json::to_value(filter.to_repr()).expect("serialize should succeed"); + value["version"] = serde_json::Value::from(99u64); + + let repr = serde_json::from_value::(value) + .expect("deserializing repr should succeed"); + let err = super::RibbonFilter::from_repr(repr, DefaultBuildHasher::default()) + .expect_err("reconstructing unknown version should fail"); + assert!(err.to_string().contains("unsupported RibbonFilter version")); +} + +#[cfg(feature = "ribbon-serde")] +#[test] +fn serde_rejects_incorrect_storage_word_length() { + let hasher = DefaultBuildHasher::default(); + let params = Params::new(512, 16, 8, Mode::Standard) + .expect("params should be valid") + .with_seed(10); + let builder = RibbonBuilder::new(params, hasher).expect("builder should be valid"); + let keys: Vec = (0..100).collect(); + let filter = builder.build(&keys).expect("build should succeed"); + + let mut repr = filter.to_repr(); + let expected_words = repr.params.m * repr.params.fingerprint_words(); + let wrong_words = expected_words - 1; + repr.z = vec![0_u64; wrong_words]; + + let err = super::RibbonFilter::from_repr(repr, DefaultBuildHasher::default()) + .expect_err("reconstructing invalid storage should fail"); + assert!( + err.to_string() + .contains("invalid RibbonFilter storage word length") + ); +} + +#[test] +fn param_error_display_covers_all_variants() { + use super::error::ParamError; + let cases = [ + (ParamError::ZeroM, 
"m must"), + (ParamError::ZeroN, "n must"), + (ParamError::ZeroWidth, "w must"), + (ParamError::ZeroFingerprintBits, "r must"), + (ParamError::ZeroRetryLimit, "retry_limit must"), + ]; + for (err, needle) in &cases { + let s = format!("{err}"); + assert!( + s.contains(needle), + "ParamError::{err:?} display missing '{needle}': got {s}" + ); + } + let s = format!("{}", ParamError::WidthTooLarge { w: 200, max: 128 }); + assert!(s.contains("200") && s.contains("128"), "got: {s}"); + let s = format!("{}", ParamError::WidthExceedsM { m: 4, w: 16 }); + assert!( + s.contains("m=4") || s.contains("w=16") || s.contains('4') && s.contains("16"), + "got: {s}" + ); + let s = format!("{}", ParamError::InvalidFalsePositiveRate { fpr: 2.0 }); + assert!(s.contains('2') || s.contains("false"), "got: {s}"); + let s = format!("{}", ParamError::InvalidOverhead { overhead: -0.5 }); + assert!(s.contains("overhead") || s.contains("-0.5"), "got: {s}"); +} + +#[test] +fn construction_failure_display_inconsistent_eq() { + use super::error::ConstructionFailure; + let err = ConstructionFailure::InconsistentEquation { + key_index: 17, + row_index: 99, + }; + let s = format!("{err}"); + assert!(s.contains("17") && s.contains("99"), "got: {s}"); +} + +#[test] +fn construction_failure_display_out_of_bounds() { + use super::error::ConstructionFailure; + let err = ConstructionFailure::OutOfBounds { + key_index: Some(5), + row_index: 1000, + m: 800, + }; + let s = format!("{err}"); + assert!( + s.contains('5') && s.contains("1000") && s.contains("800"), + "got: {s}" + ); +} + +#[test] +fn build_error_display_covers_variants() { + use super::error::{BuildError, ConstructionFailure, ParamError}; + let invalid = BuildError::InvalidParams(ParamError::ZeroM); + let s = format!("{invalid}"); + assert!(s.contains("invalid parameters"), "got: {s}"); + + let cf = BuildError::ConstructionFailed { + final_m: 4096, + attempts: 8, + last_failure: ConstructionFailure::InconsistentEquation { + key_index: 0, + 
row_index: 0, + }, + }; + let s = format!("{cf}"); + assert!(s.contains("4096") || s.contains('8'), "got: {s}"); +} + +#[test] +#[cfg(feature = "ribbon-serde")] +fn filter_repr_error_display_covers_variants() { + use super::error::{FilterReprError, ParamError}; + let s = format!( + "{}", + FilterReprError::UnsupportedVersion { + found: 99, + expected: 1 + } + ); + assert!(s.contains("99") && s.contains('1'), "got: {s}"); + let s = format!("{}", FilterReprError::InvalidParams(ParamError::ZeroM)); + assert!(s.contains("invalid") || s.contains("param"), "got: {s}"); + let s = format!("{}", FilterReprError::StorageLengthOverflow); + assert!(s.contains("overflow") || s.contains("storage"), "got: {s}"); + let s = format!( + "{}", + FilterReprError::InvalidStorageWords { + found: 5, + expected: 10 + } + ); + assert!(s.contains('5') && s.contains("10"), "got: {s}"); + let s = format!( + "{}", + FilterReprError::InvalidStorageBits { + found: 5, + expected: 10 + } + ); + assert!(s.contains('5') && s.contains("10"), "got: {s}"); +} + +#[test] +fn build_with_seed_verbatim_succeeds_with_generous_m() { + // Direct exercise of `build_with_seed_verbatim` (the no-retry, + // verbatim-seed entry BuRR builds layers through). Generous m so a + // single attempt with the fixed seed lands. + let hasher = DefaultBuildHasher::default(); + let params = Params::new(3000, 16, 9, Mode::Standard).expect("valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder"); + let keys: Vec = (0..500).collect(); + let filter = builder + .build_with_seed_verbatim(&keys, 0xABCD_1234, 3000) + .expect("verbatim build should land at m=3000"); + for k in &keys { + assert!(filter.contains(k), "false negative for {k}"); + } + // Persisted seed must be the one we passed, not derive_attempt_seed-mixed. + assert_eq!(filter.params().seed, 0xABCD_1234); +} + +#[test] +fn build_with_seed_verbatim_reports_single_attempt_on_failure() { + // Tight m forces a build failure even with a verbatim seed. 
+ // The error must carry attempts=1 (no retry budget on verbatim). + let hasher = DefaultBuildHasher::default(); + let params = Params::new(16, 16, 8, Mode::Standard).expect("valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder"); + let keys: Vec = (0..200).collect(); + let result = builder.build_with_seed_verbatim(&keys, 0xDEAD_BEEF, 16); + match result { + Err(BuildError::ConstructionFailed { + attempts, final_m, .. + }) => { + assert_eq!(attempts, 1, "verbatim must not retry"); + assert_eq!(final_m, 16); + } + Ok(_) => panic!("tight m=16 with 200 keys should not build"), + Err(other) => panic!("expected ConstructionFailed, got {other:?}"), + } +} + +#[test] +fn build_with_seed_verbatim_from_hashes_round_trips() { + // Same as build_with_seed_verbatim but feeds raw u64 hashes — the + // entry point BuRR actually uses with xxh3-hashed LSM keys. + let bh = DefaultBuildHasher::default(); + let params = Params::new(3000, 16, 9, Mode::Standard).expect("valid"); + let builder = RibbonBuilder::new(params, bh).expect("builder"); + let hashes: Vec = (0..500_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let filter = builder + .build_with_seed_verbatim_from_hashes(&hashes, 0x1234_5678_9ABC_DEF0, 3000) + .expect("verbatim-from-hashes build should land"); + assert_eq!(filter.params().seed, 0x1234_5678_9ABC_DEF0); +} + +#[test] +fn build_with_seed_verbatim_from_hashes_propagates_failure() { + // Hashes that overload a tight m must surface ConstructionFailed + // through the from-hashes wrapper too — same single-attempt + // contract as the key-based variant. 
+ let bh = DefaultBuildHasher::default(); + let params = Params::new(16, 16, 8, Mode::Standard).expect("valid"); + let builder = RibbonBuilder::new(params, bh).expect("builder"); + let hashes: Vec = (0..200_u64) + .map(|i| crate::hash::hash64(&i.to_le_bytes())) + .collect(); + let result = builder.build_with_seed_verbatim_from_hashes(&hashes, 7, 16); + match result { + Err(BuildError::ConstructionFailed { + attempts, final_m, .. + }) => { + assert_eq!(attempts, 1); + assert_eq!(final_m, 16); + } + Ok(_) => panic!("tight m=16 with 200 hashes must not build"), + Err(other) => panic!("expected ConstructionFailed, got {other:?}"), + } +} + +#[test] +fn build_homogeneous_does_not_iterate_retries() { + // In Homogeneous mode the inner retry loop must break after the + // first attempt (the algorithm has no notion of retrying with a + // different seed). With retry_limit=5 we should see attempts=1 on + // failure, not 5. + let hasher = DefaultBuildHasher::default(); + let params = Params::new(16, 16, 8, Mode::Homogeneous) + .expect("valid") + .with_seed(1) + .with_retry_policy(5, 0) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder"); + let keys: Vec = (0..200).collect(); + let result = builder.build(&keys); + // Homogeneous "build" always succeeds on insertable input because + // unoccupied rows get random fingerprints. With m=16 << 200 keys + // the insertion loop should fail; if so, attempts must be 1. + if let Err(BuildError::ConstructionFailed { attempts, .. }) = result { + assert_eq!(attempts, 1, "Homogeneous must break after first attempt"); + } + // If it succeeded, that's also fine — the break path was still + // exercised on the success arm via the loop's natural exit. +} + +#[test] +fn build_homogeneous_does_not_grow() { + // The outer grow loop must also short-circuit in Homogeneous mode: + // even with grow_limit=3 we should never grow m past the original. 
+ let hasher = DefaultBuildHasher::default(); + let params = Params::new(16, 16, 8, Mode::Homogeneous) + .expect("valid") + .with_seed(2) + .with_retry_policy(2, 3) + .expect("retry policy valid"); + let builder = RibbonBuilder::new(params, hasher).expect("builder"); + let keys: Vec = (0..200).collect(); + if let Err(BuildError::ConstructionFailed { final_m, .. }) = builder.build(&keys) { + assert_eq!(final_m, 16, "Homogeneous must not grow m"); + } +} + +#[test] +fn builder_terminal_failure_after_grow_exhausted() { + let hasher = DefaultBuildHasher::default(); + // Tight: m=16, w=16 — small chance of single-attempt success. With + // grow_limit=0 + retry_limit=1 it should fail. + let params = Params::new(16, 16, 8, Mode::Standard) + .expect("valid") + .with_seed(123) + .with_retry_limit(1) + .expect("retry"); + let builder = RibbonBuilder::new(params, hasher).expect("builder"); + let keys: Vec = (0..16).collect(); + let result = builder.build(&keys); + if let Err(super::error::BuildError::ConstructionFailed { + attempts, final_m, .. + }) = result + { + assert_eq!(attempts, 1); + assert_eq!(final_m, 16); + } + // Either succeeds or fails — both are valid; we just exercised the path. 
+} diff --git a/src/table/filter/standard_bloom/builder.rs b/src/table/filter/standard_bloom/builder.rs deleted file mode 100644 index 8295c4f15..000000000 --- a/src/table/filter/standard_bloom/builder.rs +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -use super::super::bit_array::Builder as BitArrayBuilder; -use crate::{file::MAGIC_BYTES, table::filter::FilterType}; -use byteorder::{LittleEndian, WriteBytesExt}; -use std::io::Write; - -pub fn secondary_hash(h1: u64) -> u64 { - // Taken from https://github.com/tomtomwombat/fastbloom - h1.wrapping_shr(32).wrapping_mul(0x51_7c_c1_b7_27_22_0a_95) -} - -#[derive(Debug)] -pub struct Builder { - /// Raw bytes exposed as bit array - inner: BitArrayBuilder, - - /// Bit count - pub(super) m: usize, - - /// Number of hash functions - pub(super) k: usize, -} - -impl Builder { - #[expect( - clippy::expect_used, - reason = "we write into a Vec, so no I/O error can happen" - )] - #[must_use] - pub fn build(&self) -> Vec { - let mut v = vec![]; - - // Write header - v.write_all(&MAGIC_BYTES).expect("should not fail"); - - // NOTE: Filter type - v.write_u8(FilterType::StandardBloom.into()) - .expect("should not fail"); - - // NOTE: Hash type (unused) - v.write_u8(0).expect("should not fail"); - - v.write_u64::(self.m as u64) - .expect("should not fail"); - v.write_u64::(self.k as u64) - .expect("should not fail"); - v.write_all(self.inner.bytes()).expect("should not fail"); - - v - } - - /// Constructs a bloom filter that can hold `n` items - /// while maintaining a certain false positive rate `fpr`. 
- #[must_use] - pub fn with_fp_rate(n: usize, fpr: f32) -> Self { - use std::f32::consts::LN_2; - - assert!(n > 0); - - // NOTE: Some sensible minimum - let fpr = fpr.max(0.000_000_1); - - let m = Self::calculate_m(n, fpr); - - #[expect( - clippy::cast_precision_loss, - reason = "bpk tends to be in the range of 0-50, so easily fits into u32" - )] - let bpk = (m / n) as f32; - - #[expect( - clippy::cast_sign_loss, - clippy::cast_possible_truncation, - reason = "bpk easily fits into u32 and LN_2 < 1.0, so should still fit into a usize as well" - )] - let k = ((bpk * LN_2) as usize).max(1); - - Self { - inner: BitArrayBuilder::with_capacity(m / 8), - m, - k, - } - } - - /// Constructs a bloom filter that can hold `n` items - /// with `bpk` bits per key. - /// - /// 10 bits per key is a sensible default. - #[must_use] - pub fn with_bpk(n: usize, bpk: f32) -> Self { - use std::f32::consts::LN_2; - - assert!(bpk > 0.0); - assert!(n > 0); - - #[expect( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "bpk tends to be in the range of 0-50, so easily fits into usize" - )] - let m = n * (bpk as usize); - - #[expect( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "bpk easily fits into usize and LN_2 < 1.0, so should still fit into a usize as well" - )] - let k = ((bpk * LN_2) as usize).max(1); - - // NOTE: Round up so we don't get too little bits - #[expect( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - clippy::cast_precision_loss, - reason = "m already fits, and because we divide, it should definitely fit into usize" - )] - let bytes = (m as f32 / 8.0).ceil() as usize; - - Self { - inner: BitArrayBuilder::with_capacity(bytes), - m: bytes * 8, - k, - } - } - - pub(crate) fn calculate_m(n: usize, fp_rate: f32) -> usize { - use std::f32::consts::LN_2; - - #[expect( - clippy::cast_precision_loss, - reason = "n tends to be in the single millions at most, so f32 should be precise enough" - )] - let n = n as f32; - 
let ln2_squared = LN_2.powi(2); - - let numerator = n * fp_rate.ln(); - let m = -(numerator / ln2_squared); - - // NOTE: Round up to next byte - #[expect( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - reason = "m already fits, and because we divide, it should definitely fit into usize" - )] - let result = ((m / 8.0).ceil() * 8.0) as usize; - result - } - - /// Adds the key to the filter. - pub fn set_with_hash(&mut self, mut h1: u64) { - let mut h2 = secondary_hash(h1); - - for i in 1..=(self.k as u64) { - let idx = h1 % (self.m as u64); - - #[expect( - clippy::cast_possible_truncation, - reason = "filters tend to be pretty small, definitely less than 4 GiB, even for large tables" - )] - self.inner.enable_bit(idx as usize); - - h1 = h1.wrapping_add(h2); - h2 = h2.wrapping_mul(i); - } - } - - /// Gets the hash of a key. - #[must_use] - pub fn get_hash(key: &[u8]) -> u64 { - crate::hash::hash64(key) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use test_log::test; - - #[test] - fn bloom_calculate_m() { - assert_eq!(9_592, Builder::calculate_m(1_000, 0.01)); - assert_eq!(4_800, Builder::calculate_m(1_000, 0.1)); - assert_eq!(4_792_536, Builder::calculate_m(1_000_000, 0.1)); - } -} diff --git a/src/table/filter/standard_bloom/mod.rs b/src/table/filter/standard_bloom/mod.rs deleted file mode 100644 index 267cee73d..000000000 --- a/src/table/filter/standard_bloom/mod.rs +++ /dev/null @@ -1,306 +0,0 @@ -// Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) - -pub(crate) mod builder; - -pub use builder::Builder; - -use super::bit_array::BitArrayReader; -use crate::{ - file::MAGIC_BYTES, - table::filter::{FilterType, standard_bloom::builder::secondary_hash}, -}; -use byteorder::{LittleEndian, ReadBytesExt}; -use std::io::{Cursor, Read}; - -/// A standard bloom filter -/// -/// Allows buffering the key hashes before actual filter 
construction -/// which is needed to properly calculate the filter size, as the number of items -/// are unknown during table construction. -/// -/// The filter uses double hashing instead of `k` hash functions, see: -/// -pub struct StandardBloomFilterReader<'a> { - /// Raw bytes exposed as bit array - inner: BitArrayReader<'a>, - - /// Bit count - m: usize, - - /// Number of hash functions - k: usize, -} - -impl<'a> StandardBloomFilterReader<'a> { - pub fn new(slice: &'a [u8]) -> crate::Result { - let mut reader = Cursor::new(slice); - - // Check header - let mut magic = [0u8; MAGIC_BYTES.len()]; - reader.read_exact(&mut magic)?; - - if magic != MAGIC_BYTES { - return Err(crate::Error::InvalidHeader("BloomFilter")); - } - - // NOTE: Filter type - let filter_type = reader.read_u8()?; - let filter_type = FilterType::try_from(filter_type)?; - assert_eq!( - FilterType::StandardBloom, - filter_type, - "Invalid filter type, got={filter_type:?}, expected={:?}", - FilterType::StandardBloom - ); - - // NOTE: Hash type (unused) - let hash_type = reader.read_u8()?; - assert_eq!(0, hash_type, "Invalid bloom hash type"); - - #[expect( - clippy::cast_possible_truncation, - reason = "filters in a single table tend to be a couple of megabits of data at most, so easily fits into usize" - )] - let m = reader.read_u64::()? as usize; - - #[expect( - clippy::cast_possible_truncation, - reason = "k easily fits into any integer" - )] - let k = reader.read_u64::()? as usize; - - #[expect( - clippy::cast_possible_truncation, - reason = "filters in a single table tend to be a couple of megabytes of data at most, so easily fits into usize" - )] - let offset = reader.position() as usize; - - #[expect( - clippy::expect_used, - reason = "offset is expected to be with slice bounds" - )] - Ok(Self { - k, - m, - inner: BitArrayReader::new(slice.get(offset..).expect("should be in bounds")), - }) - } - - /// Size of bloom filter in bytes. 
- #[must_use] - #[expect(clippy::len_without_is_empty)] - pub fn len(&self) -> usize { - self.inner.bytes().len() - } - - /// Returns `true` if the hash may be contained. - /// - /// Will never have a false negative. - #[must_use] - pub fn contains_hash(&self, mut h1: u64) -> bool { - let mut h2 = secondary_hash(h1); - - for i in 1..=(self.k as u64) { - let idx = h1 % (self.m as u64); - - #[expect( - clippy::cast_possible_truncation, - reason = "filters in a single table tend to be a couple of megabytes of data at most, so easily fits into usize" - )] - if !self.has_bit(idx as usize) { - return false; - } - - h1 = h1.wrapping_add(h2); - h2 = h2.wrapping_mul(i); - } - - true - } - - /// Returns `true` if the item may be contained. - /// - /// Will never have a false negative. - #[must_use] - pub fn contains(&self, key: &[u8]) -> bool { - self.contains_hash(Self::get_hash(key)) - } - - /// Returns `true` if the bit at `idx` is `1`. - fn has_bit(&self, idx: usize) -> bool { - self.inner.get(idx) - } - - /// Gets the hash of a key. 
- fn get_hash(key: &[u8]) -> u64 { - Builder::get_hash(key) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use test_log::test; - - #[test] - fn filter_bloom_standard_serde_round_trip() -> crate::Result<()> { - let mut filter = Builder::with_fp_rate(10, 0.0001); - - let keys = &[ - b"item0", b"item1", b"item2", b"item3", b"item4", b"item5", b"item6", b"item7", - b"item8", b"item9", - ]; - - for key in keys { - filter.set_with_hash(StandardBloomFilterReader::get_hash(*key)); - } - - let filter_bytes = filter.build(); - let filter_copy = StandardBloomFilterReader::new(&filter_bytes)?; - - assert_eq!(filter.k, filter_copy.k); - assert_eq!(filter.m, filter_copy.m); - assert!(!filter_copy.contains(b"asdasads")); - assert!(!filter_copy.contains(b"item10")); - assert!(!filter_copy.contains(b"cxycxycxy")); - - Ok(()) - } - - #[test] - fn filter_bloom_standard_basic() -> crate::Result<()> { - let mut filter = Builder::with_fp_rate(10, 0.0001); - - let keys = [ - b"item0" as &[u8], - b"item1", - b"item2", - b"item3", - b"item4", - b"item5", - b"item6", - b"item7", - b"item8", - b"item9", - ]; - - for key in &keys { - filter.set_with_hash(Builder::get_hash(key)); - } - - let filter_bytes = filter.build(); - let filter = StandardBloomFilterReader::new(&filter_bytes)?; - - for key in &keys { - assert!(filter.contains(key)); - } - - assert!(!filter.contains(b"asdasdasdasdasdasdasd")); - - Ok(()) - } - - #[test] - fn filter_bloom_standard_bpk() -> crate::Result<()> { - let item_count = 1_000; - let probe_count = 100_000; - let bpk = 5.0; - - let mut filter = Builder::with_bpk(item_count, bpk); - - for key in (0..item_count).map(|_| nanoid::nanoid!()) { - let key = key.as_bytes(); - - filter.set_with_hash(Builder::get_hash(key)); - } - - let filter_bytes = filter.build(); - let filter = StandardBloomFilterReader::new(&filter_bytes)?; - - let mut false_positives = 0; - - for key in (0..probe_count).map(|_| nanoid::nanoid!()) { - let key = key.as_bytes(); - - if 
filter.contains(key) { - false_positives += 1; - } - } - - #[expect(clippy::cast_precision_loss)] - let fpr = false_positives as f32 / probe_count as f32; - assert!(fpr < 0.13); - - Ok(()) - } - - #[test] - fn filter_bloom_standard_fpr() -> crate::Result<()> { - let item_count = 100_000; - let wanted_fpr = 0.1; - - let mut filter = Builder::with_fp_rate(item_count, wanted_fpr); - - for key in (0..item_count).map(|_| nanoid::nanoid!()) { - let key = key.as_bytes(); - - filter.set_with_hash(Builder::get_hash(key)); - } - - let filter_bytes = filter.build(); - let filter = StandardBloomFilterReader::new(&filter_bytes)?; - - let mut false_positives = 0; - - for key in (0..item_count).map(|_| nanoid::nanoid!()) { - let key = key.as_bytes(); - - if filter.contains(key) { - false_positives += 1; - } - } - - #[expect(clippy::cast_precision_loss)] - let fpr = false_positives as f32 / item_count as f32; - assert!(fpr > 0.05); - assert!(fpr < 0.13); - - Ok(()) - } - - #[test] - fn filter_bloom_standard_fpr_2() -> crate::Result<()> { - let item_count = 100_000; - let wanted_fpr = 0.5; - - let mut filter = Builder::with_fp_rate(item_count, wanted_fpr); - - for key in (0..item_count).map(|_| nanoid::nanoid!()) { - let key = key.as_bytes(); - - filter.set_with_hash(Builder::get_hash(key)); - } - - let filter_bytes = filter.build(); - let filter = StandardBloomFilterReader::new(&filter_bytes)?; - - let mut false_positives = 0; - - for key in (0..item_count).map(|_| nanoid::nanoid!()) { - let key = key.as_bytes(); - - if filter.contains(key) { - false_positives += 1; - } - } - - #[expect(clippy::cast_precision_loss)] - let fpr = false_positives as f32 / item_count as f32; - assert!(fpr > 0.45); - assert!(fpr < 0.55); - - Ok(()) - } -} diff --git a/src/table/id.rs b/src/table/id.rs index 309c5e3d8..d5d5bea98 100644 --- a/src/table/id.rs +++ b/src/table/id.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is 
licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::tree::inner::TreeId; diff --git a/src/table/index_block/block_handle.rs b/src/table/index_block/block_handle.rs index 8653c740f..44612054a 100644 --- a/src/table/index_block/block_handle.rs +++ b/src/table/index_block/block_handle.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{SeqNo, UserKey}; use crate::{ diff --git a/src/table/index_block/iter.rs b/src/table/index_block/iter.rs index a95a6e670..f5bb79d28 100644 --- a/src/table/index_block/iter.rs +++ b/src/table/index_block/iter.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ SeqNo, diff --git a/src/table/index_block/mod.rs b/src/table/index_block/mod.rs index 3199b033a..bc4dadd59 100644 --- a/src/table/index_block/mod.rs +++ b/src/table/index_block/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod block_handle; mod iter; diff --git a/src/table/inner.rs b/src/table/inner.rs index c68e14615..dc0bf765b 100644 --- a/src/table/inner.rs +++ b/src/table/inner.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 
2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[cfg(feature = "metrics")] use crate::metrics::Metrics; diff --git a/src/table/iter.rs b/src/table/iter.rs index 928fbfd1b..e40d3490d 100644 --- a/src/table/iter.rs +++ b/src/table/iter.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{BlockOffset, DataBlock, GlobalTableId, data_block::Iter as DataBlockIter}; use crate::{ diff --git a/src/table/meta.rs b/src/table/meta.rs index 770df83c2..f1f5fda76 100644 --- a/src/table/meta.rs +++ b/src/table/meta.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Block, BlockHandle, DataBlock}; use crate::fs::FsFile; diff --git a/src/table/mod.rs b/src/table/mod.rs index a5323eb2f..3e346a925 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub mod block; pub(crate) mod block_index; @@ -270,7 +270,7 @@ impl Table { fn check_bloom(&self, key: &[u8], key_hash: u64) -> crate::Result { debug_assert_eq!( key_hash, - crate::table::filter::standard_bloom::Builder::get_hash(key), + crate::hash::hash64(key), "key_hash must match the hash of the provided key" ); @@ -1087,9 +1087,9 @@ impl Table { /// to avoid redundant hashing — same 
pattern as [`Table::get`]). pub(crate) fn bloom_may_contain_key(&self, key: &[u8], key_hash: u64) -> crate::Result { debug_assert_eq!( - crate::table::filter::standard_bloom::Builder::get_hash(key), + crate::hash::hash64(key), key_hash, - "bloom_may_contain_key: key_hash must be BloomBuilder::get_hash(key)" + "bloom_may_contain_key: key_hash must be crate::hash::hash64(key)" ); // Full (non-partitioned) filter — delegate to hash-only path. diff --git a/src/table/multi_writer.rs b/src/table/multi_writer.rs index 6b8e61028..e0c63e52c 100644 --- a/src/table/multi_writer.rs +++ b/src/table/multi_writer.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{filter::BloomConstructionPolicy, writer::Writer}; use crate::{ diff --git a/src/table/regions.rs b/src/table/regions.rs index da0a46f9b..a633b5e34 100644 --- a/src/table/regions.rs +++ b/src/table/regions.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{BlockHandle, BlockOffset}; use sfa::TocEntry; diff --git a/src/table/scanner.rs b/src/table/scanner.rs index 3f7d2427e..56ecc0cb0 100644 --- a/src/table/scanner.rs +++ b/src/table/scanner.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Block, DataBlock}; use crate::{ diff --git a/src/table/tests.rs b/src/table/tests.rs index 
fe4467343..7f578b070 100644 --- a/src/table/tests.rs +++ b/src/table/tests.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #![allow( clippy::doc_markdown, @@ -10,10 +10,7 @@ #![expect(clippy::expect_used, reason = "test code")] use super::*; -use crate::{ - config::BloomConstructionPolicy, fs::StdFs, - table::filter::standard_bloom::Builder as BloomBuilder, -}; +use crate::{config::BloomConstructionPolicy, fs::StdFs, hash::hash64}; use tempfile::tempdir; use test_log::test; @@ -482,19 +479,13 @@ fn table_point_read() -> crate::Result<()> { assert_eq!( b"abc", &*table - .get(b"abc", SeqNo::MAX, BloomBuilder::get_hash(b"abc"))? + .get(b"abc", SeqNo::MAX, hash64(b"abc"))? .unwrap() .key .user_key, ); - assert_eq!( - None, - table.get(b"def", SeqNo::MAX, BloomBuilder::get_hash(b"def"))?, - ); - assert_eq!( - None, - table.get(b"____", SeqNo::MAX, BloomBuilder::get_hash(b"____"))?, - ); + assert_eq!(None, table.get(b"def", SeqNo::MAX, hash64(b"def"))?,); + assert_eq!(None, table.get(b"____", SeqNo::MAX, hash64(b"____"))?,); assert_eq!( table.metadata.key_range, @@ -532,7 +523,7 @@ fn table_point_read_index_block_restart_interval() -> crate::Result<()> { .get( b"adj:out:vertex-0001:edge-0011", SeqNo::MAX, - BloomBuilder::get_hash(b"adj:out:vertex-0001:edge-0011"), + hash64(b"adj:out:vertex-0001:edge-0011"), )? .expect("test assertion: expected value for edge-0011") .value, @@ -591,11 +582,7 @@ fn table_point_read_zstd_dictionary() -> crate::Result<()> { assert_eq!( b"value-00001-padding-to-make-it-longer", &*table - .get( - b"key-00001", - SeqNo::MAX, - BloomBuilder::get_hash(b"key-00001"), - )? + .get(b"key-00001", SeqNo::MAX, hash64(b"key-00001"),)? 
.expect("test assertion: expected value for key-00001") .value, ); @@ -695,7 +682,7 @@ fn table_point_read_mvcc_block_boundary() -> crate::Result<()> { |table| { assert_eq!(2, table.metadata.data_block_count); - let key_hash = BloomBuilder::get_hash(b"a"); + let key_hash = hash64(b"a"); assert_eq!( b"5", @@ -898,7 +885,7 @@ fn table_point_read_partitioned_filter_smoke_test() -> crate::Result<()> { assert_eq!(1, table.metadata.data_block_count); for item in &items { - let key_hash = BloomBuilder::get_hash(&item.key.user_key); + let key_hash = hash64(&item.key.user_key); assert_eq!( item.value, @@ -942,62 +929,14 @@ fn table_partitioned_filter() -> crate::Result<()> { "filter TLI should exist" ); - assert_eq!( - b"a7", - &*table - .get(b"a", 8, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a6", - &*table - .get(b"a", 7, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a5", - &*table - .get(b"a", 6, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a4", - &*table - .get(b"a", 5, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a3", - &*table - .get(b"a", 4, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"b5", - &*table - .get(b"b", 6, BloomBuilder::get_hash(b"b"))? - .unwrap() - .value, - ); - assert_eq!( - b"c8", - &*table - .get(b"c", 9, BloomBuilder::get_hash(b"c"))? - .unwrap() - .value, - ); - assert_eq!( - b"d10", - &*table - .get(b"d", 11, BloomBuilder::get_hash(b"d"))? 
- .unwrap() - .value, - ); + assert_eq!(b"a7", &*table.get(b"a", 8, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a6", &*table.get(b"a", 7, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a5", &*table.get(b"a", 6, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a4", &*table.get(b"a", 5, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a3", &*table.get(b"a", 4, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"b5", &*table.get(b"b", 6, hash64(b"b"))?.unwrap().value,); + assert_eq!(b"c8", &*table.get(b"c", 9, hash64(b"c"))?.unwrap().value,); + assert_eq!(b"d10", &*table.get(b"d", 11, hash64(b"d"))?.unwrap().value,); Ok(()) }, None, @@ -1531,62 +1470,14 @@ fn table_partitioned_index() -> crate::Result<()> { "should use partitioned index", ); - assert_eq!( - b"a7", - &*table - .get(b"a", 8, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a6", - &*table - .get(b"a", 7, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a5", - &*table - .get(b"a", 6, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a4", - &*table - .get(b"a", 5, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"a3", - &*table - .get(b"a", 4, BloomBuilder::get_hash(b"a"))? - .unwrap() - .value, - ); - assert_eq!( - b"b5", - &*table - .get(b"b", 6, BloomBuilder::get_hash(b"b"))? - .unwrap() - .value, - ); - assert_eq!( - b"c8", - &*table - .get(b"c", 9, BloomBuilder::get_hash(b"c"))? - .unwrap() - .value, - ); - assert_eq!( - b"d10", - &*table - .get(b"d", 11, BloomBuilder::get_hash(b"d"))? 
- .unwrap() - .value, - ); + assert_eq!(b"a7", &*table.get(b"a", 8, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a6", &*table.get(b"a", 7, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a5", &*table.get(b"a", 6, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a4", &*table.get(b"a", 5, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"a3", &*table.get(b"a", 4, hash64(b"a"))?.unwrap().value,); + assert_eq!(b"b5", &*table.get(b"b", 6, hash64(b"b"))?.unwrap().value,); + assert_eq!(b"c8", &*table.get(b"c", 9, hash64(b"c"))?.unwrap().value,); + assert_eq!(b"d10", &*table.get(b"d", 11, hash64(b"d"))?.unwrap().value,); Ok(()) } @@ -1637,19 +1528,9 @@ fn table_global_seqno() -> crate::Result<()> { .unwrap(); // global seqno is 7, so a1 is = 8 -> can not be read by snapshot=8 - assert!( - table - .get(b"a1", 8, BloomBuilder::get_hash(b"a1"))? - .is_none() - ); + assert!(table.get(b"a1", 8, hash64(b"a1"))?.is_none()); - assert_eq!( - b"a0", - &*table - .get(b"a0", 8, BloomBuilder::get_hash(b"a0"))? - .unwrap() - .value, - ); + assert_eq!(b"a0", &*table.get(b"a0", 8, hash64(b"a0"))?.unwrap().value,); Ok(()) } @@ -2135,8 +2016,8 @@ fn bloom_may_contain_key_full_filter() -> crate::Result<()> { test_with_table( &items, |table| { - let hash_a = BloomBuilder::get_hash(b"a"); - let hash_b = BloomBuilder::get_hash(b"b"); + let hash_a = hash64(b"a"); + let hash_b = hash64(b"b"); // Existing key: both methods must accept assert!( @@ -2183,7 +2064,7 @@ fn bloom_may_contain_key_partitioned_filter() -> crate::Result<()> { &items, |table| { // Key that exists: both methods must accept - let hash_exist = BloomBuilder::get_hash(b"key_0050"); + let hash_exist = hash64(b"key_0050"); assert!( table.bloom_may_contain_key(b"key_0050", hash_exist)?, "bloom must not reject existing key in partitioned filter" @@ -2193,7 +2074,7 @@ fn bloom_may_contain_key_partitioned_filter() -> crate::Result<()> { // seek finds no ceiling and must return Ok(false). 
// Note: pinned_filter_index is always loaded when filter_tli exists // (unconditional in Table::recover), so this is always the partition-aware path. - let hash_beyond = BloomBuilder::get_hash(b"zzz_beyond"); + let hash_beyond = hash64(b"zzz_beyond"); assert!( !table.bloom_may_contain_key(b"zzz_beyond", hash_beyond)?, "key beyond all partitions should be rejected when partition index is available" diff --git a/src/table/util.rs b/src/table/util.rs index 5bbd56574..e9c48d02b 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2025-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::{Block, BlockHandle, GlobalTableId}; use crate::{ diff --git a/src/table/writer/filter/full.rs b/src/table/writer/filter/full.rs index a0fa185b9..6ab5c54db 100644 --- a/src/table/writer/filter/full.rs +++ b/src/table/writer/filter/full.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::FilterWriter; use crate::{ @@ -9,7 +9,7 @@ use crate::{ config::BloomConstructionPolicy, encryption::EncryptionProvider, prefix::PrefixExtractor, - table::{Block, filter::standard_bloom::Builder}, + table::{Block, filter::build_burr_filter_bytes}, }; use std::sync::Arc; @@ -69,15 +69,15 @@ impl FilterWriter for FullFilterWriter { } fn register_key(&mut self, key: &UserKey) -> crate::Result<()> { - self.bloom_hash_buffer.push(Builder::get_hash(key)); + self.bloom_hash_buffer.push(crate::hash::hash64(key)); - // NOTE: Prefix hashes are intentionally not deduplicated — duplicate - // hashes set the same bloom bits (idempotent). 
This can significantly - // inflate the bloom entry count when many keys share few prefixes, but - // in exchange it lowers effective FPR and keeps construction simple. + // Prefix hashes are intentionally not deduplicated. The filter + // treats each hash as an independent membership token; duplicates + // inflate the entry count but keep construction simple and lower + // effective FPR slightly. if let Some(extractor) = &self.prefix_extractor { for prefix in extractor.prefixes(key.as_ref()) { - self.bloom_hash_buffer.push(Builder::get_hash(prefix)); + self.bloom_hash_buffer.push(crate::hash::hash64(prefix)); } } @@ -90,45 +90,49 @@ impl FilterWriter for FullFilterWriter { ) -> crate::Result { if self.bloom_hash_buffer.is_empty() { log::trace!("Filter writer has no buffered hashes - not building filter"); - } else { - file_writer.start("filter")?; - - let n = self.bloom_hash_buffer.len(); - - log::trace!( - "Constructing Bloom filter with {n} entries: {:?}", - self.bloom_policy, - ); - - let start = std::time::Instant::now(); - - let filter_bytes = { - let mut builder = self.bloom_policy.init(n); - - for hash in self.bloom_hash_buffer { - builder.set_with_hash(hash); - } - - builder.build() - }; - - log::trace!( - "Built Bloom filter ({}B) in {:?}", - filter_bytes.len(), - start.elapsed(), - ); - - Block::write_into( - file_writer, - &filter_bytes, - crate::table::block::BlockType::Filter, - CompressionType::None, - self.encryption.as_deref(), - #[cfg(zstd_any)] - None, - )?; + return Ok(0); } + let n = self.bloom_hash_buffer.len(); + + log::trace!( + "Constructing BuRR filter with {n} entries: {:?}", + self.bloom_policy, + ); + + let start = std::time::Instant::now(); + // Build BEFORE opening the archive section. An invalid policy + // can produce empty bytes; opening start("filter") and then + // bailing out would leave an empty unfinished section in the + // output and desynchronise the reported block count from what + // was actually written. 
+ // `finish` consumes `Box`, so we can move `bloom_hash_buffer` + // into the BuRR builder directly — no `to_vec()` clone. + let filter_bytes = build_burr_filter_bytes(self.bloom_policy, self.bloom_hash_buffer)?; + + if filter_bytes.is_empty() { + log::trace!("BuRR policy produced empty filter — skipping block write"); + return Ok(0); + } + + file_writer.start("filter")?; + + log::trace!( + "Built BuRR filter ({}B) in {:?}", + filter_bytes.len(), + start.elapsed(), + ); + + Block::write_into( + file_writer, + &filter_bytes, + crate::table::block::BlockType::Filter, + CompressionType::None, + self.encryption.as_deref(), + #[cfg(zstd_any)] + None, + )?; + Ok(1) } } diff --git a/src/table/writer/filter/mod.rs b/src/table/writer/filter/mod.rs index 1a784f722..78f301b74 100644 --- a/src/table/writer/filter/mod.rs +++ b/src/table/writer/filter/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod full; mod partitioned; diff --git a/src/table/writer/filter/partitioned.rs b/src/table/writer/filter/partitioned.rs index fa0933d4f..cb03c466a 100644 --- a/src/table/writer/filter/partitioned.rs +++ b/src/table/writer/filter/partitioned.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::FilterWriter; use crate::{ @@ -11,7 +11,7 @@ use crate::{ prefix::PrefixExtractor, table::{ Block, BlockHandle, BlockOffset, IndexBlock, KeyedBlockHandle, - block::Header as BlockHeader, filter::standard_bloom::Builder, + block::Header as BlockHeader, filter::build_burr_filter_bytes, }, }; use std::{ @@ -71,15 +71,39 
@@ impl PartitionedFilterWriter { } fn spill_filter_partition(&mut self, key: &UserKey) -> crate::Result<()> { - let filter_bytes = { - let mut builder = self.bloom_policy.init(self.bloom_hash_buffer.len()); - - for hash in self.bloom_hash_buffer.drain(..) { - builder.set_with_hash(hash); - } - - builder.build() - }; + let hash_count = self.bloom_hash_buffer.len(); + let partition_index = self.tli_handles.len(); + // mem::replace (rather than mem::take) preserves the buffer's + // grown capacity for the next partition. `take` leaves a + // capacity-0 Vec behind, which would force a reallocation on + // every register_key call following a spill. Tables with many + // partitions can spill thousands of times during a single + // flush/compaction, so the saved reallocations matter on the + // write hot path. + let old_cap = self.bloom_hash_buffer.capacity(); + let hashes = std::mem::replace(&mut self.bloom_hash_buffer, Vec::with_capacity(old_cap)); + let filter_bytes = build_burr_filter_bytes(self.bloom_policy, hashes)?; + + // An empty BuRR build result means the policy is inactive for + // this key population (e.g. fpr <= 0 or bpk out of [1, 64]). + // For PARTITIONED filters, silently skipping a partition AND + // its TLI entry causes false negatives at read time: keys in + // this range would binary-search to a later partition's + // filter, which doesn't contain them, and Table::check_bloom + // would report "definitely not present" → false negative on a + // live key. + // + // Fail closed: return Unrecoverable so the writer aborts table + // creation rather than persisting a partially-filtered table. + // In practice this path is unreachable — BloomConstructionPolicy:: + // is_active() is checked upstream before any keys are buffered. 
+ if filter_bytes.is_empty() { + log::error!( + "BuRR partitioned writer received empty filter bytes for partition {partition_index} \ + ({hash_count} hashes) — policy likely inactive (silent skip would cause false negatives)", + ); + return Err(crate::Error::Unrecoverable); + } let header = Block::write_into( &mut self.final_filter_buffer, @@ -104,12 +128,11 @@ impl PartitionedFilterWriter { )); log::trace!( - "Built Bloom filter partition ({}B) with end_key={key:?} at +{:#X?}", + "Built BuRR filter partition ({}B) with end_key={key:?} at +{:#X?}", filter_bytes.len(), self.relative_file_pos, ); - self.bloom_hash_buffer.clear(); self.approx_filter_size = 0; self.relative_file_pos += u64::from(bytes_written); @@ -196,7 +219,7 @@ impl FilterWriter for PartitionedFilterWri } fn register_key(&mut self, key: &UserKey) -> crate::Result<()> { - self.bloom_hash_buffer.push(Builder::get_hash(key)); + self.bloom_hash_buffer.push(crate::hash::hash64(key)); // NOTE: Prefix hashes are NOT inserted for partitioned filters. 
// Table::maybe_contains_prefix returns Ok(true) for partitioned/TLI diff --git a/src/table/writer/index/full.rs b/src/table/writer/index/full.rs index 66a0e1872..e4641a1c4 100644 --- a/src/table/writer/index/full.rs +++ b/src/table/writer/index/full.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ CompressionType, diff --git a/src/table/writer/index/mod.rs b/src/table/writer/index/mod.rs index b2a4e1b41..fcc80b147 100644 --- a/src/table/writer/index/mod.rs +++ b/src/table/writer/index/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod full; mod partitioned; diff --git a/src/table/writer/index/partitioned.rs b/src/table/writer/index/partitioned.rs index d0a979556..1841ff841 100644 --- a/src/table/writer/index/partitioned.rs +++ b/src/table/writer/index/partitioned.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ CompressionType, diff --git a/src/table/writer/meta.rs b/src/table/writer/meta.rs index e64de0cf3..016929849 100644 --- a/src/table/writer/meta.rs +++ b/src/table/writer/meta.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 
2026-present, Structured World Foundation use crate::{SeqNo, UserKey, table::BlockOffset}; diff --git a/src/table/writer/mod.rs b/src/table/writer/mod.rs index 5b02815ca..fbc1c04d1 100644 --- a/src/table/writer/mod.rs +++ b/src/table/writer/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod filter; mod index; diff --git a/src/time.rs b/src/time.rs index 20d3c1dd0..59911832e 100644 --- a/src/time.rs +++ b/src/time.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Gets the unix timestamp as a duration pub fn unix_timestamp() -> std::time::Duration { diff --git a/src/tree/ingest.rs b/src/tree/ingest.rs index 8e449f2c4..0787352e2 100644 --- a/src/tree/ingest.rs +++ b/src/tree/ingest.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::Tree; use crate::{ diff --git a/src/tree/inner.rs b/src/tree/inner.rs index 0fc38b53a..fcb74bc53 100644 --- a/src/tree/inner.rs +++ b/src/tree/inner.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ SequenceNumberCounter, TableId, diff --git a/src/tree/mod.rs b/src/tree/mod.rs 
index 905c787e0..e3e02ea97 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub mod ingest; pub mod inner; @@ -861,8 +861,7 @@ impl AbstractTree for Tree { let miss_keys: Vec<(usize, u64)> = remaining .iter() .map(|&idx| { - let hash = - crate::table::filter::standard_bloom::Builder::get_hash(keys[idx].as_ref()); + let hash = crate::hash::hash64(keys[idx].as_ref()); (idx, hash) }) .collect(); @@ -1068,7 +1067,7 @@ impl Tree { } // Tables — Pinned (value shares decompressed block buffer) - let key_hash = crate::table::filter::standard_bloom::Builder::get_hash(key); + let key_hash = crate::hash::hash64(key); if let Some((entry, block)) = Self::get_internal_entry_with_block_from_tables( &super_version.version, @@ -1120,7 +1119,7 @@ impl Tree { ) -> crate::Result> { use crate::range::{IterState, TreeIter}; - let key_hash = crate::table::filter::standard_bloom::Builder::get_hash(key); + let key_hash = crate::hash::hash64(key); // NOTE: Slice::from(&[u8]) copies the key (small, typically < 100 bytes). // This runs once per merge resolution, not per-table — cost is negligible // compared to the I/O saved by partition-aware bloom filtering. 
@@ -1493,7 +1492,7 @@ impl Tree { seqno: SeqNo, comparator: &dyn crate::comparator::UserComparator, ) -> crate::Result> { - let key_hash = crate::table::filter::standard_bloom::Builder::get_hash(key); + let key_hash = crate::hash::hash64(key); Self::find_in_tables::(version, key, seqno, key_hash, comparator) } @@ -1822,7 +1821,7 @@ impl Tree { let reader = sfa::Reader::from_reader(&mut manifest_file)?; let manifest = Manifest::decode_from(&manifest_path, &reader, &*config.fs)?; - if !matches!(manifest.version, FormatVersion::V3 | FormatVersion::V4) { + if !matches!(manifest.version, FormatVersion::V5) { return Err(crate::Error::InvalidVersion(manifest.version.into())); } diff --git a/src/tree/sealed.rs b/src/tree/sealed.rs index 4673c4bef..689d4c73a 100644 --- a/src/tree/sealed.rs +++ b/src/tree/sealed.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{Memtable, tree::inner::MemtableId}; use std::sync::Arc; diff --git a/src/util.rs b/src/util.rs index f6931df5d..4a67205fa 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::UserKey; use crate::range::prefix_upper_range; diff --git a/src/value.rs b/src/value.rs index 89fbe3d9b..35b423d9e 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured 
World Foundation use crate::{Slice, ValueType, key::InternalKey}; diff --git a/src/value_type.rs b/src/value_type.rs index a4656059e..e24fa4919 100644 --- a/src/value_type.rs +++ b/src/value_type.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation /// Value type (regular value or tombstone) #[derive(Copy, Clone, Debug, Eq, PartialEq)] diff --git a/src/verify.rs b/src/verify.rs index 7ff68a385..1015de60b 100644 --- a/src/verify.rs +++ b/src/verify.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{checksum::Checksum, table::TableId}; use std::path::PathBuf; diff --git a/src/version/mod.rs b/src/version/mod.rs index dc44ea51d..9480f7bfb 100644 --- a/src/version/mod.rs +++ b/src/version/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod blob_file_list; mod optimize; @@ -712,7 +712,7 @@ impl Version { // writer.start("format_version")?; - writer.write_u8(FormatVersion::V4.into())?; + writer.write_u8(FormatVersion::V5.into())?; writer.start("crate_version")?; writer.write_all(env!("CARGO_PKG_VERSION").as_bytes())?; diff --git a/src/version/optimize.rs b/src/version/optimize.rs index 32a684244..1c195e2e6 100644 --- a/src/version/optimize.rs +++ b/src/version/optimize.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, 
fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::run::Ranged; use crate::comparator::UserComparator; diff --git a/src/version/recovery.rs b/src/version/recovery.rs index 5258d338a..521f08864 100644 --- a/src/version/recovery.rs +++ b/src/version/recovery.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ Checksum, SeqNo, TableId, TreeType, diff --git a/src/version/run.rs b/src/version/run.rs index e9160eed8..9ec598832 100644 --- a/src/version/run.rs +++ b/src/version/run.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::KeyRange; use crate::comparator::UserComparator; diff --git a/src/version/super_version.rs b/src/version/super_version.rs index 0933acdb6..7c27dbf6b 100644 --- a/src/version/super_version.rs +++ b/src/version/super_version.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ MAX_SEQNO, SeqNo, SharedSequenceNumberGenerator, diff --git a/src/vlog/accessor.rs b/src/vlog/accessor.rs index c0415e383..7b36b9d67 100644 --- a/src/vlog/accessor.rs +++ b/src/vlog/accessor.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// 
This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ Cache, GlobalTableId, TreeId, UserValue, diff --git a/src/vlog/blob_file/merge.rs b/src/vlog/blob_file/merge.rs index 6dd170acc..580b657d2 100644 --- a/src/vlog/blob_file/merge.rs +++ b/src/vlog/blob_file/merge.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::scanner::Scanner as BlobFileScanner; use crate::vlog::{BlobFileId, blob_file::scanner::ScanEntry}; diff --git a/src/vlog/blob_file/meta.rs b/src/vlog/blob_file/meta.rs index c1a68f9a0..a7a96d056 100644 --- a/src/vlog/blob_file/meta.rs +++ b/src/vlog/blob_file/meta.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ CompressionType, InternalValue, KeyRange, SeqNo, Slice, diff --git a/src/vlog/blob_file/mod.rs b/src/vlog/blob_file/mod.rs index b738dbf9d..1ab26e280 100644 --- a/src/vlog/blob_file/mod.rs +++ b/src/vlog/blob_file/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation pub mod merge; pub mod meta; diff --git a/src/vlog/blob_file/multi_writer.rs b/src/vlog/blob_file/multi_writer.rs index b43aeb779..47e0efd82 100644 --- a/src/vlog/blob_file/multi_writer.rs +++ 
b/src/vlog/blob_file/multi_writer.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use super::writer::Writer; use crate::fs::FsFile; diff --git a/src/vlog/blob_file/reader.rs b/src/vlog/blob_file/reader.rs index f753ab398..0fc5bb9b4 100644 --- a/src/vlog/blob_file/reader.rs +++ b/src/vlog/blob_file/reader.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[cfg(zstd_any)] use crate::compression::CompressionProvider as _; diff --git a/src/vlog/blob_file/scanner.rs b/src/vlog/blob_file/scanner.rs index c8e94281d..1090ea6df 100644 --- a/src/vlog/blob_file/scanner.rs +++ b/src/vlog/blob_file/scanner.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation // Format constants live in writer (the format definition site). // Extracting to a shared module is an upstream structural decision. 
diff --git a/src/vlog/blob_file/writer.rs b/src/vlog/blob_file/writer.rs index 8e3a0f732..abbbad871 100644 --- a/src/vlog/blob_file/writer.rs +++ b/src/vlog/blob_file/writer.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation #[cfg(zstd_any)] use crate::compression::CompressionProvider as _; diff --git a/src/vlog/handle.rs b/src/vlog/handle.rs index d0888147b..947c5f55e 100644 --- a/src/vlog/handle.rs +++ b/src/vlog/handle.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation use crate::{ coding::{Decode, Encode}, diff --git a/src/vlog/mod.rs b/src/vlog/mod.rs index 1e4d018f4..8c0e77b09 100644 --- a/src/vlog/mod.rs +++ b/src/vlog/mod.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation mod accessor; pub mod blob_file; diff --git a/src/write_batch.rs b/src/write_batch.rs index d6f2089ce..d8d1986a9 100644 --- a/src/write_batch.rs +++ b/src/write_batch.rs @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 // Copyright (c) 2024-present, fjall-rs -// This source code is licensed under both the Apache 2.0 and MIT License -// (found in the LICENSE-* files in the repository) +// Copyright (c) 2026-present, Structured World Foundation //! Write batch for bulk memtable insertion with shared seqno. //! 
diff --git a/tests/burr_filter_end_to_end.rs b/tests/burr_filter_end_to_end.rs new file mode 100644 index 000000000..0797e21fb --- /dev/null +++ b/tests/burr_filter_end_to_end.rs @@ -0,0 +1,71 @@ +//! End-to-end smoke test that exercises the BuRR filter through the +//! actual LSM table write + read path. +//! +//! Writes a table, flushes to disk, reopens via the same Config, and +//! verifies: +//! - every inserted key resolves via `tree.get` (no false negatives — +//! the filter must never report "definitely absent" for an +//! inserted key) +//! - unknown keys never resolve to `Some` (filter false positives +//! are acceptable — they trigger a wasted index lookup — but the +//! table read path must not return a value for a key we never +//! inserted) +//! +//! The integration value is that the table writer's BuRR builder, the +//! on-disk wire format, and the filter block reader all interoperate +//! without the BuRR-specific unit tests' shortcuts (those construct +//! filters in-process and probe in the same process). + +use lsm_tree::{AbstractTree, Config, SeqNo, SequenceNumberCounter, get_tmp_folder}; +use test_log::test; + +#[test] +fn burr_filter_persists_across_table_write_and_reopen() -> lsm_tree::Result<()> { + let folder = get_tmp_folder(); + let path = folder.path().to_owned(); + + let inserted: Vec<[u8; 8]> = (0_u64..5_000).map(u64::to_be_bytes).collect(); + + { + let seqno = SequenceNumberCounter::default(); + let tree = Config::new(&path, seqno.clone(), SequenceNumberCounter::default()).open()?; + for k in &inserted { + tree.insert(k, b"v", seqno.next()); + } + tree.flush_active_memtable(0)?; + tree.major_compact(u64::MAX, 0)?; + + for k in &inserted { + assert!( + tree.get(k, SeqNo::MAX)?.is_some(), + "inserted key {k:?} missing pre-reopen", + ); + } + } + + // Reopen — forces filter block reads from disk via the BuRR wire + // format. If the wire encoder / decoder disagree, point reads on + // the reopened tree return false negatives. 
+ { + let seqno = SequenceNumberCounter::default(); + let tree = Config::new(&path, seqno.clone(), SequenceNumberCounter::default()).open()?; + for k in &inserted { + assert!( + tree.get(k, SeqNo::MAX)?.is_some(), + "inserted key {k:?} missing after reopen — BuRR FN regression", + ); + } + // Probe a disjoint key universe. False-positive hits cost CPU + // but must never cause a returned `Some` for a key we never + // inserted (the table read path falls through to the index + // block; FPR drives only false index lookups, not bad answers). + for i in 100_000_u64..101_000 { + assert!( + tree.get(i.to_be_bytes(), SeqNo::MAX)?.is_none(), + "unknown key {i} returned Some — index/key path bug", + ); + } + } + + Ok(()) +} diff --git a/tests/tree_recovery_versions.rs b/tests/tree_recovery_versions.rs index bc0fb7b84..0b6729b08 100644 --- a/tests/tree_recovery_versions.rs +++ b/tests/tree_recovery_versions.rs @@ -51,7 +51,7 @@ fn rewrite_manifest_format_version(path: &Path, version: u8) -> lsm_tree::Result } #[test] -fn tree_writes_v4_manifest_and_recovers_it() -> lsm_tree::Result<()> { +fn tree_writes_v5_manifest_and_recovers_it() -> lsm_tree::Result<()> { let folder = get_tmp_folder(); let path = folder.path(); @@ -66,7 +66,7 @@ fn tree_writes_v4_manifest_and_recovers_it() -> lsm_tree::Result<()> { tree.insert("a", "a", 0); tree.flush_active_memtable(0)?; - assert_eq!(4, read_manifest_format_version(path)?); + assert_eq!(5, read_manifest_format_version(path)?); } { @@ -78,43 +78,58 @@ fn tree_writes_v4_manifest_and_recovers_it() -> lsm_tree::Result<()> { .open()?; assert_eq!(Some("a".as_bytes().into()), tree.get("a", 1)?); - assert_eq!(4, read_manifest_format_version(path)?); + assert_eq!(5, read_manifest_format_version(path)?); } Ok(()) } #[test] -fn tree_recovers_safe_v3_manifest() -> lsm_tree::Result<()> { - let folder = get_tmp_folder(); - let path = folder.path(); - - { - let tree = Config::new( - path, - SequenceNumberCounter::default(), - 
SequenceNumberCounter::default(), - ) - .open()?; - - tree.insert("a", "a", 0); - tree.flush_active_memtable(0)?; - - assert_eq!(4, read_manifest_format_version(path)?); - rewrite_manifest_format_version(path, 3)?; - assert_eq!(3, read_manifest_format_version(path)?); - } +fn tree_rejects_pre_v5_manifest() -> lsm_tree::Result<()> { + // V5 introduced a wire-format break for filter blocks (BuRR replaces + // Bloom). V3/V4 tables on disk cannot be read by this version and + // vice versa — opening a manifest tagged with either pre-V5 version + // must fail with InvalidVersion at recovery time rather than + // silently misreading filter bytes later. We assert BOTH V3 and V4 + // explicitly so the boundary stays exact and a future "accept V4 if + // …" relaxation lights up the test rather than passing quietly. + for pre_v5 in [3_u8, 4_u8] { + let folder = get_tmp_folder(); + let path = folder.path(); + + { + let tree = Config::new( + path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ) + .open()?; + + tree.insert("a", "a", 0); + tree.flush_active_memtable(0)?; + + assert_eq!(5, read_manifest_format_version(path)?); + rewrite_manifest_format_version(path, pre_v5)?; + assert_eq!(pre_v5, read_manifest_format_version(path)?); + } - { - let tree = Config::new( + let reopened = Config::new( path, SequenceNumberCounter::default(), SequenceNumberCounter::default(), ) - .open()?; - - assert_eq!(Some("a".as_bytes().into()), tree.get("a", 1)?); - assert_eq!(3, read_manifest_format_version(path)?); + .open(); + + match reopened { + Err(lsm_tree::Error::InvalidVersion(v)) => { + assert_eq!( + v, pre_v5, + "V{pre_v5} manifest must be rejected with the right version", + ); + } + Err(other) => panic!("expected InvalidVersion({pre_v5}), got: {other:?}"), + Ok(_) => panic!("V{pre_v5} manifest must be rejected by V5 binary"), + } } Ok(()) @@ -136,7 +151,7 @@ fn tree_rejects_unsupported_manifest_version() -> lsm_tree::Result<()> { tree.insert("a", "a", 0); 
tree.flush_active_memtable(0)?; - assert_eq!(4, read_manifest_format_version(path)?); + assert_eq!(5, read_manifest_format_version(path)?); rewrite_manifest_format_version(path, 99)?; assert_eq!(99, read_manifest_format_version(path)?); }