Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
dbb4ee4
feat: add optimized contains_prefix() method
polaz Mar 14, 2026
453e729
refactor(contains_prefix): accurate doc wording and test corrections
polaz Mar 14, 2026
c25e693
refactor(blob_tree): accurate contains_prefix override note
polaz Mar 14, 2026
1962eb5
perf: seqno-aware seek in data block point reads
polaz Mar 14, 2026
c52ec80
docs(test): clarify seqno snapshot visibility in test comment
polaz Mar 14, 2026
0513f33
docs(data_block): precise seek_to_key_seqno guarantees
polaz Mar 14, 2026
42d2c64
perf(data_block): single cmp in seek_to_key_seqno predicate
polaz Mar 14, 2026
cbf88d3
docs(test): describe restart_interval loop coverage
polaz Mar 14, 2026
1fddda0
perf(data_block): seqno-aware seek for iterator bounds
polaz Mar 15, 2026
2b0b265
refactor(data_block): dedup seek predicate, harden seqno tests
polaz Mar 15, 2026
95ae8ab
fix(docs): add backticks around identifiers in seek_to_key_seqno doc
polaz Mar 15, 2026
a03b0de
ci: add CoordiNode CI and upstream monitor workflows
polaz Mar 15, 2026
2462f33
docs: add maintained fork notice and support section
polaz Mar 15, 2026
d456379
ci: add dependabot configuration for cargo and actions
polaz Mar 15, 2026
68faa56
ci: add release-plz workflow for automated changelog and releases
polaz Mar 15, 2026
9bf3cf8
ci: split PR checks from full matrix, reduce PR to lint + ubuntu test
polaz Mar 15, 2026
3c7368c
Merge branch 'main' into feat/#138-optimized-containsprefix
polaz Mar 15, 2026
2f119dd
Merge branch 'main' into feat/#237-data-block-seqno-aware-seek
polaz Mar 15, 2026
994436c
fix: resolve all clippy warnings for strict -D warnings CI
polaz Mar 15, 2026
e16fce2
Merge remote-tracking branch 'origin/main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
c21d272
fix(decompress): use runtime validation instead of debug_assert for b…
polaz Mar 15, 2026
cb85fd4
test(block): add corruption test for lz4 byte count validation
polaz Mar 15, 2026
a6a675a
test(vlog): add corruption test for lz4 blob reader byte count valida…
polaz Mar 15, 2026
8f8a154
fix(filter,vlog): guard zero-key division and use checked cast
polaz Mar 15, 2026
5607259
fix(test): use lz4_flex::compress instead of compress_prepend_size
polaz Mar 15, 2026
0376989
docs: add Copilot review instructions with scope and issue-suggestion…
polaz Mar 15, 2026
e967130
Merge remote-tracking branch 'origin/main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
b22f937
ci: add Copilot code review instructions with scope rules
polaz Mar 15, 2026
a677f03
Merge remote-tracking branch 'origin/main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
dbb763a
refactor: upgrade #[allow] to #[expect] with reasons on all suppressions
polaz Mar 15, 2026
5a0575e
docs(table): expand get_highest_seqno docstring, add mixed insert+ing…
polaz Mar 15, 2026
84562fa
Merge remote-tracking branch 'upstream/main'
polaz Mar 15, 2026
3f65399
refactor: compute add_size as usize, remove unreachable wildcard arms
polaz Mar 15, 2026
fc10b94
Merge branch 'main' into fix/#2-clippy-warnings
polaz Mar 15, 2026
364f366
Merge branch 'fix/#2-clippy-warnings' of github.com:structured-world/…
polaz Mar 15, 2026
1a7995a
fix(blob,block): use checked_add for read_len, document size cap scope
polaz Mar 15, 2026
0cee933
Merge pull request #12 from structured-world/fix/#2-clippy-warnings
polaz Mar 15, 2026
d811d02
Merge branch 'main' into docs/#265-seqno-docstring-and-test
polaz Mar 15, 2026
0ea0654
Merge branch 'main' into feat/#237-data-block-seqno-aware-seek
polaz Mar 15, 2026
80283a2
Merge branch 'main' into feat/#138-optimized-containsprefix
polaz Mar 15, 2026
cccff65
Merge pull request #14 from structured-world/docs/#265-seqno-docstrin…
polaz Mar 15, 2026
31fdb57
Merge branch 'main' into feat/#237-data-block-seqno-aware-seek
polaz Mar 15, 2026
b374e6d
Merge branch 'main' into feat/#138-optimized-containsprefix
polaz Mar 15, 2026
4d71fb1
fix: address review feedback on contains_prefix
polaz Mar 15, 2026
2590eb9
docs(data_block): document why reverse seeks accept but ignore seqno
polaz Mar 15, 2026
4a7d0ae
Merge pull request #6 from structured-world/feat/#138-optimized-conta…
polaz Mar 15, 2026
4c40606
Merge remote-tracking branch 'origin/main' into feat/#237-data-block-…
polaz Mar 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/table/data_block/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::{
block::{Decoder, ParsedItem},
data_block::DataBlockParsedItem,
},
InternalValue,
InternalValue, SeqNo,
};

/// The data block iterator handles double-ended scans over a data block
Expand All @@ -34,6 +34,17 @@ impl<'a> Iter<'a> {
true
}

/// Seeks to the restart interval containing the target `(needle, seqno)` pair.
///
/// Exploits internal key ordering (`user_key` ASC, `seqno` DESC) to skip
/// restart intervals that hold only smaller keys or versions of `needle`
/// whose seqno is greater than or equal to the target `seqno`.
Comment thread
polaz marked this conversation as resolved.
Outdated
pub fn seek_to_key_seqno(&mut self, needle: &[u8], seqno: SeqNo) -> bool {
self.decoder.inner_mut().seek(
|head_key, head_seqno| head_key < needle || (head_key == needle && head_seqno >= seqno),
Comment thread
polaz marked this conversation as resolved.
Outdated
false,
)
Comment thread
polaz marked this conversation as resolved.
}

pub fn seek(&mut self, needle: &[u8]) -> bool {
// Find the restart interval whose head key is the last one strictly below `needle`.
// The decoder then performs a linear scan within that interval; we stop as soon as we
Expand Down
130 changes: 123 additions & 7 deletions src/table/data_block/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ impl DataBlock {
.map(|reader| reader.bucket_count())
}

// TODO: handle seqno more nicely (make Key generic, so we can do binary search over (key, seqno))
#[must_use]
pub fn point_read(&self, needle: &[u8], seqno: SeqNo) -> Option<InternalValue> {
let iter = if let Some(hash_index_reader) = self.get_hash_index_reader() {
Expand All @@ -416,10 +415,10 @@ impl DataBlock {
return None;
}
MARKER_CONFLICT => {
// NOTE: Fallback to binary search
// NOTE: Fallback to seqno-aware binary search
let mut iter = self.iter();

if !iter.seek(needle) {
if !iter.seek_to_key_seqno(needle, seqno) {
return None;
}

Expand All @@ -437,8 +436,9 @@ impl DataBlock {
} else {
let mut iter = self.iter();

// NOTE: Fallback to binary search
if !iter.seek(needle) {
// NOTE: Seqno-aware binary search skips restart intervals
// containing only versions newer than the target seqno
Comment thread
polaz marked this conversation as resolved.
Outdated
if !iter.seek_to_key_seqno(needle, seqno) {
return None;
}

Expand All @@ -449,14 +449,14 @@ impl DataBlock {
for item in iter {
match item.compare_key(needle, &self.inner.data) {
std::cmp::Ordering::Greater => {
// We are before our searched key/seqno
// We are past our searched key
return None;
}
std::cmp::Ordering::Equal => {
// If key is same as needle, check sequence number
}
std::cmp::Ordering::Less => {
// We are past our searched key
// We are before our searched key
continue;
}
}
Expand Down Expand Up @@ -1233,4 +1233,120 @@ mod tests {

Ok(())
}

#[test]
fn data_block_point_read_seqno_aware_seek() -> crate::Result<()> {
// Key "a" with seqno 5,4,3,2,1 — point_read("a", seqno=4) should return v3 (newest version with seqno < 4)
Comment thread
polaz marked this conversation as resolved.
Outdated
let items = [
InternalValue::from_components(b"a", b"a5", 5, Value),
InternalValue::from_components(b"a", b"a4", 4, Value),
InternalValue::from_components(b"a", b"a3", 3, Value),
InternalValue::from_components(b"a", b"a2", 2, Value),
InternalValue::from_components(b"a", b"a1", 1, Value),
];

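// With restart_interval=1, every item is a restart head,
// so seqno-aware binary search can skip directly to the target version.
// Intervals 2..=4 are covered as well, where the target version may sit
// inside a restart interval rather than at its head.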
Comment thread
polaz marked this conversation as resolved.
Outdated
for restart_interval in 1..=4 {
let bytes = DataBlock::encode_into_vec(&items, restart_interval, 0.0)?;

let data_block = DataBlock::new(Block {
data: bytes.into(),
header: Header {
block_type: BlockType::Data,
checksum: Checksum::from_raw(0),
data_length: 0,
uncompressed_length: 0,
},
});

// seqno=4 → should see version with seqno=3 (first with seqno < 4)
assert_eq!(
Some(items[2].clone()),
data_block.point_read(b"a", 4),
"restart_interval={restart_interval}: seqno=4 should return v3",
);

// seqno=3 → should see version with seqno=2
assert_eq!(
Some(items[3].clone()),
data_block.point_read(b"a", 3),
"restart_interval={restart_interval}: seqno=3 should return v2",
);

// seqno=6 → should see latest version (seqno=5)
assert_eq!(
Some(items[0].clone()),
data_block.point_read(b"a", 6),
"restart_interval={restart_interval}: seqno=6 should return v5",
);

// seqno=1 → no visible version (all seqno >= 1)
assert!(
data_block.point_read(b"a", 1).is_none(),
"restart_interval={restart_interval}: seqno=1 should return None",
);

// Non-existent key
assert!(
data_block.point_read(b"b", SeqNo::MAX).is_none(),
"restart_interval={restart_interval}: key 'b' should not exist",
);
}

Ok(())
}

#[test]
fn data_block_point_read_seqno_aware_seek_mixed_keys() -> crate::Result<()> {
    // Fixture: keys "a", "b" and "c" carrying 3, 5 and 1 versions respectively.
    // Within each key the versions are listed from highest to lowest seqno.
    let entries = [
        InternalValue::from_components(b"a", b"a3", 3, Value),
        InternalValue::from_components(b"a", b"a2", 2, Value),
        InternalValue::from_components(b"a", b"a1", 1, Value),
        InternalValue::from_components(b"b", b"b5", 5, Value),
        InternalValue::from_components(b"b", b"b4", 4, Value),
        InternalValue::from_components(b"b", b"b3", 3, Value),
        InternalValue::from_components(b"b", b"b2", 2, Value),
        InternalValue::from_components(b"b", b"b1", 1, Value),
        InternalValue::from_components(b"c", b"c1", 1, Value),
    ];

    // The same lookups are repeated for every restart interval from 1 to 4.
    for restart_interval in 1..=4 {
        let encoded = DataBlock::encode_into_vec(&entries, restart_interval, 0.0)?;

        // The header carries placeholder values (zero checksum and lengths).
        let block = Block {
            data: encoded.into(),
            header: Header {
                block_type: BlockType::Data,
                checksum: Checksum::from_raw(0),
                data_length: 0,
                uncompressed_length: 0,
            },
        };
        let data_block = DataBlock::new(block);

        // "b" read at seqno=4: the expected hit is the seqno=3 version.
        let expected_b = Some(entries[5].clone());
        assert_eq!(
            expected_b,
            data_block.point_read(b"b", 4),
            "restart_interval={restart_interval}: b@4 should return b3",
        );

        // "a" read at seqno=2: the expected hit is the seqno=1 version.
        let expected_a = Some(entries[2].clone());
        assert_eq!(
            expected_a,
            data_block.point_read(b"a", 2),
            "restart_interval={restart_interval}: a@2 should return a1",
        );

        // "c" read at seqno=2: the expected hit is its only version (seqno=1).
        let expected_c = Some(entries[8].clone());
        assert_eq!(
            expected_c,
            data_block.point_read(b"c", 2),
            "restart_interval={restart_interval}: c@2 should return c1",
        );
    }

    Ok(())
}
}
Loading