Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions src/compaction/leveled/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -751,21 +751,29 @@ impl CompactionStrategy for Strategy {
// Include ALL L1 tables (we're emptying L1 into L2)
table_ids.extend(target_level.list_ids());

// Include overlapping L2 tables — query per input
// table range instead of one coarse aggregate (#72).
// Include overlapping L2 tables — query per merged
// interval instead of one coarse aggregate (#72).
// An aggregate across disjoint tables (e.g. [a,d] and
// [x,z] → [a,z]) covers gaps and pulls in L2 tables
// that don't actually overlap any input table.
//
// Per-table queries are O(L2_runs * input_tables *
// log L2_run_size). Merging input ranges into disjoint
// intervals first would reduce queries but adds
// complexity; not worth it for typical input sizes
// (~10–30 tables). See #120.
for run in l2.iter() {
for input_run in target_level.iter().chain(first_level.iter()) {
for t in input_run.iter() {
for l2t in run.get_overlapping_cmp(t.key_range(), cmp) {
// Merge input key ranges into disjoint intervals first
// to reduce redundant queries when L0 tables overlap
// (#122 Part 2). Sort by comparator-min, then coalesce.
{
let mut input_ranges: Vec<_> = target_level
.iter()
.chain(first_level.iter())
.flat_map(|run| run.iter())
.map(|t| t.key_range().clone())
.collect();
input_ranges.sort_by(|a, b| cmp.compare(a.min(), b.min()));

let merged = crate::KeyRange::merge_sorted_cmp(input_ranges, cmp);

for run in l2.iter() {
for interval in &merged {
for l2t in run.get_overlapping_cmp(interval, cmp) {
table_ids.insert(Table::id(l2t));
}
}
Expand Down
153 changes: 153 additions & 0 deletions src/key_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,52 @@ impl KeyRange {
lo_included && hi_included
}

/// Collapses a min-sorted sequence of key ranges into disjoint intervals,
/// ordering keys with the supplied comparator.
///
/// `ranges` must already be sorted by minimum key in `cmp` order. Debug
/// builds assert this; release builds do not check it. A range is folded
/// into the interval before it whenever its minimum does not lie past that
/// interval's maximum, so ranges that merely touch at a shared key are
/// coalesced too. The returned intervals keep the input order and together
/// cover the union of the inputs.
///
/// Multi-level compaction calls this to cut down on redundant L2 overlap
/// queries when L0 tables overlap (#122 Part 2).
#[must_use]
pub(crate) fn merge_sorted_cmp(
    ranges: impl IntoIterator<Item = Self>,
    cmp: &dyn crate::comparator::UserComparator,
) -> Vec<Self> {
    use std::cmp::Ordering;

    let mut merged: Vec<Self> = Vec::new();

    // Only tracked in debug builds, to validate the sortedness precondition.
    #[cfg(debug_assertions)]
    let mut previous_min: Option<UserKey> = None;

    for range in ranges {
        #[cfg(debug_assertions)]
        {
            if let Some(pm) = previous_min.as_ref() {
                debug_assert!(
                    cmp.compare(pm, range.min()) != Ordering::Greater,
                    "merge_sorted_cmp: input ranges must be sorted by min key in comparator order",
                );
            }
            previous_min = Some(range.min().clone());
        }

        match merged.last_mut() {
            // tail.max >= range.min: the range overlaps or touches the open
            // interval, so absorb it instead of starting a new one.
            Some(tail) if cmp.compare(tail.max(), range.min()) != Ordering::Less => {
                // Only grow the interval; a fully contained range changes nothing.
                if cmp.compare(range.max(), tail.max()) == Ordering::Greater {
                    tail.1 = range.1;
                }
            }
            // First range, or a gap before this range: start a new interval.
            _ => merged.push(range),
        }
    }

    merged
}

/// Aggregates a key range.
pub fn aggregate<'a>(mut iter: impl Iterator<Item = &'a Self>) -> Self {
let Some(first) = iter.next() else {
Expand Down Expand Up @@ -538,4 +584,111 @@ mod tests {
assert!(!key_range.contains_key(b"key5x"));
assert!(!key_range.contains_key(b"key6"));
}

mod merge_sorted_cmp {
use super::*;
use crate::comparator::{DefaultUserComparator, UserComparator};
use test_log::test;

#[test]
fn empty_input() {
let result = KeyRange::merge_sorted_cmp(vec![], &DefaultUserComparator);
assert!(result.is_empty());
}

#[test]
fn single_range() {
let input = vec![string_key_range("a", "d")];
let result = KeyRange::merge_sorted_cmp(input, &DefaultUserComparator);
assert_eq!(result, vec![string_key_range("a", "d")]);
}

#[test]
fn disjoint_ranges_stay_separate() {
let input = vec![
string_key_range("a", "d"),
string_key_range("f", "h"),
string_key_range("k", "z"),
];
let result = KeyRange::merge_sorted_cmp(input, &DefaultUserComparator);
assert_eq!(
result,
vec![
string_key_range("a", "d"),
string_key_range("f", "h"),
string_key_range("k", "z"),
]
);
}

#[test]
fn overlapping_ranges_merge() {
let input = vec![
string_key_range("a", "f"),
string_key_range("c", "h"),
string_key_range("g", "z"),
];
let result = KeyRange::merge_sorted_cmp(input, &DefaultUserComparator);
assert_eq!(result, vec![string_key_range("a", "z")]);
}

#[test]
fn adjacent_ranges_merge() {
// [a,d] and [d,f] touch at "d" — should merge
let input = vec![string_key_range("a", "d"), string_key_range("d", "f")];
let result = KeyRange::merge_sorted_cmp(input, &DefaultUserComparator);
assert_eq!(result, vec![string_key_range("a", "f")]);
}

#[test]
fn contained_range_absorbed() {
// [a,z] fully contains [c,d]
let input = vec![string_key_range("a", "z"), string_key_range("c", "d")];
let result = KeyRange::merge_sorted_cmp(input, &DefaultUserComparator);
assert_eq!(result, vec![string_key_range("a", "z")]);
}

#[test]
fn mixed_disjoint_and_overlapping() {
// Two clusters: [a,f]+[c,h] merge; [x,z] stays separate
let input = vec![
string_key_range("a", "f"),
string_key_range("c", "h"),
string_key_range("x", "z"),
];
let result = KeyRange::merge_sorted_cmp(input, &DefaultUserComparator);
assert_eq!(
result,
vec![string_key_range("a", "h"), string_key_range("x", "z")]
);
}

#[test]
fn reverse_comparator() {
struct ReverseCmp;
impl UserComparator for ReverseCmp {
fn name(&self) -> &'static str {
"reverse"
}
fn compare(&self, a: &[u8], b: &[u8]) -> std::cmp::Ordering {
b.cmp(a)
}
}

// In reverse order: z > y > ... > p > o > ... > a
// Sorted by comparator-min: [z,o], [o,k], [d,a]
// [z,o] and [o,k] touch at "o" → should merge to [z,k]
// [d,a] is separate
let input = vec![
string_key_range("z", "o"),
string_key_range("o", "k"),
string_key_range("d", "a"),
];
let result = KeyRange::merge_sorted_cmp(input, &ReverseCmp);
assert_eq!(
result,
vec![string_key_range("z", "k"), string_key_range("d", "a")]
);
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
Loading