diff --git a/Cargo.lock b/Cargo.lock index 19905559d92..78d0fdcb1bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2271,6 +2271,16 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "gix-reftable" +version = "0.0.0" +dependencies = [ + "crc32fast", + "flate2", + "gix-hash", + "thiserror 2.0.18", +] + [[package]] name = "gix-revision" version = "0.42.0" diff --git a/Cargo.toml b/Cargo.toml index 2dac76743dc..52ec13f4ab0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,6 +229,7 @@ members = [ "gix-hash", "gix-validate", "gix-ref", + "gix-reftable", "gix-command", "gix-config", "gix-config-value", diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 00000000000..37e18ca80c4 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,104 @@ +# Reconciled Plan: Reftable Port + Integration + +## Branch Reality +As of 2026-03-18, branch `codex/reftable-port-sequence` does not match the original "one commit per step" execution plan. + +- The branch contains one reftable-only squash commit: `94793bb6fb` from 2026-03-03. +- That commit sits on top of `e8bf096c07`, which was `main` on 2026-03-03. +- Current `origin/main` is `8e47e0f00b`, so `git diff origin/main..HEAD` mixes this branch's work with unrelated upstream changes. +- To inspect only this branch's payload, compare `HEAD^..HEAD`. + +In other words, this branch currently implements the standalone `gix-reftable` port and tests, but it does not yet contain the planned `gix-ref`/`gix` backend integration work. 
+ +## Reconciled Scope +Implemented on this branch: +- workspace wiring for `gix-reftable` +- low-level reftable primitives +- record encoding/decoding +- block, blocksource, and single-table reader support +- merged iteration helpers +- writer support +- stack transactions, compaction, reload, and fsck support +- upstream-style `u-reftable-*` parity tests +- selected `t0610`/`t0613`/`t0614` behavior tests + +Not implemented on this branch: +- backend-agnostic `gix-ref` store activation +- reftable-backed `gix-ref` adapter +- `gix` repository opening and runtime support for reftable refs +- cross-backend regression coverage for the integrated path +- user-facing documentation of landed support + +## Planned Sequence With Current Status +1. **`workspace: add gix-reftable crate skeleton and wire it into Cargo workspace`** + Status: completed, but folded into squash commit `94793bb6fb`. + +2. **`gix-reftable: port basics/constants/error/varint primitives from git/reftable`** + Status: completed, but folded into squash commit `94793bb6fb`. + +3. **`gix-reftable: implement record model and encode/decode parity (ref/log/obj/index)`** + Status: completed, but folded into squash commit `94793bb6fb`. + +4. **`gix-reftable: implement block + blocksource + table reader`** + Status: completed, but folded into squash commit `94793bb6fb`. + +5. **`gix-reftable: implement merged table iterators, pq, and tree helpers`** + Status: completed, but folded into squash commit `94793bb6fb`. + +6. **`gix-reftable: implement writer with limits/index emission/write options`** + Status: completed, but folded into squash commit `94793bb6fb`. + +7. **`gix-reftable: implement stack transactions, auto-compaction, reload, and fsck`** + Status: completed, but folded into squash commit `94793bb6fb`. + +8. **`gix-reftable/tests: port upstream u-reftable-* unit suites with 1:1 case mapping`** + Status: completed, but folded into squash commit `94793bb6fb`. + +9. 
**`gix-reftable/tests: add selected t0610/t0613/t0614 behavior parity integration tests`** + Status: completed, but folded into squash commit `94793bb6fb`. + +10. **`gix-ref: activate backend-agnostic store abstraction (files + reftable state)`** + Status: not implemented on this branch. + +11. **`gix-ref: add reftable-backed store adapter and route find/iter/transaction operations`** + Status: not implemented on this branch. + +12. **`gix: switch RefStore to backend-capable store and detect extensions.refStorage=reftable`** + Status: not implemented on this branch. + +13. **`gix: make reference iteration/peeling/fetch update paths backend-agnostic`** + Status: not implemented on this branch. + +14. **`tests: update reftable open/head expectations and add cross-backend regression coverage`** + Status: not implemented on this branch. + +15. **`docs/status: document reftable support, sha256 boundary, and update crate-status`** + Status: not implemented on this branch. + +## What Must Happen Next To Match The Original Plan +1. Recreate or rebase this branch on top of current `origin/main` instead of comparing it directly from the old 2026-03-03 base. +2. Decide whether steps 1 through 9 must be restored as nine reviewable commits or can remain as one squash commit with documented scope. +3. Implement steps 10 through 15 as follow-up commits. +4. Update the existing `gix` reftable-open test once end-to-end support is actually present. 
+ +## Validation Guidance +For the work already present here, the relevant validation is: +- `gix-reftable` unit and behavior parity suites +- targeted workspace build/test coverage for the new crate wiring + +For the remaining planned work, validation should expand to: +- `gix-ref` targeted tests +- `gix` targeted repository/reference tests +- reftable fixture coverage in repository-open and reference workflows + +## Commit Message Rule For Remaining Work +Every remaining commit should still include: +- **Why now** +- **What changed** +- **Why this order** +- **What it unlocks next** + +## Assumptions +- Source parity target is Git's in-tree reftable C implementation and tests. +- `gix-reftable` supports SHA-1 and SHA-256 in isolation. +- End-to-end `gix` reftable support is still outstanding in this branch until steps 10 through 15 land. diff --git a/gix-reftable/CHANGELOG.md b/gix-reftable/CHANGELOG.md new file mode 100644 index 00000000000..32eae03e5b2 --- /dev/null +++ b/gix-reftable/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## Unreleased + +- Initial crate skeleton. 
diff --git a/gix-reftable/Cargo.toml b/gix-reftable/Cargo.toml new file mode 100644 index 00000000000..a09efd6ed42 --- /dev/null +++ b/gix-reftable/Cargo.toml @@ -0,0 +1,22 @@ +lints.workspace = true + +[package] +name = "gix-reftable" +version = "0.0.0" +repository = "https://github.com/GitoxideLabs/gitoxide" +license = "MIT OR Apache-2.0" +description = "Read and write Git reftable storage" +authors = ["Sebastian Thiel "] +edition = "2021" +include = ["src/**/*", "LICENSE-*"] +rust-version = "1.82" + +[lib] +doctest = false +test = true + +[dependencies] +crc32fast = "1.5.0" +flate2 = "1.1.5" +gix-hash = { version = "^0.22.1", path = "../gix-hash", features = ["sha1", "sha256"] } +thiserror = "2.0.18" diff --git a/gix-reftable/LICENSE-APACHE b/gix-reftable/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/gix-reftable/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-reftable/LICENSE-MIT b/gix-reftable/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/gix-reftable/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-reftable/src/basics.rs b/gix-reftable/src/basics.rs new file mode 100644 index 00000000000..e4b55ab6d31 --- /dev/null +++ b/gix-reftable/src/basics.rs @@ -0,0 +1,171 @@ +use crate::error::Error; + +/// Hash identifiers used by reftable. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub enum HashId { + /// SHA-1 object IDs. + Sha1, + /// SHA-256 object IDs. + Sha256, +} + +impl HashId { + /// Return the byte-size of object IDs for this hash. + pub const fn size(self) -> usize { + match self { + HashId::Sha1 => 20, + HashId::Sha256 => 32, + } + } + + /// Return the [gix_hash::Kind] if this hash ID is supported by `gix-hash`. 
+ pub const fn to_gix(self) -> gix_hash::Kind { + match self { + HashId::Sha1 => gix_hash::Kind::Sha1, + HashId::Sha256 => gix_hash::Kind::Sha256, + } + } +} + +/// Return the shared-prefix size between `a` and `b`. +pub fn common_prefix_size(a: &[u8], b: &[u8]) -> usize { + a.iter().zip(b.iter()).take_while(|(a, b)| a == b).count() +} + +/// Put a big-endian 64-bit integer into `out`. +pub fn put_be64(out: &mut [u8; 8], value: u64) { + *out = value.to_be_bytes(); +} + +/// Put a big-endian 32-bit integer into `out`. +pub fn put_be32(out: &mut [u8; 4], value: u32) { + *out = value.to_be_bytes(); +} + +/// Put a big-endian 24-bit integer into `out`. +pub fn put_be24(out: &mut [u8; 3], value: u32) { + out[0] = ((value >> 16) & 0xff) as u8; + out[1] = ((value >> 8) & 0xff) as u8; + out[2] = (value & 0xff) as u8; +} + +/// Put a big-endian 16-bit integer into `out`. +pub fn put_be16(out: &mut [u8; 2], value: u16) { + *out = value.to_be_bytes(); +} + +/// Read a big-endian 64-bit integer. +pub fn get_be64(input: &[u8; 8]) -> u64 { + u64::from_be_bytes(*input) +} + +/// Read a big-endian 32-bit integer. +pub fn get_be32(input: &[u8; 4]) -> u32 { + u32::from_be_bytes(*input) +} + +/// Read a big-endian 24-bit integer. +pub fn get_be24(input: &[u8; 3]) -> u32 { + ((input[0] as u32) << 16) | ((input[1] as u32) << 8) | (input[2] as u32) +} + +/// Read a big-endian 16-bit integer. +pub fn get_be16(input: &[u8; 2]) -> u16 { + u16::from_be_bytes(*input) +} + +/// Encode a reftable varint. +/// +/// The format is the same as reftable's/ofs-delta's encoding. +pub fn encode_varint(mut value: u64, out: &mut [u8; 10]) -> usize { + let mut tmp = [0u8; 10]; + let mut n = 0usize; + tmp[n] = (value & 0x7f) as u8; + n += 1; + while value >= 0x80 { + value = (value >> 7) - 1; + tmp[n] = 0x80 | (value & 0x7f) as u8; + n += 1; + } + // reverse + for (dst, src) in out.iter_mut().take(n).zip(tmp[..n].iter().rev()) { + *dst = *src; + } + n +} + +/// Decode a reftable varint from `input`. 
+/// +/// Returns `(value, consumed_bytes)`. +pub fn decode_varint(input: &[u8]) -> Result<(u64, usize), Error> { + if input.is_empty() { + return Err(Error::Truncated); + } + let mut i = 0usize; + let mut c = input[i]; + i += 1; + let mut value = u64::from(c & 0x7f); + while c & 0x80 != 0 { + if i >= input.len() { + return Err(Error::Truncated); + } + c = input[i]; + i += 1; + value = value + .checked_add(1) + .ok_or(Error::VarintOverflow)? + .checked_shl(7) + .ok_or(Error::VarintOverflow)? + .checked_add(u64::from(c & 0x7f)) + .ok_or(Error::VarintOverflow)?; + } + Ok((value, i)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hash_sizes() { + assert_eq!(HashId::Sha1.size(), 20); + assert_eq!(HashId::Sha256.size(), 32); + } + + #[test] + fn common_prefix() { + assert_eq!(common_prefix_size(b"refs/heads/a", b"refs/heads/b"), 11); + assert_eq!(common_prefix_size(b"x", b"y"), 0); + assert_eq!(common_prefix_size(b"", b"abc"), 0); + } + + #[test] + fn be_roundtrip() { + let mut be64 = [0u8; 8]; + put_be64(&mut be64, 0x0102_0304_0506_0708); + assert_eq!(get_be64(&be64), 0x0102_0304_0506_0708); + + let mut be32 = [0u8; 4]; + put_be32(&mut be32, 0x0102_0304); + assert_eq!(get_be32(&be32), 0x0102_0304); + + let mut be24 = [0u8; 3]; + put_be24(&mut be24, 0x01_02_03); + assert_eq!(get_be24(&be24), 0x01_02_03); + + let mut be16 = [0u8; 2]; + put_be16(&mut be16, 0x0102); + assert_eq!(get_be16(&be16), 0x0102); + } + + #[test] + fn varint_roundtrip() { + let mut storage = [0u8; 10]; + for value in [0, 1, 2, 126, 127, 128, 129, 16_384, u32::MAX as u64, u64::MAX] { + let n = encode_varint(value, &mut storage); + let (decoded, consumed) = decode_varint(&storage[..n]).expect("valid"); + assert_eq!(consumed, n); + assert_eq!(decoded, value); + } + } +} diff --git a/gix-reftable/src/block.rs b/gix-reftable/src/block.rs new file mode 100644 index 00000000000..dd63fce4124 --- /dev/null +++ b/gix-reftable/src/block.rs @@ -0,0 +1,306 @@ +use std::cmp::Ordering; + +use 
flate2::{FlushDecompress, Status}; + +use crate::{ + basics::{get_be16, get_be24}, + blocksource::BlockSource, + constants, + error::Error, + record::{decode_key, decode_key_len, Record}, +}; + +/// Size of the file header for a reftable `version`. +pub fn header_size(version: u8) -> Result { + match version { + 1 => Ok(24), + 2 => Ok(28), + _ => Err(Error::Malformed("unsupported reftable version")), + } +} + +/// Size of the file footer for a reftable `version`. +pub fn footer_size(version: u8) -> Result { + match version { + 1 => Ok(68), + 2 => Ok(72), + _ => Err(Error::Malformed("unsupported reftable version")), + } +} + +/// A decoded reftable block. +#[derive(Debug, Clone)] +pub struct Block { + /// Offset of a file header in this block (non-zero only for first block). + pub header_off: u32, + /// Decoded block bytes. + pub data: Vec, + /// Hash size used for records in this table. + pub hash_size: usize, + /// Number of restart points. + pub restart_count: u16, + /// Start of restart table (relative to this block start). + pub restart_off: u32, + /// Number of bytes consumed in the source file for this block. + pub full_block_size: u32, + /// Block type. + pub block_type: u8, +} + +impl Block { + /// Decode a block at `offset`. + /// + /// Returns `Ok(None)` when no block exists at `offset` or the type does not match `want_type`. 
+ pub fn init( + source: &BlockSource, + offset: u64, + header_off: u32, + table_block_size: u32, + hash_size: usize, + want_type: u8, + ) -> Result, Error> { + let guess_block_size = if table_block_size > 0 { + table_block_size as usize + } else { + constants::DEFAULT_BLOCK_SIZE + }; + + let mut data = source.read(offset, guess_block_size as u32)?.to_vec(); + if data.is_empty() { + return Ok(None); + } + + let header_off_usize = header_off as usize; + if data.len() < header_off_usize + 4 { + return Err(Error::Truncated); + } + + let block_type = data[header_off_usize]; + if !is_block_type(block_type) { + return Err(Error::Malformed("invalid block type")); + } + if want_type != constants::BLOCK_TYPE_ANY && want_type != block_type { + return Ok(None); + } + + let mut block_size_buf = [0u8; 3]; + block_size_buf.copy_from_slice(&data[header_off_usize + 1..header_off_usize + 4]); + let block_size = get_be24(&block_size_buf) as usize; + if block_size < header_off_usize + 4 { + return Err(Error::Malformed("invalid block size")); + } + + if block_size > data.len() { + data = source.read(offset, block_size as u32)?.to_vec(); + } + + let (decoded_data, full_block_size) = if block_type == constants::BLOCK_TYPE_LOG { + let block_header_skip = header_off_usize + 4; + if block_size < block_header_skip || data.len() < block_header_skip { + return Err(Error::Malformed("invalid log block size")); + } + + let mut uncompressed = vec![0u8; block_size - block_header_skip]; + let mut decompressor = flate2::Decompress::new(true); + let status = decompressor + .decompress(&data[block_header_skip..], &mut uncompressed, FlushDecompress::Finish) + .map_err(|_| Error::Zlib)?; + if status != Status::StreamEnd || decompressor.total_out() as usize != uncompressed.len() { + return Err(Error::Zlib); + } + + let mut out = Vec::with_capacity(block_size); + out.extend_from_slice(&data[..block_header_skip]); + out.extend_from_slice(&uncompressed); + (out, (block_header_skip + decompressor.total_in() 
as usize) as u32) + } else { + if data.len() < block_size { + return Err(Error::Truncated); + } + let mut full_block_size = if table_block_size == 0 { + block_size as u32 + } else { + table_block_size + }; + if block_size < full_block_size as usize && block_size < data.len() && data[block_size] != 0 { + full_block_size = block_size as u32; + } + (data, full_block_size) + }; + + if decoded_data.len() < block_size { + return Err(Error::Truncated); + } + if block_size < 2 { + return Err(Error::Malformed("block too small")); + } + + let mut restart_count_buf = [0u8; 2]; + restart_count_buf.copy_from_slice(&decoded_data[block_size - 2..block_size]); + let restart_count = get_be16(&restart_count_buf); + let restart_off = block_size + .checked_sub(2 + 3 * restart_count as usize) + .ok_or(Error::Malformed("invalid restart table"))? as u32; + + Ok(Some(Self { + header_off, + data: decoded_data, + hash_size, + restart_count, + restart_off, + full_block_size, + block_type, + })) + } + + /// Returns the first key in this block. + pub fn first_key(&self) -> Result, Error> { + let mut key = Vec::new(); + let off = self.header_off as usize + 4; + let end = self.restart_off as usize; + if off >= end || end > self.data.len() { + return Err(Error::Malformed("block has no record payload")); + } + let (consumed, _extra) = decode_key(&mut key, &self.data[off..end])?; + if consumed == 0 || key.is_empty() { + return Err(Error::Malformed("invalid first key")); + } + Ok(key) + } + + fn restart_offset(&self, idx: usize) -> Result { + if idx >= self.restart_count as usize { + return Err(Error::Malformed("restart index out of bounds")); + } + let off = self.restart_off as usize + 3 * idx; + let mut buf = [0u8; 3]; + buf.copy_from_slice(&self.data[off..off + 3]); + Ok(get_be24(&buf)) + } +} + +/// Iterator over records in a single block. 
+#[derive(Debug, Clone)] +pub struct BlockIter { + pub(crate) block: Block, + next_off: u32, + last_key: Vec, +} + +impl BlockIter { + /// Initialize an iterator over `block` at the first record. + pub fn new(block: Block) -> Self { + Self { + next_off: block.header_off + 4, + block, + last_key: Vec::new(), + } + } + + /// Seek to the first key >= `want`. + pub fn seek_key(&mut self, want: &[u8]) -> Result<(), Error> { + let restart_index = self.find_first_restart_greater_than(want)?; + if restart_index > 0 { + self.next_off = self.block.restart_offset(restart_index - 1)?; + } else { + self.next_off = self.block.header_off + 4; + } + self.last_key.clear(); + + loop { + let prev_off = self.next_off; + let Some(record) = self.next_record()? else { + self.next_off = prev_off; + return Ok(()); + }; + + let key = record.key(); + if key.as_slice() >= want { + self.next_off = prev_off; + self.last_key = key; + return Ok(()); + } + } + } + + /// Decode and return the next record. + pub fn next_record(&mut self) -> Result, Error> { + if self.next_off >= self.block.restart_off { + return Ok(None); + } + + let start = self.next_off as usize; + let end = self.block.restart_off as usize; + if end > self.block.data.len() || start > end { + return Err(Error::Malformed("invalid record boundaries")); + } + + let input = &self.block.data[start..end]; + let (key_bytes_consumed, extra) = decode_key(&mut self.last_key, input)?; + if self.last_key.is_empty() { + return Err(Error::Malformed("empty record key")); + } + + let payload = &input[key_bytes_consumed..]; + let (record, payload_consumed) = Record::decode_consuming( + self.block.block_type, + &self.last_key, + extra, + payload, + self.block.hash_size, + )?; + + self.next_off = self + .next_off + .checked_add((key_bytes_consumed + payload_consumed) as u32) + .ok_or(Error::Malformed("offset overflow"))?; + + Ok(Some(record)) + } + + /// Access the currently iterated block. 
+ pub fn block(&self) -> &Block { + &self.block + } + + fn find_first_restart_greater_than(&self, want: &[u8]) -> Result { + let mut low = 0usize; + let mut high = self.block.restart_count as usize; + + while low < high { + let mid = low + (high - low) / 2; + match self.restart_key_cmp(mid, want)? { + Ordering::Greater => high = mid, + Ordering::Equal | Ordering::Less => low = mid + 1, + } + } + + Ok(low) + } + + fn restart_key_cmp(&self, idx: usize, want: &[u8]) -> Result { + let off = self.block.restart_offset(idx)? as usize; + let restart_off = self.block.restart_off as usize; + if off >= restart_off { + return Err(Error::Malformed("restart points outside payload")); + } + let in_block = &self.block.data[off..restart_off]; + + let (prefix_len, suffix_len, _extra, consumed) = decode_key_len(in_block)?; + if prefix_len != 0 { + return Err(Error::Malformed("restart key must have empty prefix")); + } + if in_block.len().saturating_sub(consumed) < suffix_len { + return Err(Error::Truncated); + } + let key = &in_block[consumed..consumed + suffix_len]; + Ok(key.cmp(want)) + } +} + +fn is_block_type(typ: u8) -> bool { + matches!( + typ, + constants::BLOCK_TYPE_REF | constants::BLOCK_TYPE_LOG | constants::BLOCK_TYPE_OBJ | constants::BLOCK_TYPE_INDEX + ) +} diff --git a/gix-reftable/src/blocksource.rs b/gix-reftable/src/blocksource.rs new file mode 100644 index 00000000000..3c9e516feff --- /dev/null +++ b/gix-reftable/src/blocksource.rs @@ -0,0 +1,46 @@ +use std::{path::Path, sync::Arc}; + +use crate::error::Error; + +/// In-memory source for reading reftable blocks. +#[derive(Clone, Debug)] +pub struct BlockSource { + data: Arc<[u8]>, +} + +impl BlockSource { + /// Open a block source from the given file. + pub fn from_file(path: impl AsRef) -> Result { + let data = std::fs::read(path)?; + Ok(Self { + data: Arc::from(data.into_boxed_slice()), + }) + } + + /// Create a source from owned bytes. 
+ pub fn from_bytes(data: Vec) -> Self { + Self { + data: Arc::from(data.into_boxed_slice()), + } + } + + /// Size in bytes. + pub fn size(&self) -> u64 { + self.data.len() as u64 + } + + /// Read a byte range. + pub fn read(&self, offset: u64, size: u32) -> Result<&[u8], Error> { + let start = usize::try_from(offset).map_err(|_| Error::Malformed("offset overflow"))?; + if start >= self.data.len() { + return Ok(&[]); + } + let end = start.saturating_add(size as usize).min(self.data.len()); + Ok(&self.data[start..end]) + } + + /// Access raw bytes. + pub fn bytes(&self) -> &[u8] { + &self.data + } +} diff --git a/gix-reftable/src/constants.rs b/gix-reftable/src/constants.rs new file mode 100644 index 00000000000..75466bb84fe --- /dev/null +++ b/gix-reftable/src/constants.rs @@ -0,0 +1,31 @@ +/// Reftable block type used for log records. +pub const BLOCK_TYPE_LOG: u8 = b'g'; +/// Reftable block type used for index records. +pub const BLOCK_TYPE_INDEX: u8 = b'i'; +/// Reftable block type used for ref records. +pub const BLOCK_TYPE_REF: u8 = b'r'; +/// Reftable block type used for object-index records. +pub const BLOCK_TYPE_OBJ: u8 = b'o'; +/// Wildcard block type. +pub const BLOCK_TYPE_ANY: u8 = 0; + +/// Ref record value type for tombstones. +pub const REF_VAL_DELETION: u8 = 0; +/// Ref record value type for direct object ids. +pub const REF_VAL_VAL1: u8 = 1; +/// Ref record value type for direct+peeled object ids. +pub const REF_VAL_VAL2: u8 = 2; +/// Ref record value type for symbolic refs. +pub const REF_VAL_SYMREF: u8 = 3; + +/// Log record value type for tombstones. +pub const LOG_VAL_DELETION: u8 = 0; +/// Log record value type for updates. +pub const LOG_VAL_UPDATE: u8 = 1; + +/// Default reftable block size. +pub const DEFAULT_BLOCK_SIZE: usize = 4096; +/// Maximum reftable block size. +pub const MAX_BLOCK_SIZE: usize = 16_777_215; +/// Maximum restart interval per block. 
+pub const MAX_RESTART_INTERVAL: usize = u16::MAX as usize; diff --git a/gix-reftable/src/error.rs b/gix-reftable/src/error.rs new file mode 100644 index 00000000000..36c4886c9cf --- /dev/null +++ b/gix-reftable/src/error.rs @@ -0,0 +1,25 @@ +/// Errors produced by reftable parsing and encoding. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// I/O failure while accessing block data. + #[error(transparent)] + Io(#[from] std::io::Error), + /// Input ended unexpectedly. + #[error("input ended unexpectedly")] + Truncated, + /// Data has an invalid checksum. + #[error("checksum mismatch")] + ChecksumMismatch, + /// API misuse by caller. + #[error("api error: {0}")] + Api(&'static str), + /// A compressed log block could not be decoded. + #[error("invalid compressed log block")] + Zlib, + /// A varint could not be represented in `u64`. + #[error("varint overflow")] + VarintOverflow, + /// Input data is malformed. + #[error("malformed data: {0}")] + Malformed(&'static str), +} diff --git a/gix-reftable/src/lib.rs b/gix-reftable/src/lib.rs new file mode 100644 index 00000000000..4a1819afdee --- /dev/null +++ b/gix-reftable/src/lib.rs @@ -0,0 +1,31 @@ +//! Read and write Git reftables. +//! +//! This crate provides a Rust implementation of Git's reftable storage format. 
+ +#![deny(missing_docs, rust_2018_idioms)] +#![forbid(unsafe_code)] + +/// +pub mod basics; +/// +pub mod block; +/// +pub mod blocksource; +/// +pub mod constants; +/// +pub mod error; +/// +pub mod merged; +/// +pub mod pq; +/// +pub mod record; +/// +pub mod stack; +/// +pub mod table; +/// +pub mod tree; +/// +pub mod writer; diff --git a/gix-reftable/src/merged.rs b/gix-reftable/src/merged.rs new file mode 100644 index 00000000000..2df9b80fc95 --- /dev/null +++ b/gix-reftable/src/merged.rs @@ -0,0 +1,254 @@ +use crate::{ + basics::HashId, + constants, + error::Error, + pq::{MergedIterPQueue, PqEntry}, + record::Record, + table::{Table, TableIter}, +}; + +/// A merged view over multiple tables, typically oldest to newest. +#[derive(Debug, Clone)] +pub struct MergedTable { + /// Source tables. + pub tables: Vec, + /// Hash in use by all tables. + pub hash_id: HashId, + /// Smallest update index across all tables. + pub min_update_index: u64, + /// Largest update index across all tables. + pub max_update_index: u64, + /// Whether deletions should be filtered while iterating. + pub suppress_deletions: bool, +} + +impl MergedTable { + /// Create a merged table from `tables`. + pub fn new(tables: Vec
) -> Result { + let mut hash_id = HashId::Sha1; + let mut min_update_index = 0; + let mut max_update_index = 0; + + for (idx, table) in tables.iter().enumerate() { + if idx == 0 { + hash_id = table.hash_id; + min_update_index = table.min_update_index; + max_update_index = table.max_update_index; + } else { + if table.hash_id != hash_id { + return Err(Error::Malformed("all merged tables must share hash id")); + } + min_update_index = min_update_index.min(table.min_update_index); + max_update_index = max_update_index.max(table.max_update_index); + } + } + + Ok(Self { + tables, + hash_id, + min_update_index, + max_update_index, + suppress_deletions: false, + }) + } + + /// Create an iterator over merged refs. + pub fn ref_iter(&self) -> Result, Error> { + self.iter(constants::BLOCK_TYPE_REF) + } + + /// Create an iterator over merged logs. + pub fn log_iter(&self) -> Result, Error> { + self.iter(constants::BLOCK_TYPE_LOG) + } + + /// Create an iterator over records of the given block type. + pub fn iter(&self, typ: u8) -> Result, Error> { + MergedIter::new(self, typ) + } +} + +/// Iterator over merged table records. +pub struct MergedIter<'a> { + subiters: Vec>, + pq: MergedIterPQueue, + suppress_deletions: bool, +} + +impl<'a> MergedIter<'a> { + fn new(table: &'a MergedTable, typ: u8) -> Result { + let mut subiters = Vec::with_capacity(table.tables.len()); + for t in &table.tables { + subiters.push(t.iter(typ)?); + } + + let mut out = Self { + subiters, + pq: MergedIterPQueue::default(), + suppress_deletions: table.suppress_deletions, + }; + out.rebuild_pq()?; + Ok(out) + } + + fn rebuild_pq(&mut self) -> Result<(), Error> { + self.pq.clear(); + for idx in 0..self.subiters.len() { + self.advance_subiter(idx)?; + } + Ok(()) + } + + fn advance_subiter(&mut self, idx: usize) -> Result<(), Error> { + if let Some(record) = self.subiters[idx].next_record()? { + self.pq.push(PqEntry { index: idx, record }); + } + Ok(()) + } + + /// Seek all subiterators to `key`. 
+ pub fn seek_key(&mut self, key: &[u8]) -> Result<(), Error> { + for subiter in &mut self.subiters { + subiter.seek_key(key)?; + } + self.rebuild_pq() + } + + /// Return the next merged record. + pub fn next_record(&mut self) -> Result, Error> { + loop { + let Some(entry) = self.pq.pop() else { + return Ok(None); + }; + + self.advance_subiter(entry.index)?; + + while let Some(top) = self.pq.peek() { + if top.record.cmp_key(&entry.record)? != std::cmp::Ordering::Equal { + break; + } + let dup = self.pq.pop().expect("just peeked"); + self.advance_subiter(dup.index)?; + } + + if self.suppress_deletions && entry.record.is_deletion() { + continue; + } + return Ok(Some(entry.record)); + } + } +} + +#[cfg(test)] +mod tests { + use std::{ + collections::BTreeSet, + fs, + path::{Path, PathBuf}, + process::Command, + time::{SystemTime, UNIX_EPOCH}, + }; + + use crate::{constants, record::Record}; + + use super::{MergedTable, Table}; + + struct TempDir { + path: PathBuf, + } + + impl TempDir { + fn new() -> Self { + let stamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("valid time") + .as_nanos(); + let path = std::env::temp_dir().join(format!("gix-reftable-merged-{stamp}")); + fs::create_dir_all(&path).expect("temp dir"); + Self { path } + } + } + + impl Drop for TempDir { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.path); + } + } + + fn run(cwd: &Path, args: &[&str]) { + let status = Command::new(args[0]) + .args(&args[1..]) + .current_dir(cwd) + .status() + .expect("command executed"); + assert!(status.success(), "command failed: {args:?}"); + } + + fn create_table() -> Option<(TempDir, Table)> { + let tmp = TempDir::new(); + let source = tmp.path.join("source"); + let clone = tmp.path.join("clone"); + fs::create_dir_all(&source).expect("source dir"); + + run(&source, &["git", "init", "-q"]); + run(&source, &["git", "config", "user.name", "committer"]); + run(&source, &["git", "config", "user.email", "committer@example.com"]); + run(&source, 
&["git", "config", "commit.gpgSign", "false"]); + fs::write(source.join("file"), "hello\n").expect("write file"); + run(&source, &["git", "add", "file"]); + run(&source, &["git", "commit", "-q", "-m", "c1"]); + let clone_status = Command::new("git") + .args(["clone", "--ref-format=reftable"]) + .arg(source.to_str().expect("utf-8 path")) + .arg(clone.to_str().expect("utf-8 path")) + .current_dir(&tmp.path) + .status() + .ok()?; + if !clone_status.success() { + return None; + } + + let list = fs::read_to_string(clone.join(".git/reftable/tables.list")).ok()?; + let table_name = list.lines().next()?; + let table = Table::open(clone.join(".git/reftable").join(table_name)).ok()?; + Some((tmp, table)) + } + + #[test] + fn merged_iterator_deduplicates_by_key_with_recency_preference() { + let Some((_tmp, table)) = create_table() else { + return; + }; + let merged = MergedTable::new(vec![table.clone(), table]).expect("merged"); + + let mut iter = merged.ref_iter().expect("iter"); + let mut ref_names = Vec::new(); + while let Some(rec) = iter.next_record().expect("next") { + let Record::Ref(rec) = rec else { + panic!("expected ref"); + }; + ref_names.push(rec.refname); + } + + let unique_names = ref_names.iter().collect::>(); + assert_eq!(unique_names.len(), ref_names.len()); + assert!(ref_names.iter().any(|name| name == "HEAD")); + assert!(ref_names.iter().any(|name| name.starts_with("refs/heads/"))); + } + + #[test] + fn merged_seek_key() { + let Some((_tmp, table)) = create_table() else { + return; + }; + let merged = MergedTable::new(vec![table.clone(), table]).expect("merged"); + + let mut iter = merged.iter(constants::BLOCK_TYPE_REF).expect("iter"); + iter.seek_key(b"refs/heads/").expect("seek"); + let rec = iter.next_record().expect("next").expect("record"); + let Record::Ref(rec) = rec else { + panic!("expected ref"); + }; + assert!(rec.refname.starts_with("refs/heads/")); + } +} diff --git a/gix-reftable/src/pq.rs b/gix-reftable/src/pq.rs new file mode 100644 index 
00000000000..5bc50e9bbd3 --- /dev/null +++ b/gix-reftable/src/pq.rs @@ -0,0 +1,110 @@ +use std::{cmp::Ordering, collections::BinaryHeap}; + +use crate::{error::Error, record::Record}; + +/// Entry in merged-table priority queues. +#[derive(Debug, Clone)] +pub struct PqEntry { + /// Sub-iterator index. + pub index: usize, + /// Current record at this iterator head. + pub record: Record, +} + +impl PqEntry { + fn try_cmp(&self, other: &Self) -> Result { + let key_cmp = self.record.cmp_key(&other.record)?; + Ok(match key_cmp { + Ordering::Less => Ordering::Greater, + Ordering::Greater => Ordering::Less, + Ordering::Equal => self.index.cmp(&other.index), + }) + } +} + +impl PartialEq for PqEntry { + fn eq(&self, other: &Self) -> bool { + self.index == other.index && self.record == other.record + } +} + +impl Eq for PqEntry {} + +impl PartialOrd for PqEntry { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PqEntry { + fn cmp(&self, other: &Self) -> Ordering { + self.try_cmp(other).unwrap_or(Ordering::Equal) + } +} + +/// Priority queue for merged iteration. +#[derive(Default, Debug, Clone)] +pub struct MergedIterPQueue { + heap: BinaryHeap, +} + +impl MergedIterPQueue { + /// Add an entry. + pub fn push(&mut self, entry: PqEntry) { + self.heap.push(entry); + } + + /// Pop top entry. + pub fn pop(&mut self) -> Option { + self.heap.pop() + } + + /// Peek top entry. + pub fn peek(&self) -> Option<&PqEntry> { + self.heap.peek() + } + + /// True if empty. + pub fn is_empty(&self) -> bool { + self.heap.is_empty() + } + + /// Clear all entries. 
+ pub fn clear(&mut self) { + self.heap.clear(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::record::{RefRecord, RefValue}; + + fn ref_record(name: &str, index: usize) -> PqEntry { + PqEntry { + index, + record: Record::Ref(RefRecord { + refname: name.into(), + update_index: index as u64, + value: RefValue::Deletion, + }), + } + } + + #[test] + fn ordering_matches_reftable_semantics() { + let mut pq = MergedIterPQueue::default(); + pq.push(ref_record("refs/heads/b", 0)); + pq.push(ref_record("refs/heads/a", 1)); + pq.push(ref_record("refs/heads/a", 0)); + + let first = pq.pop().expect("first"); + let second = pq.pop().expect("second"); + let third = pq.pop().expect("third"); + + // key order first, then prefer larger subtable index for equal keys. + assert_eq!(first.index, 1); + assert_eq!(second.index, 0); + assert_eq!(third.record.key(), b"refs/heads/b".to_vec()); + } +} diff --git a/gix-reftable/src/record.rs b/gix-reftable/src/record.rs new file mode 100644 index 00000000000..b2308627b53 --- /dev/null +++ b/gix-reftable/src/record.rs @@ -0,0 +1,766 @@ +use std::cmp::Ordering; + +use crate::{ + basics::{common_prefix_size, decode_varint, encode_varint, get_be16, get_be64, put_be16, put_be64}, + constants, + error::Error, +}; + +/// Variants of values stored in [`RefRecord`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RefValue { + /// Tombstone entry. + Deletion, + /// A single object id. + Val1(Vec), + /// A peeled tag with object and target object ids. + Val2 { + /// Direct value. + value: Vec, + /// Peeled target value. + target_value: Vec, + }, + /// Symbolic reference. + Symref(String), +} + +/// A reference record (`r` block). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RefRecord { + /// Full refname. + pub refname: String, + /// Logical update index. + pub update_index: u64, + /// Associated value. + pub value: RefValue, +} + +/// Variants of values stored in [`LogRecord`]. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum LogValue { + /// Tombstone entry. + Deletion, + /// Standard reflog update. + Update { + /// Previous object id. + old_hash: Vec, + /// New object id. + new_hash: Vec, + /// Committer name. + name: String, + /// Committer email. + email: String, + /// Commit time (seconds since epoch). + time: u64, + /// Timezone offset in minutes. + tz_offset: i16, + /// Reflog message. + message: String, + }, +} + +impl LogValue { + fn update( + old_hash: Vec, + new_hash: Vec, + name: String, + email: String, + time: u64, + tz_offset: i16, + message: String, + ) -> Self { + Self::Update { + old_hash, + new_hash, + name, + email, + time, + tz_offset, + message, + } + } +} + +/// A reflog record (`g` block). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LogRecord { + /// Full refname. + pub refname: String, + /// Logical update index. + pub update_index: u64, + /// Associated value. + pub value: LogValue, +} + +/// Object index record (`o` block). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ObjRecord { + /// Prefix of an object id. + pub hash_prefix: Vec, + /// Absolute offsets of referenced ref blocks. + pub offsets: Vec, +} + +/// Secondary index record (`i` block). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IndexRecord { + /// Last key in the indexed block. + pub last_key: Vec, + /// Offset of the indexed block. + pub offset: u64, +} + +/// Any typed record stored in blocks. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Record { + /// Reference record. + Ref(RefRecord), + /// Reflog record. + Log(LogRecord), + /// Object index record. + Obj(ObjRecord), + /// Secondary index record. + Index(IndexRecord), +} + +impl Record { + /// Return block type of this record. 
+ pub fn block_type(&self) -> u8 { + match self { + Record::Ref(_) => constants::BLOCK_TYPE_REF, + Record::Log(_) => constants::BLOCK_TYPE_LOG, + Record::Obj(_) => constants::BLOCK_TYPE_OBJ, + Record::Index(_) => constants::BLOCK_TYPE_INDEX, + } + } + + /// Return record value subtype (3-bit `extra`). + pub fn val_type(&self) -> u8 { + match self { + Record::Ref(r) => match r.value { + RefValue::Deletion => constants::REF_VAL_DELETION, + RefValue::Val1(_) => constants::REF_VAL_VAL1, + RefValue::Val2 { .. } => constants::REF_VAL_VAL2, + RefValue::Symref(_) => constants::REF_VAL_SYMREF, + }, + Record::Log(l) => match l.value { + LogValue::Deletion => constants::LOG_VAL_DELETION, + LogValue::Update { .. } => constants::LOG_VAL_UPDATE, + }, + Record::Obj(o) => { + let len = o.offsets.len(); + if (1..8).contains(&len) { + len as u8 + } else { + 0 + } + } + Record::Index(_) => 0, + } + } + + /// Returns true if this is a tombstone/deletion record. + pub fn is_deletion(&self) -> bool { + matches!( + self, + Record::Ref(RefRecord { + value: RefValue::Deletion, + .. + }) | Record::Log(LogRecord { + value: LogValue::Deletion, + .. + }) + ) + } + + /// Produce sort key bytes. + pub fn key(&self) -> Vec { + match self { + Record::Ref(r) => r.refname.as_bytes().to_vec(), + Record::Log(l) => { + let mut out = Vec::with_capacity(l.refname.len() + 1 + 8); + out.extend_from_slice(l.refname.as_bytes()); + out.push(0); + let mut ts = [0u8; 8]; + put_be64(&mut ts, u64::MAX - l.update_index); + out.extend_from_slice(&ts); + out + } + Record::Obj(o) => o.hash_prefix.clone(), + Record::Index(i) => i.last_key.clone(), + } + } + + /// Encode record value bytes. 
+ pub fn encode(&self, hash_size: usize) -> Result, Error> { + match self { + Record::Ref(r) => encode_ref_record(r, hash_size), + Record::Log(l) => encode_log_record(l, hash_size), + Record::Obj(o) => encode_obj_record(o), + Record::Index(i) => encode_index_record(i), + } + } + + /// Decode a record from type, key, val-type and value payload. + pub fn decode(block_type: u8, key: &[u8], val_type: u8, payload: &[u8], hash_size: usize) -> Result { + let (record, consumed) = Self::decode_consuming(block_type, key, val_type, payload, hash_size)?; + if consumed != payload.len() { + return Err(Error::Malformed("unexpected trailing bytes in record")); + } + Ok(record) + } + + /// Decode a record and return consumed payload bytes. + pub fn decode_consuming( + block_type: u8, + key: &[u8], + val_type: u8, + payload: &[u8], + hash_size: usize, + ) -> Result<(Self, usize), Error> { + match block_type { + constants::BLOCK_TYPE_REF => { + let (record, consumed) = decode_ref_record(key, val_type, payload, hash_size)?; + Ok((Record::Ref(record), consumed)) + } + constants::BLOCK_TYPE_LOG => { + let (record, consumed) = decode_log_record(key, val_type, payload, hash_size)?; + Ok((Record::Log(record), consumed)) + } + constants::BLOCK_TYPE_OBJ => { + let (record, consumed) = decode_obj_record(key, val_type, payload)?; + Ok((Record::Obj(record), consumed)) + } + constants::BLOCK_TYPE_INDEX => { + let (record, consumed) = decode_index_record(key, payload)?; + Ok((Record::Index(record), consumed)) + } + _ => Err(Error::Malformed("unknown block type")), + } + } + + /// Compare records of the same variant by key-order. 
+ pub fn cmp_key(&self, other: &Self) -> Result { + match (self, other) { + (Record::Ref(a), Record::Ref(b)) => Ok(a.refname.cmp(&b.refname)), + (Record::Log(a), Record::Log(b)) => { + let by_name = a.refname.cmp(&b.refname); + if by_name != Ordering::Equal { + return Ok(by_name); + } + Ok(b.update_index.cmp(&a.update_index)) + } + (Record::Obj(a), Record::Obj(b)) => { + let common = a.hash_prefix.len().max(b.hash_prefix.len()); + for idx in 0..common { + let av = a.hash_prefix.get(idx).copied().unwrap_or(0); + let bv = b.hash_prefix.get(idx).copied().unwrap_or(0); + if av != bv { + return Ok(av.cmp(&bv)); + } + } + Ok(a.hash_prefix.len().cmp(&b.hash_prefix.len())) + } + (Record::Index(a), Record::Index(b)) => Ok(a.last_key.cmp(&b.last_key)), + _ => Err(Error::Malformed("cannot compare different record types")), + } + } +} + +/// Encode a key using the same prefix-compression format as Git reftable. +/// +/// Returns `(encoded, restart)` where `restart` is true when prefix length is zero. +pub fn encode_key(prev_key: &[u8], key: &[u8], extra: u8) -> Result<(Vec, bool), Error> { + if extra > 7 { + return Err(Error::Malformed("extra must fit in 3 bits")); + } + let prefix_len = common_prefix_size(prev_key, key); + let suffix_len = key.len() - prefix_len; + + let mut out = Vec::with_capacity(16 + suffix_len); + let mut buf = [0u8; 10]; + + let n = encode_varint(prefix_len as u64, &mut buf); + out.extend_from_slice(&buf[..n]); + + let n = encode_varint(((suffix_len as u64) << 3) | extra as u64, &mut buf); + out.extend_from_slice(&buf[..n]); + + out.extend_from_slice(&key[prefix_len..]); + Ok((out, prefix_len == 0)) +} + +/// Decode key length fields from an encoded key/value record. 
+pub fn decode_key_len(input: &[u8]) -> Result<(usize, usize, u8, usize), Error> { + let (prefix_len, mut consumed) = decode_varint(input)?; + let (suffix_and_extra, n2) = decode_varint(&input[consumed..])?; + consumed += n2; + + let extra = (suffix_and_extra & 0x7) as u8; + let suffix_len = (suffix_and_extra >> 3) as usize; + Ok((prefix_len as usize, suffix_len, extra, consumed)) +} + +/// Decode key bytes into `last_key`, returning `(consumed, extra)`. +pub fn decode_key(last_key: &mut Vec, input: &[u8]) -> Result<(usize, u8), Error> { + let (prefix_len, suffix_len, extra, mut consumed) = decode_key_len(input)?; + if prefix_len > last_key.len() { + return Err(Error::Malformed("prefix length exceeds previous key")); + } + if input.len().saturating_sub(consumed) < suffix_len { + return Err(Error::Truncated); + } + + last_key.truncate(prefix_len); + last_key.extend_from_slice(&input[consumed..consumed + suffix_len]); + consumed += suffix_len; + + Ok((consumed, extra)) +} + +fn encode_string(value: &str, out: &mut Vec) { + let mut buf = [0u8; 10]; + let n = encode_varint(value.len() as u64, &mut buf); + out.extend_from_slice(&buf[..n]); + out.extend_from_slice(value.as_bytes()); +} + +fn decode_string(input: &[u8], cursor: &mut usize) -> Result { + let (len, consumed) = decode_varint(&input[*cursor..])?; + *cursor += consumed; + let len = len as usize; + if input.len().saturating_sub(*cursor) < len { + return Err(Error::Truncated); + } + let bytes = &input[*cursor..*cursor + len]; + *cursor += len; + String::from_utf8(bytes.to_vec()).map_err(|_| Error::Malformed("invalid utf-8 string")) +} + +fn encode_ref_record(record: &RefRecord, hash_size: usize) -> Result, Error> { + let mut out = Vec::with_capacity(64); + let mut varint_buf = [0u8; 10]; + let n = encode_varint(record.update_index, &mut varint_buf); + out.extend_from_slice(&varint_buf[..n]); + + match &record.value { + RefValue::Deletion => {} + RefValue::Val1(value) => { + if value.len() != hash_size { + return 
Err(Error::Malformed("ref val1 hash has wrong size")); + } + out.extend_from_slice(value); + } + RefValue::Val2 { value, target_value } => { + if value.len() != hash_size || target_value.len() != hash_size { + return Err(Error::Malformed("ref val2 hash has wrong size")); + } + out.extend_from_slice(value); + out.extend_from_slice(target_value); + } + RefValue::Symref(target) => encode_string(target, &mut out), + } + Ok(out) +} + +fn decode_ref_record(key: &[u8], val_type: u8, payload: &[u8], hash_size: usize) -> Result<(RefRecord, usize), Error> { + let (update_index, mut cursor) = decode_varint(payload)?; + let refname = String::from_utf8(key.to_vec()).map_err(|_| Error::Malformed("invalid refname utf-8"))?; + + let value = match val_type { + constants::REF_VAL_DELETION => RefValue::Deletion, + constants::REF_VAL_VAL1 => { + if payload.len().saturating_sub(cursor) < hash_size { + return Err(Error::Truncated); + } + let v = payload[cursor..cursor + hash_size].to_vec(); + cursor += hash_size; + RefValue::Val1(v) + } + constants::REF_VAL_VAL2 => { + if payload.len().saturating_sub(cursor) < hash_size * 2 { + return Err(Error::Truncated); + } + let value = payload[cursor..cursor + hash_size].to_vec(); + cursor += hash_size; + let target_value = payload[cursor..cursor + hash_size].to_vec(); + cursor += hash_size; + RefValue::Val2 { value, target_value } + } + constants::REF_VAL_SYMREF => RefValue::Symref(decode_string(payload, &mut cursor)?), + _ => return Err(Error::Malformed("unknown ref value type")), + }; + + Ok(( + RefRecord { + refname, + update_index, + value, + }, + cursor, + )) +} + +fn encode_log_record(record: &LogRecord, hash_size: usize) -> Result, Error> { + match &record.value { + LogValue::Deletion => Ok(Vec::new()), + LogValue::Update { + old_hash, + new_hash, + name, + email, + time, + tz_offset, + message, + } => { + if old_hash.len() != hash_size || new_hash.len() != hash_size { + return Err(Error::Malformed("log hash has wrong size")); + } + let 
mut out = Vec::with_capacity(2 * hash_size + 64); + out.extend_from_slice(old_hash); + out.extend_from_slice(new_hash); + encode_string(name, &mut out); + encode_string(email, &mut out); + + let mut varint_buf = [0u8; 10]; + let n = encode_varint(*time, &mut varint_buf); + out.extend_from_slice(&varint_buf[..n]); + + let mut be_tz = [0u8; 2]; + put_be16(&mut be_tz, *tz_offset as u16); + out.extend_from_slice(&be_tz); + + encode_string(message, &mut out); + Ok(out) + } + } +} + +fn decode_log_record(key: &[u8], val_type: u8, payload: &[u8], hash_size: usize) -> Result<(LogRecord, usize), Error> { + if key.len() <= 9 || key[key.len() - 9] != 0 { + return Err(Error::Malformed("invalid log key")); + } + + let refname = + String::from_utf8(key[..key.len() - 9].to_vec()).map_err(|_| Error::Malformed("invalid log refname utf-8"))?; + let mut rev_ts = [0u8; 8]; + rev_ts.copy_from_slice(&key[key.len() - 8..]); + let update_index = u64::MAX - get_be64(&rev_ts); + + let (value, consumed) = match val_type { + constants::LOG_VAL_DELETION => (LogValue::Deletion, 0), + constants::LOG_VAL_UPDATE => { + let mut cursor = 0; + if payload.len() < 2 * hash_size { + return Err(Error::Truncated); + } + + let old_hash = payload[cursor..cursor + hash_size].to_vec(); + cursor += hash_size; + let new_hash = payload[cursor..cursor + hash_size].to_vec(); + cursor += hash_size; + + let name = decode_string(payload, &mut cursor)?; + let email = decode_string(payload, &mut cursor)?; + let (time, consumed) = decode_varint(&payload[cursor..])?; + cursor += consumed; + + if payload.len().saturating_sub(cursor) < 2 { + return Err(Error::Truncated); + } + let mut tz = [0u8; 2]; + tz.copy_from_slice(&payload[cursor..cursor + 2]); + cursor += 2; + let tz_offset = get_be16(&tz) as i16; + + let message = decode_string(payload, &mut cursor)?; + ( + LogValue::update(old_hash, new_hash, name, email, time, tz_offset, message), + cursor, + ) + } + _ => return Err(Error::Malformed("unknown log value type")), + 
}; + + Ok(( + LogRecord { + refname, + update_index, + value, + }, + consumed, + )) +} + +fn encode_obj_record(record: &ObjRecord) -> Result, Error> { + let mut out = Vec::with_capacity(32); + let mut varint_buf = [0u8; 10]; + + let offset_len = record.offsets.len(); + if offset_len == 0 || offset_len >= 8 { + let n = encode_varint(offset_len as u64, &mut varint_buf); + out.extend_from_slice(&varint_buf[..n]); + } + + if offset_len == 0 { + return Ok(out); + } + + let first = record.offsets[0]; + let n = encode_varint(first, &mut varint_buf); + out.extend_from_slice(&varint_buf[..n]); + + let mut last = first; + for &offset in &record.offsets[1..] { + let delta = offset + .checked_sub(last) + .ok_or(Error::Malformed("object offsets must be ascending"))?; + let n = encode_varint(delta, &mut varint_buf); + out.extend_from_slice(&varint_buf[..n]); + last = offset; + } + + Ok(out) +} + +fn decode_obj_record(key: &[u8], val_type: u8, payload: &[u8]) -> Result<(ObjRecord, usize), Error> { + let mut cursor = 0; + + let count = if val_type == 0 { + let (count, consumed) = decode_varint(payload)?; + cursor += consumed; + count as usize + } else { + val_type as usize + }; + + let mut offsets = Vec::with_capacity(count); + if count > 0 { + let (first, consumed) = decode_varint(&payload[cursor..])?; + cursor += consumed; + offsets.push(first); + + let mut last = first; + for _ in 1..count { + let (delta, consumed) = decode_varint(&payload[cursor..])?; + cursor += consumed; + let next = last.checked_add(delta).ok_or(Error::VarintOverflow)?; + offsets.push(next); + last = next; + } + } + + Ok(( + ObjRecord { + hash_prefix: key.to_vec(), + offsets, + }, + cursor, + )) +} + +fn encode_index_record(record: &IndexRecord) -> Result, Error> { + let mut out = Vec::with_capacity(10); + let mut varint_buf = [0u8; 10]; + let n = encode_varint(record.offset, &mut varint_buf); + out.extend_from_slice(&varint_buf[..n]); + Ok(out) +} + +fn decode_index_record(key: &[u8], payload: &[u8]) -> 
Result<(IndexRecord, usize), Error> { + let (offset, consumed) = decode_varint(payload)?; + Ok(( + IndexRecord { + last_key: key.to_vec(), + offset, + }, + consumed, + )) +} + +/// Compare reference names for sorting. +pub fn ref_record_compare_name(a: &RefRecord, b: &RefRecord) -> Ordering { + a.refname.cmp(&b.refname) +} + +/// Compare log records by key (`refname`, reverse `update_index`). +pub fn log_record_compare_key(a: &LogRecord, b: &LogRecord) -> Ordering { + let by_name = a.refname.cmp(&b.refname); + if by_name != Ordering::Equal { + return by_name; + } + b.update_index.cmp(&a.update_index) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn hash(seed: u8, hash_size: usize) -> Vec { + (0..hash_size).map(|idx| seed.wrapping_add(idx as u8)).collect() + } + + fn roundtrip(record: Record, hash_size: usize) { + let key = record.key(); + let payload = record.encode(hash_size).expect("encode"); + let decoded = + Record::decode(record.block_type(), &key, record.val_type(), &payload, hash_size).expect("decode"); + assert_eq!(record, decoded); + } + + #[test] + fn key_roundtrip() { + let prev = b"refs/heads/master"; + let key = b"refs/tags/v1.0"; + let extra = 6; + + let (encoded, restart) = encode_key(prev, key, extra).expect("encode"); + assert!(!restart); + + let mut decoded = prev.to_vec(); + let (consumed, decoded_extra) = decode_key(&mut decoded, &encoded).expect("decode"); + assert_eq!(consumed, encoded.len()); + assert_eq!(decoded_extra, extra); + assert_eq!(decoded, key); + } + + #[test] + fn ref_record_roundtrip() { + let hash_size = 20; + + roundtrip( + Record::Ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 1, + value: RefValue::Deletion, + }), + hash_size, + ); + + roundtrip( + Record::Ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 2, + value: RefValue::Val1(hash(1, hash_size)), + }), + hash_size, + ); + + roundtrip( + Record::Ref(RefRecord { + refname: "refs/tags/v1".into(), + update_index: 3, + value: 
RefValue::Val2 { + value: hash(2, hash_size), + target_value: hash(3, hash_size), + }, + }), + hash_size, + ); + + roundtrip( + Record::Ref(RefRecord { + refname: "HEAD".into(), + update_index: 4, + value: RefValue::Symref("refs/heads/main".into()), + }), + hash_size, + ); + } + + #[test] + fn log_record_roundtrip() { + let hash_size = 20; + + roundtrip( + Record::Log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 5, + value: LogValue::Deletion, + }), + hash_size, + ); + + roundtrip( + Record::Log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 6, + value: LogValue::Update { + old_hash: hash(10, hash_size), + new_hash: hash(20, hash_size), + name: "alice".into(), + email: "alice@example.com".into(), + time: 1_577_123_507, + tz_offset: 100, + message: "test".into(), + }, + }), + hash_size, + ); + } + + #[test] + fn obj_record_roundtrip() { + roundtrip( + Record::Obj(ObjRecord { + hash_prefix: vec![1, 2, 3, 4, 0], + offsets: vec![1, 2, 3], + }), + 20, + ); + + roundtrip( + Record::Obj(ObjRecord { + hash_prefix: vec![1, 2, 3, 4, 0], + offsets: vec![1, 2, 3, 4, 500, 600, 700, 800, 9_000], + }), + 20, + ); + + roundtrip( + Record::Obj(ObjRecord { + hash_prefix: vec![1, 2, 3, 4, 0], + offsets: vec![], + }), + 20, + ); + } + + #[test] + fn index_record_roundtrip() { + roundtrip( + Record::Index(IndexRecord { + last_key: b"refs/heads/main".to_vec(), + offset: 42, + }), + 20, + ); + } + + #[test] + fn comparisons_match_expectations() { + let a = Record::Ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 1, + value: RefValue::Val1(vec![0; 20]), + }); + let b = Record::Ref(RefRecord { + refname: "HEAD".into(), + update_index: 1, + value: RefValue::Symref("refs/heads/main".into()), + }); + assert_eq!(a.cmp_key(&b).expect("same type"), Ordering::Greater); + + let l1 = Record::Log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 42, + value: LogValue::Deletion, + }); + let l2 = Record::Log(LogRecord { + 
refname: "refs/heads/main".into(), + update_index: 22, + value: LogValue::Deletion, + }); + assert_eq!(l1.cmp_key(&l2).expect("same type"), Ordering::Less); + } +} diff --git a/gix-reftable/src/stack.rs b/gix-reftable/src/stack.rs new file mode 100644 index 00000000000..3d3529f03ab --- /dev/null +++ b/gix-reftable/src/stack.rs @@ -0,0 +1,438 @@ +use std::{ + fs, + path::{Path, PathBuf}, +}; + +use crate::{ + basics::HashId, + error::Error, + merged::MergedTable, + record::{LogRecord, RefRecord}, + table::Table, + writer::{WriteOptions, Writer}, +}; + +/// Options controlling stack behavior. +#[derive(Debug, Clone)] +pub struct StackOptions { + /// Hash used when creating new tables. + pub hash_id: HashId, + /// Disable automatic compaction after commits. + pub disable_auto_compact: bool, + /// Minimum number of tables required before compaction. + pub auto_compaction_factor: usize, + /// Write options used for emitted tables. + pub write_options: WriteOptions, +} + +impl Default for StackOptions { + fn default() -> Self { + let write_options = WriteOptions::default(); + Self { + hash_id: write_options.hash_id, + disable_auto_compact: false, + auto_compaction_factor: 2, + write_options, + } + } +} + +/// A stack of reftable files controlled by `tables.list`. +#[derive(Debug, Clone)] +pub struct Stack { + dir: PathBuf, + opts: StackOptions, + table_names: Vec, + tables: Vec
, + merged: MergedTable, +} + +impl Stack { + /// Open or initialize a stack at `dir`. + pub fn open(dir: impl AsRef, mut opts: StackOptions) -> Result { + let dir = dir.as_ref().to_path_buf(); + fs::create_dir_all(&dir)?; + + opts.write_options.hash_id = opts.hash_id; + + let mut out = Self { + dir, + opts, + table_names: Vec::new(), + tables: Vec::new(), + merged: MergedTable::new(Vec::new())?, + }; + out.ensure_tables_list()?; + out.reload()?; + Ok(out) + } + + /// Return the stack directory. + pub fn dir(&self) -> &Path { + &self.dir + } + + /// Return loaded table names in stack order. + pub fn table_names(&self) -> &[String] { + &self.table_names + } + + /// Return loaded table handles in stack order. + pub fn tables(&self) -> &[Table] { + &self.tables + } + + /// Return merged view of all tables. + pub fn merged(&self) -> &MergedTable { + &self.merged + } + + /// Return the next update index. + pub fn next_update_index(&self) -> u64 { + self.merged.max_update_index.saturating_add(1).max(1) + } + + /// Reload stack metadata and all tables from disk. + pub fn reload(&mut self) -> Result<(), Error> { + self.ensure_tables_list()?; + let list = fs::read_to_string(self.tables_list_path())?; + + self.table_names = list + .lines() + .filter(|line| !line.trim().is_empty()) + .map(ToOwned::to_owned) + .collect(); + + self.tables.clear(); + for name in &self.table_names { + let table = Table::open(self.dir.join(name))?; + self.tables.push(table); + } + + self.merged = MergedTable::new(self.tables.clone())?; + Ok(()) + } + + /// Run basic consistency checks on all tables and merged iteration. 
+ pub fn fsck(&self) -> Result<(), Error> { + let mut prev_max = None; + for (idx, table) in self.tables.iter().enumerate() { + let table_name = &self.table_names[idx]; + if !is_valid_table_name(table_name) { + return Err(Error::Api("invalid reftable table name")); + } + if table.hash_id != self.opts.hash_id { + return Err(Error::Api("table hash id does not match stack hash id")); + } + if table.min_update_index > table.max_update_index { + return Err(Error::Api("table has invalid update-index range")); + } + if let Some(prev) = prev_max { + if table.min_update_index <= prev { + return Err(Error::Api("table update-index ranges must be strictly increasing")); + } + } + prev_max = Some(table.max_update_index); + + let path = self.dir.join(table_name); + if !path.is_file() { + return Err(Error::Api("table listed in tables.list is missing")); + } + } + + let mut refs = self.merged.ref_iter()?; + while refs.next_record()?.is_some() {} + let mut logs = self.merged.log_iter()?; + while logs.next_record()?.is_some() {} + + Ok(()) + } + + /// Create a mutable transaction. 
+ pub fn transaction(&mut self) -> Transaction<'_> { + Transaction { + stack: self, + refs: Vec::new(), + logs: Vec::new(), + } + } + + fn ensure_tables_list(&self) -> Result<(), Error> { + let path = self.tables_list_path(); + if !path.exists() { + fs::write(path, "")?; + } + Ok(()) + } + + fn tables_list_path(&self) -> PathBuf { + self.dir.join("tables.list") + } + + fn write_tables_list(&self, names: &[String]) -> Result<(), Error> { + let path = self.tables_list_path(); + let tmp = path.with_extension("list.lock"); + let mut content = String::new(); + for name in names { + content.push_str(name); + content.push('\n'); + } + fs::write(&tmp, content)?; + fs::rename(tmp, path)?; + Ok(()) + } + + fn write_table_bytes(&self, min: u64, max: u64, bytes: &[u8]) -> Result { + let suffix = crc32fast::hash(bytes); + let name = format!("0x{min:012x}-0x{max:012x}-{suffix:08x}.ref"); + let path = self.dir.join(&name); + let tmp = path.with_extension("lock"); + fs::write(&tmp, bytes)?; + fs::rename(tmp, path)?; + Ok(name) + } + + /// Compact all tables into one when threshold conditions are met. + pub fn maybe_auto_compact(&mut self) -> Result<(), Error> { + if self.opts.disable_auto_compact { + return Ok(()); + } + if self.tables.len() < self.opts.auto_compaction_factor { + return Ok(()); + } + + let mut refs = Vec::::new(); + let mut ref_iter = self.merged.ref_iter()?; + while let Some(rec) = ref_iter.next_record()? { + if let crate::record::Record::Ref(r) = rec { + refs.push(r); + } + } + + let mut logs = Vec::::new(); + let mut log_iter = self.merged.log_iter()?; + while let Some(rec) = log_iter.next_record()? 
{ + if let crate::record::Record::Log(l) = rec { + logs.push(l); + } + } + + if refs.is_empty() && logs.is_empty() { + return Ok(()); + } + + let min = self.merged.min_update_index; + let max = self.merged.max_update_index; + let mut writer = Writer::new(self.opts.write_options.clone()); + writer.set_limits(min, max)?; + for r in refs { + writer.add_ref(r)?; + } + for l in logs { + writer.add_log(l)?; + } + let bytes = writer.finish()?; + let compacted = self.write_table_bytes(min, max, &bytes)?; + + let old_names = self.table_names.clone(); + self.write_tables_list(&[compacted])?; + for old in old_names { + let _ = fs::remove_file(self.dir.join(old)); + } + + self.reload() + } +} + +fn is_valid_table_name(name: &str) -> bool { + let Some(base) = name.strip_suffix(".ref") else { + return false; + }; + let mut parts = base.split('-'); + let Some(min) = parts.next() else { + return false; + }; + let Some(max) = parts.next() else { + return false; + }; + let Some(hash) = parts.next() else { + return false; + }; + if parts.next().is_some() { + return false; + } + valid_hex_component(min, 12) + && valid_hex_component(max, 12) + && hash.len() == 8 + && hash.chars().all(|c| c.is_ascii_hexdigit()) +} + +fn valid_hex_component(value: &str, width: usize) -> bool { + let Some(hex) = value.strip_prefix("0x") else { + return false; + }; + hex.len() == width && hex.chars().all(|c| c.is_ascii_hexdigit()) +} + +/// Mutable stack transaction. +pub struct Transaction<'a> { + stack: &'a mut Stack, + refs: Vec, + logs: Vec, +} + +impl Transaction<'_> { + /// Add a ref update to this transaction. + pub fn add_ref(&mut self, rec: RefRecord) { + self.refs.push(rec); + } + + /// Add a log update to this transaction. + pub fn add_log(&mut self, rec: LogRecord) { + self.logs.push(rec); + } + + /// Commit this transaction, persisting a new table and reloading the stack. 
+ pub fn commit(mut self) -> Result<(), Error> { + let update_index = self.stack.next_update_index(); + let mut writer = Writer::new(self.stack.opts.write_options.clone()); + writer.set_limits(update_index, update_index)?; + + for mut r in self.refs.drain(..) { + r.update_index = update_index; + writer.add_ref(r)?; + } + for mut l in self.logs.drain(..) { + l.update_index = update_index; + writer.add_log(l)?; + } + + let bytes = writer.finish()?; + let name = self.stack.write_table_bytes(update_index, update_index, &bytes)?; + + let mut names = self.stack.table_names.clone(); + names.push(name); + self.stack.write_tables_list(&names)?; + self.stack.reload()?; + self.stack.maybe_auto_compact()?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::{ + fs, + path::PathBuf, + time::{SystemTime, UNIX_EPOCH}, + }; + + use crate::{error::Error, record::RefRecord, record::RefValue, writer::WriteOptions}; + + use super::{Stack, StackOptions}; + + struct TempDir { + path: PathBuf, + } + + impl TempDir { + fn new() -> Self { + let stamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("valid time") + .as_nanos(); + let path = std::env::temp_dir().join(format!("gix-reftable-stack-{stamp}")); + fs::create_dir_all(&path).expect("temp dir"); + Self { path } + } + } + + impl Drop for TempDir { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.path); + } + } + + #[test] + fn transaction_commit_and_reload() { + let tmp = TempDir::new(); + let mut stack = Stack::open( + &tmp.path, + StackOptions { + disable_auto_compact: true, + write_options: WriteOptions { + block_size: 128, + ..Default::default() + }, + ..Default::default() + }, + ) + .expect("stack"); + + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: "HEAD".into(), + update_index: 0, + value: RefValue::Symref("refs/heads/main".into()), + }); + tx.add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 0, + value: RefValue::Val1(vec![1; 20]), + }); + 
tx.commit().expect("commit"); + + assert_eq!(stack.tables().len(), 1); + stack.reload().expect("reload"); + assert_eq!(stack.tables().len(), 1); + stack.fsck().expect("fsck"); + } + + #[test] + fn auto_compaction_reduces_table_count() { + let tmp = TempDir::new(); + let mut stack = Stack::open( + &tmp.path, + StackOptions { + auto_compaction_factor: 2, + write_options: WriteOptions { + block_size: 96, + ..Default::default() + }, + ..Default::default() + }, + ) + .expect("stack"); + + for idx in 0..3u8 { + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: format!("refs/heads/{idx}"), + update_index: 0, + value: RefValue::Val1(vec![idx; 20]), + }); + tx.commit().expect("commit"); + } + + assert!(stack.tables().len() <= 2, "compaction should reduce table fan-out"); + } + + #[test] + fn fsck_detects_missing_tables() { + let tmp = TempDir::new(); + let mut stack = Stack::open(&tmp.path, StackOptions::default()).expect("stack"); + + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 0, + value: RefValue::Val1(vec![1; 20]), + }); + tx.commit().expect("commit"); + + let table = stack.table_names()[0].clone(); + fs::remove_file(tmp.path.join(table)).expect("remove table"); + let err = stack.fsck().expect_err("must fail"); + assert!(matches!(err, Error::Api(_))); + } +} diff --git a/gix-reftable/src/table.rs b/gix-reftable/src/table.rs new file mode 100644 index 00000000000..c96a58499f7 --- /dev/null +++ b/gix-reftable/src/table.rs @@ -0,0 +1,502 @@ +use std::path::Path; + +use crate::{ + basics::HashId, + basics::{get_be32, get_be64}, + block::{footer_size, header_size, Block, BlockIter}, + blocksource::BlockSource, + constants, + error::Error, + record::Record, +}; + +const FORMAT_ID_SHA1: u32 = 0x7368_6131; +const FORMAT_ID_SHA256: u32 = 0x7332_3536; + +/// Metadata for a section inside a table. 
+#[derive(Debug, Clone, Copy, Default)] +pub struct TableOffsets { + /// True if the section is present. + pub is_present: bool, + /// Section offset in bytes. + pub offset: u64, + /// Optional index offset in bytes. + pub index_offset: u64, +} + +/// A single reftable file. +#[derive(Debug, Clone)] +pub struct Table { + /// Name of this table. + pub name: String, + /// Underlying block source. + pub source: BlockSource, + /// Data size excluding footer. + pub size: u64, + /// Hash used by this table. + pub hash_id: HashId, + /// Reftable format version. + pub version: u8, + /// Configured block size (0 for unaligned). + pub block_size: u32, + /// Minimum update index encoded in this table. + pub min_update_index: u64, + /// Maximum update index encoded in this table. + pub max_update_index: u64, + /// Object-id abbreviation length in `o` section. + pub object_id_len: u8, + /// Offsets for refs. + pub ref_offsets: TableOffsets, + /// Offsets for object index. + pub obj_offsets: TableOffsets, + /// Offsets for logs. + pub log_offsets: TableOffsets, +} + +impl Table { + /// Open a table from a file. + pub fn open(path: impl AsRef) -> Result { + let path = path.as_ref(); + let name = path + .file_name() + .map_or_else(|| path.display().to_string(), |n| n.to_string_lossy().into_owned()); + let source = BlockSource::from_file(path)?; + Self::from_block_source(name, source) + } + + /// Build a table from a block source. + pub fn from_block_source(name: String, source: BlockSource) -> Result { + let file_size = source.size(); + let max_header_size = header_size(2)? 
+ 1; + if file_size < max_header_size as u64 { + return Err(Error::Malformed("reftable too small")); + } + + let header = source.read(0, max_header_size as u32)?.to_vec(); + if header.len() < max_header_size || &header[..4] != b"REFT" { + return Err(Error::Malformed("missing REFT header")); + } + let version = header[4]; + if version != 1 && version != 2 { + return Err(Error::Malformed("unsupported reftable version")); + } + + let footer_len = footer_size(version)? as u64; + if file_size < footer_len { + return Err(Error::Malformed("reftable too small for footer")); + } + let size = file_size - footer_len; + + let footer = source.read(size, footer_len as u32)?.to_vec(); + if footer.len() != footer_len as usize { + return Err(Error::Truncated); + } + + Self::parse(name, source, &header, &footer, version, size) + } + + fn parse( + name: String, + source: BlockSource, + header: &[u8], + footer: &[u8], + version: u8, + size: u64, + ) -> Result { + let mut pos = 0usize; + if &footer[..4] != b"REFT" { + return Err(Error::Malformed("footer magic mismatch")); + } + pos += 4; + + let version_header_len = header_size(version)?; + if footer.len() < version_header_len || header.len() < version_header_len + 1 { + return Err(Error::Truncated); + } + if footer[..version_header_len] != header[..version_header_len] { + return Err(Error::Malformed("header/footer prefix mismatch")); + } + + pos += 1; // version + let mut be24 = [0u8; 4]; + be24[1..].copy_from_slice(&footer[pos..pos + 3]); + let block_size = get_be32(&be24); + pos += 3; + + let mut be64 = [0u8; 8]; + be64.copy_from_slice(&footer[pos..pos + 8]); + let min_update_index = get_be64(&be64); + pos += 8; + be64.copy_from_slice(&footer[pos..pos + 8]); + let max_update_index = get_be64(&be64); + pos += 8; + + let hash_id = if version == 1 { + HashId::Sha1 + } else { + let mut be32 = [0u8; 4]; + be32.copy_from_slice(&footer[pos..pos + 4]); + pos += 4; + match get_be32(&be32) { + FORMAT_ID_SHA1 => HashId::Sha1, + 
FORMAT_ID_SHA256 => HashId::Sha256, + _ => return Err(Error::Malformed("unknown hash format id")), + } + }; + + be64.copy_from_slice(&footer[pos..pos + 8]); + let ref_index_offset = get_be64(&be64); + pos += 8; + + be64.copy_from_slice(&footer[pos..pos + 8]); + let mut obj_offset_field = get_be64(&be64); + pos += 8; + let object_id_len = (obj_offset_field & ((1 << 5) - 1)) as u8; + obj_offset_field >>= 5; + + be64.copy_from_slice(&footer[pos..pos + 8]); + let obj_index_offset = get_be64(&be64); + pos += 8; + + be64.copy_from_slice(&footer[pos..pos + 8]); + let log_offset = get_be64(&be64); + pos += 8; + + be64.copy_from_slice(&footer[pos..pos + 8]); + let log_index_offset = get_be64(&be64); + pos += 8; + + let crc_expected = crc32fast::hash(&footer[..pos]); + let mut be32 = [0u8; 4]; + be32.copy_from_slice(&footer[pos..pos + 4]); + let crc_actual = get_be32(&be32); + if crc_expected != crc_actual { + return Err(Error::ChecksumMismatch); + } + + let first_block_typ = header[version_header_len]; + let ref_offsets = TableOffsets { + is_present: first_block_typ == constants::BLOCK_TYPE_REF, + offset: 0, + index_offset: ref_index_offset, + }; + let obj_offsets = TableOffsets { + is_present: obj_offset_field > 0, + offset: obj_offset_field, + index_offset: obj_index_offset, + }; + if obj_offsets.is_present && object_id_len == 0 { + return Err(Error::Malformed("object section present without object_id_len")); + } + let log_offsets = TableOffsets { + is_present: first_block_typ == constants::BLOCK_TYPE_LOG || log_offset > 0, + offset: log_offset, + index_offset: log_index_offset, + }; + + Ok(Self { + name, + source, + size, + hash_id, + version, + block_size, + min_update_index, + max_update_index, + object_id_len, + ref_offsets, + obj_offsets, + log_offsets, + }) + } + + /// Return the offset metadata for a given record block type. 
+ pub fn offsets_for(&self, typ: u8) -> Result { + match typ { + constants::BLOCK_TYPE_REF => Ok(self.ref_offsets), + constants::BLOCK_TYPE_LOG => Ok(self.log_offsets), + constants::BLOCK_TYPE_OBJ => Ok(self.obj_offsets), + _ => Err(Error::Malformed("unsupported table section type")), + } + } + + /// Decode one block at `offset`. + pub fn init_block(&self, offset: u64, want_type: u8) -> Result, Error> { + if offset >= self.size { + return Ok(None); + } + let header_off = if offset == 0 { + header_size(self.version)? as u32 + } else { + 0 + }; + Block::init( + &self.source, + offset, + header_off, + self.block_size, + self.hash_id.size(), + want_type, + ) + } + + /// Create an iterator for records of type `typ`. + pub fn iter(&self, typ: u8) -> Result, Error> { + TableIter::new(self, typ) + } +} + +/// Iterator for all records of one section in a single table. +pub struct TableIter<'a> { + table: &'a Table, + typ: u8, + start_off: u64, + block_off: u64, + block_iter: Option, + finished: bool, +} + +impl<'a> TableIter<'a> { + fn new(table: &'a Table, typ: u8) -> Result { + let offsets = table.offsets_for(typ)?; + if !offsets.is_present { + return Ok(Self { + table, + typ, + start_off: 0, + block_off: 0, + block_iter: None, + finished: true, + }); + } + + let mut iter = Self { + table, + typ, + start_off: offsets.offset, + block_off: offsets.offset, + block_iter: None, + finished: false, + }; + iter.seek_to(offsets.offset, typ)?; + Ok(iter) + } + + fn seek_to(&mut self, off: u64, typ: u8) -> Result<(), Error> { + let Some(block) = self.table.init_block(off, typ)? 
else { + self.finished = true; + self.block_iter = None; + return Ok(()); + }; + + self.block_off = off; + self.block_iter = Some(BlockIter::new(block)); + self.finished = false; + Ok(()) + } + + fn next_block(&mut self) -> Result<(), Error> { + let Some(current) = self.block_iter.as_ref() else { + self.finished = true; + return Ok(()); + }; + + let next_off = self + .block_off + .checked_add(current.block().full_block_size as u64) + .ok_or(Error::Malformed("block offset overflow"))?; + self.seek_to(next_off, self.typ) + } + + /// Position iterator at the first record whose key is >= `want`. + pub fn seek_key(&mut self, want: &[u8]) -> Result<(), Error> { + self.seek_to(self.start_off, self.typ)?; + if self.finished { + return Ok(()); + } + + loop { + let Some(_current) = self.block_iter.as_ref() else { + return Ok(()); + }; + + let mut probe = self.clone_for_probe(); + probe.next_block()?; + if probe.finished { + break; + } + + let Some(probe_block) = probe.block_iter.as_ref() else { + break; + }; + let first_key = probe_block.block().first_key()?; + if first_key.as_slice() > want { + break; + } + + self.block_off = probe.block_off; + self.block_iter = probe.block_iter; + self.finished = probe.finished; + } + + if let Some(block_iter) = self.block_iter.as_mut() { + block_iter.seek_key(want)?; + } + Ok(()) + } + + fn clone_for_probe(&self) -> Self { + Self { + table: self.table, + typ: self.typ, + start_off: self.start_off, + block_off: self.block_off, + block_iter: self.block_iter.clone(), + finished: self.finished, + } + } + + /// Return the next record, if any. + pub fn next_record(&mut self) -> Result, Error> { + loop { + if self.finished { + return Ok(None); + } + + if let Some(block_iter) = self.block_iter.as_mut() { + if let Some(mut rec) = block_iter.next_record()? 
{ + if let Record::Ref(ref mut r) = rec { + r.update_index = r + .update_index + .checked_add(self.table.min_update_index) + .ok_or(Error::VarintOverflow)?; + } + return Ok(Some(rec)); + } + } + + self.next_block()?; + } + } +} + +#[cfg(test)] +mod tests { + use std::{ + fs, + path::{Path, PathBuf}, + process::Command, + time::{SystemTime, UNIX_EPOCH}, + }; + + use crate::{constants, record::Record}; + + use super::Table; + + struct TempDir { + path: PathBuf, + } + + impl TempDir { + fn new() -> Self { + let stamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("valid time") + .as_nanos(); + let path = std::env::temp_dir().join(format!("gix-reftable-{stamp}")); + fs::create_dir_all(&path).expect("temp dir"); + Self { path } + } + } + + impl Drop for TempDir { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.path); + } + } + + fn run(cwd: &Path, args: &[&str]) { + let status = Command::new(args[0]) + .args(&args[1..]) + .current_dir(cwd) + .status() + .expect("command executed"); + assert!(status.success(), "command failed: {args:?}"); + } + + fn create_reftable_repo() -> Option<(TempDir, PathBuf)> { + let tmp = TempDir::new(); + let source = tmp.path.join("source"); + let clone = tmp.path.join("clone"); + fs::create_dir_all(&source).expect("source dir"); + + run(&source, &["git", "init", "-q"]); + run(&source, &["git", "config", "user.name", "committer"]); + run(&source, &["git", "config", "user.email", "committer@example.com"]); + run(&source, &["git", "config", "commit.gpgSign", "false"]); + fs::write(source.join("file"), "hello\n").expect("write file"); + run(&source, &["git", "add", "file"]); + run(&source, &["git", "commit", "-q", "-m", "c1"]); + + let clone_status = Command::new("git") + .args(["clone", "--ref-format=reftable"]) + .arg(source.to_str().expect("utf-8 path")) + .arg(clone.to_str().expect("utf-8 path")) + .current_dir(&tmp.path) + .status() + .ok()?; + if !clone_status.success() { + return None; + } + + let list = 
fs::read_to_string(clone.join(".git/reftable/tables.list")).ok()?; + let table_name = list.lines().next()?; + Some((tmp, clone.join(".git/reftable").join(table_name))) + } + + #[test] + fn open_table_and_iterate_refs_and_logs() { + let Some((_tmp, table_path)) = create_reftable_repo() else { + return; + }; + let table = Table::open(&table_path).expect("open table"); + + let mut refs = table.iter(constants::BLOCK_TYPE_REF).expect("ref iter"); + let mut saw_head = false; + while let Some(rec) = refs.next_record().expect("next ref") { + if let Record::Ref(ref_record) = rec { + if ref_record.refname == "HEAD" { + saw_head = true; + } + } + } + assert!(saw_head, "HEAD must be present in reftable refs"); + + let mut logs = table.iter(constants::BLOCK_TYPE_LOG).expect("log iter"); + let mut log_count = 0usize; + loop { + match logs.next_record() { + Ok(Some(_)) => log_count += 1, + Ok(None) => break, + Err(err) => panic!("next log #{log_count} failed: {err:?}"), + } + } + assert!(log_count > 0, "expected log records in cloned repository"); + } + + #[test] + fn seek_by_key_in_ref_section() { + let Some((_tmp, table_path)) = create_reftable_repo() else { + return; + }; + let table = Table::open(&table_path).expect("open table"); + + let mut refs = table.iter(constants::BLOCK_TYPE_REF).expect("ref iter"); + refs.seek_key(b"refs/heads/").expect("seek works"); + let rec = refs.next_record().expect("record read").expect("record exists"); + let Record::Ref(rec) = rec else { + panic!("expected ref record"); + }; + assert!(rec.refname.starts_with("refs/heads/")); + } +} diff --git a/gix-reftable/src/tree.rs b/gix-reftable/src/tree.rs new file mode 100644 index 00000000000..6488e5ccf4a --- /dev/null +++ b/gix-reftable/src/tree.rs @@ -0,0 +1,97 @@ +/// A simple binary-search tree node. 
#[derive(Debug)]
pub struct TreeNode<T> {
    key: T,
    left: Option<Box<TreeNode<T>>>,
    right: Option<Box<TreeNode<T>>>,
}

impl<T> TreeNode<T> {
    /// Create a leaf node holding `key`.
    fn new(key: T) -> Self {
        Self {
            key,
            left: None,
            right: None,
        }
    }
}

/// Insert `key` into the tree rooted at `root` and return a mutable reference to the matched node.
///
/// When an equal key (per `compare`) is already present, no node is added and the
/// existing node is returned instead. `compare` returns <0, 0, or >0 like a C
/// comparator.
pub fn tree_insert<'a, T, F>(root: &'a mut Option<Box<TreeNode<T>>>, key: T, compare: &F) -> &'a mut TreeNode<T>
where
    F: Fn(&T, &T) -> i32,
{
    match root {
        Some(node) => {
            let cmp = compare(&key, &node.key);
            match cmp.cmp(&0) {
                std::cmp::Ordering::Less => tree_insert(&mut node.left, key, compare),
                std::cmp::Ordering::Greater => tree_insert(&mut node.right, key, compare),
                std::cmp::Ordering::Equal => node,
            }
        }
        None => {
            *root = Some(Box::new(TreeNode::new(key)));
            root.as_deref_mut().expect("inserted")
        }
    }
}

/// Search `key` in the tree rooted at `root`, returning the matching node if present.
pub fn tree_search<'a, T, F>(root: &'a Option<Box<TreeNode<T>>>, key: &T, compare: &F) -> Option<&'a TreeNode<T>>
where
    F: Fn(&T, &T) -> i32,
{
    let node = root.as_deref()?;
    let cmp = compare(key, &node.key);
    match cmp.cmp(&0) {
        std::cmp::Ordering::Less => tree_search(&node.left, key, compare),
        std::cmp::Ordering::Greater => tree_search(&node.right, key, compare),
        std::cmp::Ordering::Equal => Some(node),
    }
}

/// In-order walk of all keys, invoking `action` on each key in ascending order.
pub fn infix_walk<T, F>(root: &Option<Box<TreeNode<T>>>, action: &mut F)
where
    F: FnMut(&T),
{
    let Some(node) = root.as_deref() else {
        return;
    };
    infix_walk(&node.left, action);
    action(&node.key);
    infix_walk(&node.right, action);
}

// Release the tree.
+pub fn tree_free(root: &mut Option>>) { + *root = None; +} + +#[cfg(test)] +mod tests { + use super::*; + + fn cmp(a: &i32, b: &i32) -> i32 { + a.cmp(b) as i32 + } + + #[test] + fn insert_search_walk() { + let mut root = None; + tree_insert(&mut root, 3, &cmp); + tree_insert(&mut root, 1, &cmp); + tree_insert(&mut root, 2, &cmp); + + assert!(tree_search(&root, &1, &cmp).is_some()); + assert!(tree_search(&root, &4, &cmp).is_none()); + + let mut out = Vec::new(); + infix_walk(&root, &mut |k| out.push(*k)); + assert_eq!(out, vec![1, 2, 3]); + + tree_free(&mut root); + assert!(root.is_none()); + } +} diff --git a/gix-reftable/src/writer.rs b/gix-reftable/src/writer.rs new file mode 100644 index 00000000000..a8e1b135398 --- /dev/null +++ b/gix-reftable/src/writer.rs @@ -0,0 +1,582 @@ +use std::io::Write; + +use crate::{ + basics::{put_be24, put_be32, put_be64, HashId}, + block::{footer_size, header_size}, + blocksource::BlockSource, + constants, + error::Error, + record::{encode_key, IndexRecord, LogRecord, LogValue, Record, RefRecord}, + table::Table, +}; + +const FORMAT_ID_SHA1: u32 = 0x7368_6131; +const FORMAT_ID_SHA256: u32 = 0x7332_3536; + +/// Options controlling writing behavior. +#[derive(Debug, Clone)] +pub struct WriteOptions { + /// If true, do not pad non-log blocks to `block_size`. + pub unpadded: bool, + /// Desired block size for non-log blocks. + pub block_size: u32, + /// If true, skip object reverse index generation. + pub skip_index_objects: bool, + /// Restart key interval. + pub restart_interval: u16, + /// Hash function used in written tables. + pub hash_id: HashId, + /// If false, log messages are normalized to one line ending in `\n`. 
+ pub exact_log_message: bool, +} + +impl Default for WriteOptions { + fn default() -> Self { + Self { + unpadded: false, + block_size: 4096, + skip_index_objects: true, + restart_interval: 16, + hash_id: HashId::Sha1, + exact_log_message: false, + } + } +} + +#[derive(Debug, Clone)] +struct SectionResult { + bytes: Vec, + index_offset: u64, +} + +/// Writer for single reftable files. +#[derive(Debug, Clone)] +pub struct Writer { + opts: WriteOptions, + min_update_index: Option, + max_update_index: Option, + refs: Vec, + logs: Vec, +} + +impl Writer { + /// Create a new writer. + pub fn new(opts: WriteOptions) -> Self { + Self { + opts, + min_update_index: None, + max_update_index: None, + refs: Vec::new(), + logs: Vec::new(), + } + } + + /// Set update-index limits used by records in this table. + pub fn set_limits(&mut self, min: u64, max: u64) -> Result<(), Error> { + if min > max { + return Err(Error::Api("min_update_index must be <= max_update_index")); + } + if !self.refs.is_empty() || !self.logs.is_empty() { + return Err(Error::Api("set_limits must be called before adding records")); + } + self.min_update_index = Some(min); + self.max_update_index = Some(max); + Ok(()) + } + + /// Add one ref record. + pub fn add_ref(&mut self, rec: RefRecord) -> Result<(), Error> { + self.check_limits(rec.update_index)?; + if !self.logs.is_empty() { + return Err(Error::Api("cannot add ref after logs")); + } + self.refs.push(rec); + Ok(()) + } + + /// Add one log record. 
+ pub fn add_log(&mut self, mut rec: LogRecord) -> Result<(), Error> { + self.check_limits(rec.update_index)?; + if !self.opts.exact_log_message { + normalize_log_message(&mut rec)?; + } + self.logs.push(rec); + Ok(()) + } + + fn check_limits(&self, update_index: u64) -> Result<(), Error> { + let (min, max) = self + .limits() + .ok_or(Error::Api("set_limits must be called before adding records"))?; + if update_index < min || update_index > max { + return Err(Error::Api("record update index outside set limits")); + } + Ok(()) + } + + fn limits(&self) -> Option<(u64, u64)> { + Some((self.min_update_index?, self.max_update_index?)) + } + + /// Finalize and return table bytes. + pub fn finish(mut self) -> Result, Error> { + let (min_update_index, max_update_index) = self + .limits() + .ok_or(Error::Api("set_limits must be called before finish"))?; + + self.refs.sort_by(|a, b| a.refname.cmp(&b.refname)); + self.logs.sort_by(|a, b| { + let by_name = a.refname.cmp(&b.refname); + if by_name == std::cmp::Ordering::Equal { + b.update_index.cmp(&a.update_index) + } else { + by_name + } + }); + + let version = match self.opts.hash_id { + HashId::Sha1 => 1, + HashId::Sha256 => 2, + }; + let header_len = header_size(version)?; + + let mut ref_records = Vec::with_capacity(self.refs.len()); + for rec in &self.refs { + let mut rec = rec.clone(); + rec.update_index = rec + .update_index + .checked_sub(min_update_index) + .ok_or(Error::Api("ref update index must be >= min_update_index"))?; + ref_records.push(Record::Ref(rec)); + } + let log_records = self.logs.into_iter().map(Record::Log).collect::>(); + + let first_section = if !ref_records.is_empty() { + Some(constants::BLOCK_TYPE_REF) + } else if !log_records.is_empty() { + Some(constants::BLOCK_TYPE_LOG) + } else { + None + }; + + let mut file = if first_section.is_some() { + Vec::new() + } else { + vec![0u8; header_len] + }; + + let mut ref_index_offset = 0u64; + let mut log_offset = 0u64; + let mut log_index_offset = 0u64; + + 
if let Some(first) = first_section { + if first == constants::BLOCK_TYPE_REF { + let ref_section = write_section(&ref_records, constants::BLOCK_TYPE_REF, 0, header_len, &self.opts)?; + file.extend_from_slice(&ref_section.bytes); + ref_index_offset = ref_section.index_offset; + + if !log_records.is_empty() { + log_offset = file.len() as u64; + let log_section = + write_section(&log_records, constants::BLOCK_TYPE_LOG, log_offset, 0, &self.opts)?; + file.extend_from_slice(&log_section.bytes); + log_index_offset = log_section.index_offset; + } + } else { + log_offset = 0; + let log_section = write_section(&log_records, constants::BLOCK_TYPE_LOG, 0, header_len, &self.opts)?; + file.extend_from_slice(&log_section.bytes); + log_index_offset = log_section.index_offset; + } + } + + let header = encode_header( + version, + self.opts.block_size, + min_update_index, + max_update_index, + self.opts.hash_id, + )?; + if file.len() < header_len { + file.resize(header_len, 0); + } + file[..header_len].copy_from_slice(&header); + + let footer = encode_footer(version, &header, ref_index_offset, 0, 0, log_offset, log_index_offset)?; + file.extend_from_slice(&footer); + Ok(file) + } + + /// Finalize directly into a [`Table`] instance. + pub fn finish_into_table(self, name: &str) -> Result { + let bytes = self.finish()?; + Table::from_block_source(name.into(), BlockSource::from_bytes(bytes)) + } +} + +fn normalize_log_message(log: &mut LogRecord) -> Result<(), Error> { + if let LogValue::Update { message, .. 
} = &mut log.value { + if message.is_empty() { + return Ok(()); + } + if message.trim_end_matches('\n').contains('\n') { + return Err(Error::Api( + "log message must be a single line unless exact_log_message is set", + )); + } + if !message.ends_with('\n') { + message.push('\n'); + } + } + Ok(()) +} + +fn encode_header( + version: u8, + block_size: u32, + min_update_index: u64, + max_update_index: u64, + hash_id: HashId, +) -> Result, Error> { + let header_len = header_size(version)?; + let mut out = vec![0u8; header_len]; + out[..4].copy_from_slice(b"REFT"); + out[4] = version; + + let mut be24 = [0u8; 3]; + put_be24(&mut be24, block_size); + out[5..8].copy_from_slice(&be24); + + let mut be64 = [0u8; 8]; + put_be64(&mut be64, min_update_index); + out[8..16].copy_from_slice(&be64); + put_be64(&mut be64, max_update_index); + out[16..24].copy_from_slice(&be64); + + if version == 2 { + let mut be32 = [0u8; 4]; + put_be32( + &mut be32, + match hash_id { + HashId::Sha1 => FORMAT_ID_SHA1, + HashId::Sha256 => FORMAT_ID_SHA256, + }, + ); + out[24..28].copy_from_slice(&be32); + } + + Ok(out) +} + +fn encode_footer( + version: u8, + header: &[u8], + ref_index_offset: u64, + obj_offset_field: u64, + obj_index_offset: u64, + log_offset: u64, + log_index_offset: u64, +) -> Result, Error> { + let footer_len = footer_size(version)?; + let mut out = vec![0u8; footer_len]; + let header_len = header_size(version)?; + out[..header_len].copy_from_slice(&header[..header_len]); + + let mut pos = header_len; + let mut be64 = [0u8; 8]; + + for value in [ + ref_index_offset, + obj_offset_field, + obj_index_offset, + log_offset, + log_index_offset, + ] { + put_be64(&mut be64, value); + out[pos..pos + 8].copy_from_slice(&be64); + pos += 8; + } + + let crc = crc32fast::hash(&out[..pos]); + let mut be32 = [0u8; 4]; + put_be32(&mut be32, crc); + out[pos..pos + 4].copy_from_slice(&be32); + + Ok(out) +} + +fn write_section( + records: &[Record], + typ: u8, + start_offset: u64, + 
first_block_header_off: usize, + opts: &WriteOptions, +) -> Result { + if records.is_empty() { + return Ok(SectionResult { + bytes: Vec::new(), + index_offset: 0, + }); + } + + let mut blocks = Vec::new(); + let mut index_records = Vec::new(); + + let mut block = Vec::::new(); + let mut restarts = Vec::::new(); + let mut entries = 0usize; + let mut last_key = Vec::new(); + + let mut header_off = first_block_header_off; + block.resize(header_off + 4, 0); + block[header_off] = typ; + + let block_limit = opts.block_size as usize; + + let flush_block = |blocks: &mut Vec>, + index_records: &mut Vec, + block: &mut Vec, + restarts: &mut Vec, + last_key: &Vec, + header_off: usize| + -> Result<(), Error> { + if restarts.is_empty() { + return Ok(()); + } + + for off in restarts.iter().copied() { + let mut be24 = [0u8; 3]; + put_be24(&mut be24, off); + block.extend_from_slice(&be24); + } + let restart_count = restarts.len() as u16; + block.extend_from_slice(&restart_count.to_be_bytes()); + + let block_len = block.len(); + let mut be24 = [0u8; 3]; + put_be24(&mut be24, block_len as u32); + block[header_off + 1..header_off + 4].copy_from_slice(&be24); + + let mut on_disk = if typ == constants::BLOCK_TYPE_LOG { + let split = header_off + 4; + let mut encoder = flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::best()); + encoder.write_all(&block[split..]).map_err(Error::Io)?; + let compressed = encoder.finish().map_err(Error::Io)?; + + let mut out = Vec::with_capacity(split + compressed.len()); + out.extend_from_slice(&block[..split]); + out.extend_from_slice(&compressed); + out + } else { + block.clone() + }; + + if typ != constants::BLOCK_TYPE_LOG && !opts.unpadded && opts.block_size > 0 { + let target = opts.block_size as usize; + if on_disk.len() < target { + on_disk.resize(target, 0); + } + } + + let block_offset = start_offset + .checked_add(blocks.iter().map(|b| b.len() as u64).sum::()) + .ok_or(Error::VarintOverflow)?; + index_records.push(IndexRecord { + 
last_key: last_key.clone(), + offset: block_offset, + }); + + blocks.push(on_disk); + Ok(()) + }; + + for rec in records { + let key = rec.key(); + let val_type = rec.val_type(); + let prev_key = if entries % opts.restart_interval.max(1) as usize == 0 { + &[][..] + } else { + last_key.as_slice() + }; + let (key_bytes, is_restart) = encode_key(prev_key, &key, val_type)?; + let payload = rec.encode(opts.hash_id.size())?; + + let entry_len = key_bytes.len() + payload.len(); + let restart_growth = if is_restart { 1 } else { 0 }; + let reserved = 2 + 3 * (restarts.len() + restart_growth); + let would_len = block.len() + entry_len + reserved; + + let log_limit = (opts.block_size as usize).saturating_mul(2).max(256); + let limit = if typ == constants::BLOCK_TYPE_LOG { + log_limit + } else { + block_limit + }; + + if entries > 0 && would_len > limit { + flush_block( + &mut blocks, + &mut index_records, + &mut block, + &mut restarts, + &last_key, + header_off, + )?; + block.clear(); + header_off = 0; + block.resize(4, 0); + block[0] = typ; + restarts.clear(); + entries = 0; + last_key.clear(); + } + + let prev_key = if entries % opts.restart_interval.max(1) as usize == 0 { + &[][..] 
+ } else { + last_key.as_slice() + }; + let (key_bytes, is_restart) = encode_key(prev_key, &key, val_type)?; + let payload = rec.encode(opts.hash_id.size())?; + let entry_len = key_bytes.len() + payload.len(); + let reserved = 2 + 3 * (restarts.len() + usize::from(is_restart)); + let limit = if typ == constants::BLOCK_TYPE_LOG { + log_limit + } else { + block_limit + }; + if block.len() + entry_len + reserved > limit { + return Err(Error::Api("record does not fit into configured block size")); + } + + if is_restart { + restarts.push(block.len() as u32); + } + block.extend_from_slice(&key_bytes); + block.extend_from_slice(&payload); + last_key = key; + entries += 1; + } + + if entries > 0 { + flush_block( + &mut blocks, + &mut index_records, + &mut block, + &mut restarts, + &last_key, + header_off, + )?; + } + + let mut bytes = Vec::new(); + for b in &blocks { + bytes.extend_from_slice(b); + } + + let needs_index = index_records.len() >= 4 || ((opts.unpadded || opts.block_size == 0) && index_records.len() > 1); + let mut index_offset = 0; + if needs_index { + index_offset = start_offset + bytes.len() as u64; + let idx_records = index_records.into_iter().map(Record::Index).collect::>(); + let idx_section = write_section(&idx_records, constants::BLOCK_TYPE_INDEX, index_offset, 0, opts)?; + bytes.extend_from_slice(&idx_section.bytes); + } + + Ok(SectionResult { bytes, index_offset }) +} + +#[cfg(test)] +mod tests { + use crate::{ + constants, + record::{LogValue, RefValue}, + }; + + use super::*; + + #[test] + fn write_and_read_roundtrip() { + let mut writer = Writer::new(WriteOptions { + block_size: 256, + ..Default::default() + }); + writer.set_limits(1, 10).expect("limits"); + writer + .add_ref(RefRecord { + refname: "HEAD".into(), + update_index: 2, + value: RefValue::Symref("refs/heads/main".into()), + }) + .expect("add ref"); + writer + .add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 2, + value: RefValue::Val1(vec![1; 20]), + }) + 
.expect("add ref"); + writer + .add_log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 2, + value: LogValue::Update { + old_hash: vec![0; 20], + new_hash: vec![1; 20], + name: "a".into(), + email: "a@example.com".into(), + time: 0, + tz_offset: 0, + message: "msg".into(), + }, + }) + .expect("add log"); + + let table = writer.finish_into_table("mem").expect("table"); + + let mut refs = table.iter(constants::BLOCK_TYPE_REF).expect("ref iter"); + let mut count = 0; + while refs.next_record().expect("next").is_some() { + count += 1; + } + assert_eq!(count, 2); + + let mut logs = table.iter(constants::BLOCK_TYPE_LOG).expect("log iter"); + assert!(logs.next_record().expect("next").is_some()); + } + + #[test] + fn limits_are_enforced() { + let mut writer = Writer::new(WriteOptions::default()); + writer.set_limits(5, 5).expect("limits"); + let err = writer + .add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 4, + value: RefValue::Deletion, + }) + .expect_err("must fail"); + assert!(matches!(err, Error::Api(_))); + } + + #[test] + fn index_is_written_for_many_blocks() { + let mut writer = Writer::new(WriteOptions { + block_size: 96, + ..Default::default() + }); + writer.set_limits(1, 10).expect("limits"); + for idx in 0..32 { + writer + .add_ref(RefRecord { + refname: format!("refs/heads/{idx:02}"), + update_index: 2, + value: RefValue::Val1(vec![idx as u8; 20]), + }) + .expect("add"); + } + + let table = writer.finish_into_table("many").expect("table"); + assert!(table.ref_offsets.index_offset > 0); + } +} diff --git a/gix-reftable/tests/t061x_behavior.rs b/gix-reftable/tests/t061x_behavior.rs new file mode 100644 index 00000000000..b536f99fb44 --- /dev/null +++ b/gix-reftable/tests/t061x_behavior.rs @@ -0,0 +1,127 @@ +use std::{ + fs, + path::PathBuf, + time::{SystemTime, UNIX_EPOCH}, +}; + +use gix_reftable::{ + error::Error, + record::{LogRecord, LogValue, RefRecord, RefValue}, + stack::{Stack, StackOptions}, + 
writer::{WriteOptions, Writer}, +}; + +struct TempDir { + path: PathBuf, +} + +impl TempDir { + fn new() -> Self { + let stamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); + let path = std::env::temp_dir().join(format!("t061x-reftable-{stamp}")); + fs::create_dir_all(&path).unwrap(); + Self { path } + } +} + +impl Drop for TempDir { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.path); + } +} + +// Selected parity from t0610: init creates structures. +#[test] +fn t0610_init_creates_basic_structures() { + let tmp = TempDir::new(); + let _stack = Stack::open(&tmp.path, StackOptions::default()).unwrap(); + assert!(tmp.path.join("tables.list").is_file()); +} + +// Selected parity from t0610: corrupted tables list causes transaction/reload failure. +#[test] +fn t0610_corrupted_tables_list_fails_reload() { + let tmp = TempDir::new(); + let mut stack = Stack::open(&tmp.path, StackOptions::default()).unwrap(); + + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 0, + value: RefValue::Val1(vec![1; 20]), + }); + tx.commit().unwrap(); + + fs::write(tmp.path.join("tables.list"), "garbage\n").unwrap(); + assert!(stack.reload().is_err()); +} + +// Selected parity from t0613: default write options use 4096-byte block size. +#[test] +fn t0613_default_write_options() { + let mut writer = Writer::new(WriteOptions::default()); + writer.set_limits(1, 1).unwrap(); + writer + .add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 1, + value: RefValue::Val1(vec![1; 20]), + }) + .unwrap(); + + let table = writer.finish_into_table("opts").unwrap(); + assert_eq!(table.block_size, 4096); +} + +// Selected parity from t0613: tiny block size with large log entry errors out. 
+#[test] +fn t0613_small_block_size_fails_large_log() { + let mut writer = Writer::new(WriteOptions { + block_size: 64, + ..Default::default() + }); + writer.set_limits(1, 1).unwrap(); + + let err = writer + .add_log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 1, + value: LogValue::Update { + old_hash: vec![0; 20], + new_hash: vec![1; 20], + name: "n".into(), + email: "e@x".into(), + time: 1, + tz_offset: 0, + message: "x".repeat(500), + }, + }) + .and_then(|_| writer.finish().map(|_| ())) + .expect_err("must fail"); + assert!(matches!(err, Error::Api(_))); +} + +// Selected parity from t0614: fsck succeeds on healthy stack and fails on broken table names. +#[test] +fn t0614_fsck_behavior() { + let tmp = TempDir::new(); + let mut stack = Stack::open(&tmp.path, StackOptions::default()).unwrap(); + + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 0, + value: RefValue::Val1(vec![1; 20]), + }); + tx.commit().unwrap(); + stack.fsck().unwrap(); + + let current = stack.table_names()[0].clone(); + let broken = "broken.ref".to_string(); + fs::rename(tmp.path.join(¤t), tmp.path.join(&broken)).unwrap(); + fs::write(tmp.path.join("tables.list"), format!("{broken}\n")).unwrap(); + + let mut stack = Stack::open(&tmp.path, StackOptions::default()).unwrap(); + assert!(stack.reload().is_ok()); + assert!(stack.fsck().is_err()); +} diff --git a/gix-reftable/tests/u_reftable_basics.rs b/gix-reftable/tests/u_reftable_basics.rs new file mode 100644 index 00000000000..aec637ba820 --- /dev/null +++ b/gix-reftable/tests/u_reftable_basics.rs @@ -0,0 +1,29 @@ +use gix_reftable::basics::{ + common_prefix_size, get_be16, get_be24, get_be32, get_be64, put_be16, put_be24, put_be32, put_be64, +}; + +// Upstream mapping: test_reftable_basics__common_prefix_size +#[test] +fn common_prefix() { + assert_eq!(common_prefix_size(b"refs/heads/a", b"refs/heads/b"), 11); +} + +// Upstream mapping: 
put_get_be64/be32/be24/be16 tests +#[test] +fn big_endian_roundtrip() { + let mut be64 = [0u8; 8]; + put_be64(&mut be64, 0x0102_0304_0506_0708); + assert_eq!(get_be64(&be64), 0x0102_0304_0506_0708); + + let mut be32 = [0u8; 4]; + put_be32(&mut be32, 0x0102_0304); + assert_eq!(get_be32(&be32), 0x0102_0304); + + let mut be24 = [0u8; 3]; + put_be24(&mut be24, 0x01_02_03); + assert_eq!(get_be24(&be24), 0x01_02_03); + + let mut be16 = [0u8; 2]; + put_be16(&mut be16, 0x0102); + assert_eq!(get_be16(&be16), 0x0102); +} diff --git a/gix-reftable/tests/u_reftable_block.rs b/gix-reftable/tests/u_reftable_block.rs new file mode 100644 index 00000000000..702171b4e03 --- /dev/null +++ b/gix-reftable/tests/u_reftable_block.rs @@ -0,0 +1,74 @@ +use gix_reftable::{ + constants, + record::{LogRecord, LogValue, RefRecord, RefValue}, + table::Table, + writer::{WriteOptions, Writer}, +}; + +fn sample_table() -> Table { + let mut writer = Writer::new(WriteOptions { + block_size: 128, + ..Default::default() + }); + writer.set_limits(1, 10).unwrap(); + writer + .add_ref(RefRecord { + refname: "HEAD".into(), + update_index: 2, + value: RefValue::Symref("refs/heads/main".into()), + }) + .unwrap(); + writer + .add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 2, + value: RefValue::Val1(vec![1; 20]), + }) + .unwrap(); + writer + .add_log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 2, + value: LogValue::Update { + old_hash: vec![0; 20], + new_hash: vec![1; 20], + name: "n".into(), + email: "e@x".into(), + time: 1, + tz_offset: 0, + message: "m".into(), + }, + }) + .unwrap(); + writer.finish_into_table("sample").unwrap() +} + +// Upstream mapping: test_reftable_block__read_write + iterator +#[test] +fn read_write_and_iterate_block() { + let table = sample_table(); + let block = table + .init_block(0, constants::BLOCK_TYPE_REF) + .expect("init block") + .expect("block present"); + let mut iter = gix_reftable::block::BlockIter::new(block); + + let mut 
count = 0; + while iter.next_record().expect("next").is_some() { + count += 1; + } + assert!(count >= 2); +} + +// Upstream mapping: test_reftable_block__log_read_write +#[test] +fn log_block_is_readable() { + let table = sample_table(); + let log_off = table.log_offsets.offset; + let block = table + .init_block(log_off, constants::BLOCK_TYPE_LOG) + .expect("init block") + .expect("log block"); + let mut iter = gix_reftable::block::BlockIter::new(block); + assert!(iter.next_record().expect("next log").is_some()); +} diff --git a/gix-reftable/tests/u_reftable_merged.rs b/gix-reftable/tests/u_reftable_merged.rs new file mode 100644 index 00000000000..b831092080d --- /dev/null +++ b/gix-reftable/tests/u_reftable_merged.rs @@ -0,0 +1,49 @@ +use gix_reftable::{ + merged::MergedTable, + record::{RefRecord, RefValue}, + writer::{WriteOptions, Writer}, +}; + +fn table_with_value(value: u8, update_index: u64) -> gix_reftable::table::Table { + let mut writer = Writer::new(WriteOptions::default()); + writer.set_limits(update_index, update_index).unwrap(); + writer + .add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index, + value: RefValue::Val1(vec![value; 20]), + }) + .unwrap(); + writer.finish_into_table("m").unwrap() +} + +// Upstream mapping: test_reftable_merged__single_record + refs +#[test] +fn merged_prefers_newer_table_on_duplicate_keys() { + let old = table_with_value(1, 1); + let new = table_with_value(2, 2); + let merged = MergedTable::new(vec![old, new]).unwrap(); + + let mut iter = merged.ref_iter().unwrap(); + let rec = iter.next_record().unwrap().unwrap(); + let gix_reftable::record::Record::Ref(rec) = rec else { + panic!("expected ref"); + }; + let RefValue::Val1(id) = rec.value else { + panic!("expected val1"); + }; + assert_eq!(id, vec![2; 20]); +} + +// Upstream mapping: test_reftable_merged__seek_multiple_times +#[test] +fn merged_seek_multiple_times() { + let t = table_with_value(1, 1); + let merged = MergedTable::new(vec![t.clone(), 
t]).unwrap(); + + let mut iter = merged.ref_iter().unwrap(); + iter.seek_key(b"refs/heads/main").unwrap(); + assert!(iter.next_record().unwrap().is_some()); + iter.seek_key(b"refs/heads/main").unwrap(); + assert!(iter.next_record().unwrap().is_some()); +} diff --git a/gix-reftable/tests/u_reftable_pq.rs b/gix-reftable/tests/u_reftable_pq.rs new file mode 100644 index 00000000000..22ae0f676e2 --- /dev/null +++ b/gix-reftable/tests/u_reftable_pq.rs @@ -0,0 +1,28 @@ +use gix_reftable::{ + pq::{MergedIterPQueue, PqEntry}, + record::{Record, RefRecord, RefValue}, +}; + +fn entry(name: &str, idx: usize) -> PqEntry { + PqEntry { + index: idx, + record: Record::Ref(RefRecord { + refname: name.into(), + update_index: idx as u64, + value: RefValue::Deletion, + }), + } +} + +// Upstream mapping: test_reftable_pq__record + merged_iter_pqueue_top +#[test] +fn pq_record_order() { + let mut pq = MergedIterPQueue::default(); + pq.push(entry("refs/heads/b", 0)); + pq.push(entry("refs/heads/a", 0)); + pq.push(entry("refs/heads/a", 1)); + + assert_eq!(pq.pop().unwrap().index, 1); + assert_eq!(pq.pop().unwrap().record.key(), b"refs/heads/a".to_vec()); + assert_eq!(pq.pop().unwrap().record.key(), b"refs/heads/b".to_vec()); +} diff --git a/gix-reftable/tests/u_reftable_readwrite.rs b/gix-reftable/tests/u_reftable_readwrite.rs new file mode 100644 index 00000000000..5f21af27326 --- /dev/null +++ b/gix-reftable/tests/u_reftable_readwrite.rs @@ -0,0 +1,58 @@ +use gix_reftable::{ + error::Error, + record::{LogRecord, LogValue, RefRecord, RefValue}, + writer::{WriteOptions, Writer}, +}; + +// Upstream mapping: test_reftable_readwrite__write_empty_table +#[test] +fn write_empty_table() { + let mut writer = Writer::new(WriteOptions::default()); + writer.set_limits(1, 1).unwrap(); + let bytes = writer.finish().unwrap(); + assert!(!bytes.is_empty()); +} + +// Upstream mapping: test_reftable_readwrite__log_write_limits +#[test] +fn log_write_limits() { + let mut writer = 
Writer::new(WriteOptions::default());
+    writer.set_limits(1, 1).unwrap();
+
+    let err = writer
+        .add_log(LogRecord {
+            refname: "refs/heads/main".into(),
+            update_index: 3,
+            value: LogValue::Deletion,
+        })
+        .expect_err("out of range");
+    assert!(matches!(err, Error::Api(_)));
+}
+
+// Upstream mapping: test_reftable_readwrite__table_read_write_sequential
+#[test]
+fn table_read_write_sequential() {
+    let mut writer = Writer::new(WriteOptions {
+        block_size: 128,
+        ..Default::default()
+    });
+    writer.set_limits(1, 10).unwrap();
+
+    for idx in 0..20u8 {
+        writer
+            .add_ref(RefRecord {
+                refname: format!("refs/heads/{idx:02}"),
+                update_index: 2,
+                value: RefValue::Val1(vec![idx; 20]),
+            })
+            .unwrap();
+    }
+
+    let table = writer.finish_into_table("seq").unwrap();
+    let mut iter = table.iter(gix_reftable::constants::BLOCK_TYPE_REF).unwrap();
+    let mut count = 0;
+    while iter.next_record().unwrap().is_some() {
+        count += 1;
+    }
+    assert_eq!(count, 20);
+}
diff --git a/gix-reftable/tests/u_reftable_record.rs b/gix-reftable/tests/u_reftable_record.rs
new file mode 100644
index 00000000000..48a575fc6f0
--- /dev/null
+++ b/gix-reftable/tests/u_reftable_record.rs
@@ -0,0 +1,100 @@
+use gix_reftable::{
+    basics::{decode_varint, encode_varint},
+    constants,
+    record::{decode_key, encode_key, IndexRecord, LogRecord, LogValue, ObjRecord, Record, RefRecord, RefValue},
+};
+
+fn hash(seed: u8, len: usize) -> Vec<u8> {
+    (0..len).map(|i| seed.wrapping_add(i as u8)).collect()
+}
+
+// Upstream mapping: test_reftable_record__varint_roundtrip
+#[test]
+fn varint_roundtrip() {
+    let mut buf = [0u8; 10];
+    for value in [0, 1, 27, 127, 128, 257, 4096, u64::MAX] {
+        let n = encode_varint(value, &mut buf);
+        let (decoded, consumed) = decode_varint(&buf[..n]).expect("decode");
+        assert_eq!(consumed, n);
+        assert_eq!(decoded, value);
+    }
+}
+
+// Upstream mapping: test_reftable_record__key_roundtrip
+#[test]
+fn key_roundtrip() {
+    let (encoded, _restart) = encode_key(b"refs/heads/master", 
b"refs/tags/v1", 6).expect("encode"); + let mut key = b"refs/heads/master".to_vec(); + let (_n, extra) = decode_key(&mut key, &encoded).expect("decode"); + assert_eq!(extra, 6); + assert_eq!(key, b"refs/tags/v1"); +} + +// Upstream mapping: test_reftable_record__ref_record_roundtrip +#[test] +fn ref_record_roundtrip() { + let rec = Record::Ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 42, + value: RefValue::Val2 { + value: hash(1, 20), + target_value: hash(2, 20), + }, + }); + let payload = rec.encode(20).expect("encode"); + let out = Record::decode(constants::BLOCK_TYPE_REF, &rec.key(), rec.val_type(), &payload, 20).expect("decode"); + assert_eq!(rec, out); +} + +// Upstream mapping: test_reftable_record__log_record_roundtrip +#[test] +fn log_record_roundtrip() { + let rec = Record::Log(LogRecord { + refname: "refs/heads/main".into(), + update_index: 9, + value: LogValue::Update { + old_hash: hash(1, 20), + new_hash: hash(2, 20), + name: "n".into(), + email: "e@x".into(), + time: 123, + tz_offset: 100, + message: "m".into(), + }, + }); + let payload = rec.encode(20).expect("encode"); + let out = Record::decode(constants::BLOCK_TYPE_LOG, &rec.key(), rec.val_type(), &payload, 20).expect("decode"); + assert_eq!(rec, out); +} + +// Upstream mapping: test_reftable_record__obj_record_roundtrip + index_record_roundtrip +#[test] +fn obj_and_index_roundtrip() { + let obj = Record::Obj(ObjRecord { + hash_prefix: vec![1, 2, 3, 4], + offsets: vec![1, 5, 9], + }); + let obj_out = Record::decode( + constants::BLOCK_TYPE_OBJ, + &obj.key(), + obj.val_type(), + &obj.encode(20).unwrap(), + 20, + ) + .expect("obj decode"); + assert_eq!(obj, obj_out); + + let idx = Record::Index(IndexRecord { + last_key: b"refs/heads/main".to_vec(), + offset: 77, + }); + let idx_out = Record::decode( + constants::BLOCK_TYPE_INDEX, + &idx.key(), + idx.val_type(), + &idx.encode(20).unwrap(), + 20, + ) + .expect("index decode"); + assert_eq!(idx, idx_out); +} diff --git 
a/gix-reftable/tests/u_reftable_stack.rs b/gix-reftable/tests/u_reftable_stack.rs new file mode 100644 index 00000000000..7f4c234a7ed --- /dev/null +++ b/gix-reftable/tests/u_reftable_stack.rs @@ -0,0 +1,89 @@ +use std::{ + fs, + path::PathBuf, + time::{SystemTime, UNIX_EPOCH}, +}; + +use gix_reftable::{ + record::{RefRecord, RefValue}, + stack::{Stack, StackOptions}, + writer::WriteOptions, +}; + +struct TempDir { + path: PathBuf, +} + +impl TempDir { + fn new() -> Self { + let stamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); + let path = std::env::temp_dir().join(format!("u-reftable-stack-{stamp}")); + fs::create_dir_all(&path).unwrap(); + Self { path } + } +} + +impl Drop for TempDir { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.path); + } +} + +// Upstream mapping: test_reftable_stack__add_one + transaction_api +#[test] +fn add_one_transaction() { + let tmp = TempDir::new(); + let mut stack = Stack::open( + &tmp.path, + StackOptions { + disable_auto_compact: true, + write_options: WriteOptions { + block_size: 128, + ..Default::default() + }, + ..Default::default() + }, + ) + .unwrap(); + + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: "refs/heads/main".into(), + update_index: 0, + value: RefValue::Val1(vec![1; 20]), + }); + tx.commit().unwrap(); + + assert_eq!(stack.tables().len(), 1); + stack.fsck().unwrap(); +} + +// Upstream mapping: test_reftable_stack__auto_compaction +#[test] +fn auto_compaction() { + let tmp = TempDir::new(); + let mut stack = Stack::open( + &tmp.path, + StackOptions { + auto_compaction_factor: 2, + write_options: WriteOptions { + block_size: 96, + ..Default::default() + }, + ..Default::default() + }, + ) + .unwrap(); + + for idx in 0..3u8 { + let mut tx = stack.transaction(); + tx.add_ref(RefRecord { + refname: format!("refs/heads/{idx}"), + update_index: 0, + value: RefValue::Val1(vec![idx; 20]), + }); + tx.commit().unwrap(); + } + + assert!(stack.tables().len() <= 2); +} 
diff --git a/gix-reftable/tests/u_reftable_table.rs b/gix-reftable/tests/u_reftable_table.rs new file mode 100644 index 00000000000..212117d9f9a --- /dev/null +++ b/gix-reftable/tests/u_reftable_table.rs @@ -0,0 +1,59 @@ +use gix_reftable::{ + constants, + record::{RefRecord, RefValue}, + writer::{WriteOptions, Writer}, +}; + +fn table_with_refs() -> gix_reftable::table::Table { + let mut writer = Writer::new(WriteOptions { + block_size: 96, + ..Default::default() + }); + writer.set_limits(1, 10).unwrap(); + for i in 0..16u8 { + writer + .add_ref(RefRecord { + refname: format!("refs/heads/{i:02}"), + update_index: 2, + value: RefValue::Val1(vec![i; 20]), + }) + .unwrap(); + } + writer.finish_into_table("t").unwrap() +} + +// Upstream mapping: test_reftable_table__seek_once +#[test] +fn seek_once() { + let table = table_with_refs(); + let mut iter = table.iter(constants::BLOCK_TYPE_REF).unwrap(); + iter.seek_key(b"refs/heads/08").unwrap(); + let rec = iter.next_record().unwrap().unwrap(); + assert_eq!(rec.key(), b"refs/heads/08"); +} + +// Upstream mapping: test_reftable_table__reseek +#[test] +fn reseek() { + let table = table_with_refs(); + let mut iter = table.iter(constants::BLOCK_TYPE_REF).unwrap(); + iter.seek_key(b"refs/heads/10").unwrap(); + let rec = iter.next_record().unwrap().unwrap(); + assert_eq!(rec.key(), b"refs/heads/10"); + + iter.seek_key(b"refs/heads/03").unwrap(); + let rec = iter.next_record().unwrap().unwrap(); + assert_eq!(rec.key(), b"refs/heads/03"); +} + +// Upstream mapping: test_reftable_table__block_iterator +#[test] +fn block_iterator_progresses() { + let table = table_with_refs(); + let mut iter = table.iter(constants::BLOCK_TYPE_REF).unwrap(); + let mut count = 0; + while iter.next_record().unwrap().is_some() { + count += 1; + } + assert_eq!(count, 16); +} diff --git a/gix-reftable/tests/u_reftable_tree.rs b/gix-reftable/tests/u_reftable_tree.rs new file mode 100644 index 00000000000..05aad1509c2 --- /dev/null +++ 
b/gix-reftable/tests/u_reftable_tree.rs
@@ -0,0 +1,29 @@
+use gix_reftable::tree::{infix_walk, tree_free, tree_insert, tree_search, TreeNode};
+
+fn cmp(a: &i32, b: &i32) -> i32 {
+    a.cmp(b) as i32
+}
+
+// Upstream mapping: test_reftable_tree__tree_search
+#[test]
+fn tree_search_case() {
+    let mut root: Option<Box<TreeNode<i32>>> = None;
+    tree_insert(&mut root, 2, &cmp);
+    tree_insert(&mut root, 1, &cmp);
+    tree_insert(&mut root, 3, &cmp);
+    assert!(tree_search(&root, &1, &cmp).is_some());
+    assert!(tree_search(&root, &4, &cmp).is_none());
+}
+
+// Upstream mapping: test_reftable_tree__infix_walk
+#[test]
+fn infix_walk_case() {
+    let mut root: Option<Box<TreeNode<i32>>> = None;
+    for k in [2, 1, 3] {
+        tree_insert(&mut root, k, &cmp);
+    }
+    let mut out = Vec::new();
+    infix_walk(&root, &mut |k| out.push(*k));
+    assert_eq!(out, vec![1, 2, 3]);
+    tree_free(&mut root);
+}