diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index 35320afaf5d..3008be042ce 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -190,7 +190,11 @@ where }; buf.clear(); // …but write nothing - Ok(Some(gix::objs::Data { kind, data: buf })) + Ok(Some(gix::objs::Data { + kind, + hash_kind: id.kind(), + data: buf, + })) } else { self.db.try_find(id, buf) } @@ -201,10 +205,11 @@ where struct Empty; impl gix::objs::Find for Empty { - fn try_find<'a>(&self, _id: &gix::oid, buffer: &'a mut Vec) -> Result>, Error> { + fn try_find<'a>(&self, id: &gix::oid, buffer: &'a mut Vec) -> Result>, Error> { buffer.clear(); Ok(Some(gix::objs::Data { kind: gix::object::Kind::Blob, + hash_kind: id.kind(), data: buffer, })) } diff --git a/gix-diff/tests/diff/main.rs b/gix-diff/tests/diff/main.rs index 667f1daf8d3..74d2e0797c9 100644 --- a/gix-diff/tests/diff/main.rs +++ b/gix-diff/tests/diff/main.rs @@ -41,6 +41,7 @@ mod util { buffer.extend_from_slice(data); Ok(Some(gix_object::Data { kind: gix_object::Kind::Blob, + hash_kind: id.kind(), data: buffer.as_slice(), })) } diff --git a/gix-diff/tests/diff/tree_with_rewrites.rs b/gix-diff/tests/diff/tree_with_rewrites.rs index 9bfd57d4fd3..383072f12a7 100644 --- a/gix-diff/tests/diff/tree_with_rewrites.rs +++ b/gix-diff/tests/diff/tree_with_rewrites.rs @@ -107,8 +107,8 @@ fn empty_to_new_tree_without_rename_tracking() -> crate::Result { { let (lhs, rhs, mut cache, odb) = repo_with_trees(None, "c1 - initial")?; let err = gix_diff::tree_with_rewrites( - TreeRefIter::from_bytes(&lhs), - TreeRefIter::from_bytes(&rhs), + TreeRefIter::from_bytes(&lhs, gix_testtools::hash_kind_from_env().unwrap_or_default()), + TreeRefIter::from_bytes(&rhs, gix_testtools::hash_kind_from_env().unwrap_or_default()), &mut cache, &mut Default::default(), &odb, @@ -1843,8 +1843,11 @@ mod util { let (from, to, mut cache, odb) = repo_with_trees(lhs, rhs)?; let mut out = Vec::new(); let 
rewrites_info = gix_diff::tree_with_rewrites( - TreeRefIter::from_bytes(&from), - TreeRefIter::from_bytes(&to), + // TODO(SHA256): + // Get hash from env. This requires updating the snapshot at the top of the file as + // well. + TreeRefIter::from_bytes(&from, gix_hash::Kind::Sha1), + TreeRefIter::from_bytes(&to, gix_hash::Kind::Sha1), &mut cache, &mut Default::default(), &odb, diff --git a/gix-merge/src/tree/function.rs b/gix-merge/src/tree/function.rs index f3ea1f2f1fb..f76acd11366 100644 --- a/gix-merge/src/tree/function.rs +++ b/gix-merge/src/tree/function.rs @@ -77,7 +77,7 @@ where let (mut base_buf, mut side_buf) = (Vec::new(), Vec::new()); let ancestor_tree = objects.find_tree(base_tree, &mut base_buf)?; let mut editor = tree::Editor::new(ancestor_tree.to_owned(), objects, base_tree.kind()); - let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf); + let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf, base_tree.kind()); let tree_conflicts = options.tree_conflicts; let mut our_changes = Vec::new(); diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs index 580ef7dfeb4..9774739b328 100644 --- a/gix-merge/tests/merge/blob/mod.rs +++ b/gix-merge/tests/merge/blob/mod.rs @@ -33,6 +33,7 @@ mod util { buffer.extend_from_slice(data); Ok(Some(gix_object::Data { kind: gix_object::Kind::Blob, + hash_kind: id.kind(), data: buffer.as_slice(), })) } diff --git a/gix-object/benches/decode_objects.rs b/gix-object/benches/decode_objects.rs index 93180cfef43..836ac8ea288 100644 --- a/gix-object/benches/decode_objects.rs +++ b/gix-object/benches/decode_objects.rs @@ -21,10 +21,21 @@ fn parse_tag(c: &mut Criterion) { fn parse_tree(c: &mut Criterion) { c.bench_function("TreeRef()", |b| { - b.iter(|| black_box(gix_object::TreeRef::from_bytes(TREE)).unwrap()); + b.iter(|| { + black_box(gix_object::TreeRef::from_bytes( + TREE, + gix_testtools::hash_kind_from_env().unwrap_or_default(), + )) + .unwrap() + }); }); 
c.bench_function("TreeRefIter()", |b| { - b.iter(|| black_box(gix_object::TreeRefIter::from_bytes(TREE).count())); + b.iter(|| { + black_box( + gix_object::TreeRefIter::from_bytes(TREE, gix_testtools::hash_kind_from_env().unwrap_or_default()) + .count(), + ) + }); }); } diff --git a/gix-object/benches/edit_tree.rs b/gix-object/benches/edit_tree.rs index 067065d5892..c9915e4ca73 100644 --- a/gix-object/benches/edit_tree.rs +++ b/gix-object/benches/edit_tree.rs @@ -173,6 +173,7 @@ impl gix_object::Find for StorageOdb { tree.write_to(buffer).expect("valid trees can always be serialized"); Ok(Some(gix_object::Data { kind: gix_object::Kind::Tree, + hash_kind: id.kind(), data: &*buffer, })) } diff --git a/gix-object/fuzz/Cargo.toml b/gix-object/fuzz/Cargo.toml index 91539cf79cf..b4998a95020 100644 --- a/gix-object/fuzz/Cargo.toml +++ b/gix-object/fuzz/Cargo.toml @@ -10,6 +10,10 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" +[dependencies.gix-hash] +path = "../../gix-hash" +features = ["sha1", "sha256"] + [dependencies.gix-object] path = ".." 
features = ["sha1"] diff --git a/gix-object/fuzz/fuzz_targets/fuzz_tree.rs b/gix-object/fuzz/fuzz_targets/fuzz_tree.rs index 3f7134ca429..af8357e28f4 100644 --- a/gix-object/fuzz/fuzz_targets/fuzz_tree.rs +++ b/gix-object/fuzz/fuzz_targets/fuzz_tree.rs @@ -4,5 +4,6 @@ use libfuzzer_sys::fuzz_target; use std::hint::black_box; fuzz_target!(|tree: &[u8]| { - let _ = black_box(gix_object::TreeRef::from_bytes(tree)); + let _ = black_box(gix_object::TreeRef::from_bytes(tree, gix_hash::Kind::Sha1)); + let _ = black_box(gix_object::TreeRef::from_bytes(tree, gix_hash::Kind::Sha256)); }); diff --git a/gix-object/src/data.rs b/gix-object/src/data.rs index 1f733263811..f8c3692f6b3 100644 --- a/gix-object/src/data.rs +++ b/gix-object/src/data.rs @@ -3,9 +3,9 @@ use crate::{BlobRef, CommitRef, CommitRefIter, Data, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; impl<'a> Data<'a> { - /// Constructs a new data object from `kind` and `data`. - pub fn new(kind: Kind, data: &'a [u8]) -> Data<'a> { - Data { kind, data } + /// Constructs a new data object from `kind`, `hash_kind` and `data`. + pub fn new(kind: Kind, hash_kind: gix_hash::Kind, data: &'a [u8]) -> Data<'a> { + Data { kind, hash_kind, data } } /// Decodes the data in the backing slice into a [`ObjectRef`], allowing to access all of its data /// conveniently. The cost of parsing an object is negligible. @@ -14,7 +14,7 @@ impl<'a> Data<'a> { /// using [`crate::ObjectRef::into_owned()`]. pub fn decode(&self) -> Result, crate::decode::Error> { Ok(match self.kind { - Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data)?), + Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data, self.hash_kind)?), Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }), Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?), Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?), @@ -25,7 +25,7 @@ impl<'a> Data<'a> { /// `None` if this is not a tree object. 
pub fn try_into_tree_iter(self) -> Option> { match self.kind { - Kind::Tree => Some(TreeRefIter::from_bytes(self.data)), + Kind::Tree => Some(TreeRefIter::from_bytes(self.data, self.hash_kind)), _ => None, } } diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 057e3873771..241b4cbd34c 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -4,7 +4,7 @@ //! ## Decode Borrowed Objects //! //! ``` -//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello").unwrap(); +//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello", gix_hash::Kind::Sha1).unwrap(); //! let blob = object.as_blob().unwrap(); //! //! assert_eq!(blob.data, b"hello"); @@ -16,7 +16,7 @@ //! ``` //! use gix_object::WriteTo; //! -//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello") +//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello", gix_hash::Kind::Sha1) //! .unwrap() //! .into_owned() //! .unwrap(); @@ -263,6 +263,8 @@ pub struct TreeRef<'a> { /// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated. #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] pub struct TreeRefIter<'a> { + /// The hash kind to use in this tree. + hash_kind: gix_hash::Kind, /// The directories and files contained in this tree. data: &'a [u8], } @@ -289,6 +291,8 @@ impl Tree { pub struct Data<'a> { /// kind of object pub kind: Kind, + /// The hash kind to use for this piece of data. + pub hash_kind: gix_hash::Kind, /// decoded, decompressed data, owned by a backing store. 
pub data: &'a [u8], } diff --git a/gix-object/src/object/mod.rs b/gix-object/src/object/mod.rs index b785b791c85..a0c0b36f1bb 100644 --- a/gix-object/src/object/mod.rs +++ b/gix-object/src/object/mod.rs @@ -191,7 +191,7 @@ pub enum LooseDecodeError { impl<'a> ObjectRef<'a> { /// Deserialize an object from a loose serialisation - pub fn from_loose(data: &'a [u8]) -> Result, LooseDecodeError> { + pub fn from_loose(data: &'a [u8], hash_kind: gix_hash::Kind) -> Result, LooseDecodeError> { let (kind, size, offset) = loose_header(data)?; let body = &data[offset..] @@ -200,13 +200,17 @@ impl<'a> ObjectRef<'a> { message: "object data was shorter than its size declared in the header", })?; - Ok(Self::from_bytes(kind, body)?) + Ok(Self::from_bytes(kind, hash_kind, body)?) } /// Deserialize an object of `kind` from the given `data`. - pub fn from_bytes(kind: Kind, data: &'a [u8]) -> Result, crate::decode::Error> { + pub fn from_bytes( + kind: Kind, + hash_kind: gix_hash::Kind, + data: &'a [u8], + ) -> Result, crate::decode::Error> { Ok(match kind { - Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data)?), + Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data, hash_kind)?), Kind::Blob => ObjectRef::Blob(BlobRef { data }), Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?), Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?), diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 12ebb365ea8..316b0b7694d 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -39,7 +39,7 @@ where return ControlFlow::Break(None); }; - let Some(entry) = TreeRefIter::from_bytes(tree.data) + let Some(entry) = TreeRefIter::from_bytes(tree.data, tree.hash_kind) .filter_map(Result::ok) .find(|entry| component.eq(entry.filename)) else { @@ -55,8 +55,8 @@ where impl<'a> TreeRefIter<'a> { /// Instantiate an iterator from the given tree data. 
- pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> { - TreeRefIter { data } + pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> TreeRefIter<'a> { + TreeRefIter { data, hash_kind } } /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer` @@ -81,7 +81,7 @@ impl<'a> TreeRefIter<'a> { buffer.extend_from_slice(self.data); let mut iter = path.into_iter().peekable(); - let mut data = crate::Data::new(crate::Kind::Tree, buffer); + let mut data = crate::Data::new(crate::Kind::Tree, self.hash_kind, buffer); loop { data = match next_entry(&mut iter, data) { @@ -123,11 +123,14 @@ impl<'a> TreeRefIter<'a> { impl<'a> TreeRef<'a> { /// Deserialize a Tree from `data`. - pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { - let input = &mut data; - match decode::tree.parse_next(input) { + pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> Result, crate::decode::Error> { + let state = decode::State { + hash_len: hash_kind.len_in_bytes(), + }; + let mut input = decode::Stream { input: data, state }; + match decode::tree.parse_next(&mut input) { Ok(tag) => Ok(tag), - Err(err) => Err(crate::decode::Error::with_err(err, input)), + Err(err) => Err(crate::decode::Error::with_err(err, &input)), } } @@ -190,7 +193,7 @@ impl<'a> Iterator for TreeRefIter<'a> { if self.data.is_empty() { return None; } - match decode::fast_entry(self.data) { + match decode::fast_entry(self.data, self.hash_kind.len_in_bytes()) { Some((data_left, entry)) => { self.data = data_left; Some(Ok(entry)) @@ -218,47 +221,52 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode { mod decode { use bstr::ByteSlice; - use winnow::{error::ParserError, prelude::*}; + use winnow::{error::ParserError, prelude::*, Stateful}; use crate::{tree, tree::EntryRef, TreeRef}; - pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> { + pub fn fast_entry(i: &[u8], hash_len: usize) -> Option<(&[u8], EntryRef<'_>)> { 
let (mode, i) = tree::EntryMode::extract_from_bytes(i)?; let (filename, i) = i.split_at(i.find_byte(0)?); let i = &i[1..]; - const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot let (oid, i) = match i.len() { - len if len < HASH_LEN_FIXME => return None, - _ => i.split_at(20), + len if len < hash_len => return None, + _ => i.split_at(hash_len), }; Some(( i, EntryRef { mode, filename: filename.as_bstr(), - oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), + oid: gix_hash::oid::try_from_bytes(oid) + .unwrap_or_else(|_| panic!("we counted exactly {hash_len} bytes")), }, )) } - pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult, E> { - let mut i = &**i; + #[derive(Debug)] + pub struct State { + pub hash_len: usize, + } + + pub type Stream<'is> = Stateful<&'is [u8], State>; + + pub fn tree<'a, E: ParserError<&'a [u8]>>(stream: &mut Stream<'a>) -> ModalResult, E> { + let mut i = stream.input; // Calculate an estimate of the amount of entries to reduce // the amount of allocations necessary. // Note that this assumes that we want speed over fitting Vecs, this is a trade-off. 
- // TODO(SHA256): know actual/desired length for reduced overallocation - const HASH_LEN_FIXME: usize = 20; const AVERAGE_FILENAME_LEN: usize = 24; const AVERAGE_MODE_LEN: usize = 6; const ENTRY_DELIMITER_LEN: usize = 2; // space + trailing zero const AVERAGE_TREE_ENTRIES: usize = 16 * 2; // prevent overallocation beyond what's meaningful or what could be dangerous - let average_entry_len = ENTRY_DELIMITER_LEN + HASH_LEN_FIXME + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN; + let average_entry_len = ENTRY_DELIMITER_LEN + stream.state.hash_len + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN; let upper_bound = i.len() / average_entry_len; let mut out = Vec::with_capacity(upper_bound.min(AVERAGE_TREE_ENTRIES)); while !i.is_empty() { - let Some((rest, entry)) = fast_entry(i) else { + let Some((rest, entry)) = fast_entry(i, stream.state.hash_len) else { #[allow(clippy::unit_arg)] return Err(winnow::error::ErrMode::from_input(&i)); }; diff --git a/gix-object/tests/object/encode.rs b/gix-object/tests/object/encode.rs index a77c58857cf..7b62eb19d5b 100644 --- a/gix-object/tests/object/encode.rs +++ b/gix-object/tests/object/encode.rs @@ -50,7 +50,7 @@ macro_rules! round_trip { let w = &mut output; w.write_all(&item.loose_header())?; item.write_to(w)?; - let parsed = ObjectRef::from_loose(&output)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; let item2 = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; assert_eq!(item2, item, "object-ref loose: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); } @@ -61,7 +61,72 @@ macro_rules! 
round_trip { let w = &mut output; w.write_all(&item.loose_header())?; item.write_to(w)?; - let parsed = ObjectRef::from_loose(&output)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + let parsed_borrowed = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; + let item2: $owned = parsed_borrowed.try_into().or(Err(super::Error::TryFromError))?; + assert_eq!(item2, item, "object-ref loose owned: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); + } + Ok(()) + } + }; +} + +macro_rules! round_trip_with_hash_len { + ($owned:ty, $borrowed:ty, $( $files:literal ), +) => { + #[test] + fn round_trip() -> Result<(), Box> { + use std::convert::TryFrom; + use std::io::Write; + use crate::fixture_bytes; + use gix_object::{ObjectRef, Object, WriteTo}; + use bstr::ByteSlice; + + for input_name in &[ + $( $files ),* + ] { + let input = fixture_bytes(input_name); + // Test the parse->borrowed->owned->write chain for an object kind + let mut output = Vec::new(); + let item = <$borrowed>::from_bytes(&input, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "borrowed: {input_name}"); + + let item: $owned = item.try_into()?; + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr()); + + // Test the parse->borrowed->owned->write chain for the top-level objects + let item = ObjectRef::from(<$borrowed>::from_bytes(&input, gix_testtools::hash_kind_from_env().unwrap_or_default())?); + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "object-ref"); + + let item: Object = Object::try_from(item)?; + output.clear(); + item.write_to(&mut output)?; + assert_eq!(output.as_bstr(), input.as_bstr(), "owned"); + + // Test the loose serialisation -> parse chain for an object kind + let item = <$borrowed>::from_bytes(&input, 
gix_testtools::hash_kind_from_env().unwrap_or_default())?; + // serialise a borrowed item to a tagged loose object + output.clear(); + { + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; + let item2 = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; + assert_eq!(item2, item, "object-ref loose: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); + } + + let item: $owned = item.try_into()?; + // serialise an owned to a tagged loose object + output.clear(); + let w = &mut output; + w.write_all(&item.loose_header())?; + item.write_to(w)?; + let parsed = ObjectRef::from_loose(&output, gix_testtools::hash_kind_from_env().unwrap_or_default())?; let parsed_borrowed = <$borrowed>::try_from(parsed).or(Err(super::Error::TryFromError))?; let item2: $owned = parsed_borrowed.try_into().or(Err(super::Error::TryFromError))?; assert_eq!(item2, item, "object-ref loose owned: {input_name} {:?}\n{:?}", output.as_bstr(), input.as_bstr()); @@ -138,7 +203,7 @@ mod tree { ); } - round_trip!(gix_object::Tree, gix_object::TreeRef, "tree/everything.tree"); + round_trip_with_hash_len!(gix_object::Tree, gix_object::TreeRef, "tree/everything.tree"); } mod blob { diff --git a/gix-object/tests/object/object_ref.rs b/gix-object/tests/object/object_ref.rs index d1285947bf8..00b7298655f 100644 --- a/gix-object/tests/object/object_ref.rs +++ b/gix-object/tests/object/object_ref.rs @@ -4,7 +4,12 @@ mod from_loose { #[test] fn shorter_than_advertised() { assert_eq!( - ObjectRef::from_loose(b"tree 1000\x00").unwrap_err().to_string(), + ObjectRef::from_loose( + b"tree 1000\x00", + gix_testtools::hash_kind_from_env().unwrap_or_default() + ) + .unwrap_err() + .to_string(), "object data was shorter than its size declared in the header" ); } diff --git a/gix-object/tests/object/tree/editor.rs b/gix-object/tests/object/tree/editor.rs index 
70d73af68f4..086c466cf13 100644 --- a/gix-object/tests/object/tree/editor.rs +++ b/gix-object/tests/object/tree/editor.rs @@ -859,6 +859,7 @@ mod utils { tree.write_to(buffer).expect("valid trees can always be serialized"); Ok(Some(gix_object::Data { kind: gix_object::Kind::Tree, + hash_kind: id.kind(), data: &*buffer, })) } diff --git a/gix-object/tests/object/tree/entries.rs b/gix-object/tests/object/tree/entries.rs index f60e342da9d..afd066fb81d 100644 --- a/gix-object/tests/object/tree/entries.rs +++ b/gix-object/tests/object/tree/entries.rs @@ -5,7 +5,7 @@ fn sort_order_is_correct() -> crate::Result { let root = gix_testtools::scripted_fixture_read_only("make_trees.sh")?; let input = std::fs::read(root.join("tree.baseline"))?; - let mut tree = TreeRef::from_bytes(&input)?; + let mut tree = TreeRef::from_bytes(&input, gix_testtools::hash_kind_from_env().unwrap_or_default())?; let expected = tree.entries.clone(); tree.entries.sort(); diff --git a/gix-object/tests/object/tree/from_bytes.rs b/gix-object/tests/object/tree/from_bytes.rs index d546ac3d226..ecad039b67c 100644 --- a/gix-object/tests/object/tree/from_bytes.rs +++ b/gix-object/tests/object/tree/from_bytes.rs @@ -4,7 +4,7 @@ use crate::{fixture_name, hex_to_id}; #[test] fn empty() -> crate::Result { - let tree_ref = TreeRef::from_bytes(&[])?; + let tree_ref = TreeRef::from_bytes(&[], gix_testtools::hash_kind_from_env().unwrap_or_default())?; assert_eq!( tree_ref, TreeRef { entries: vec![] }, @@ -24,7 +24,7 @@ fn empty() -> crate::Result { #[test] fn everything() -> crate::Result { let fixture = fixture_name("tree", "everything.tree"); - let tree_ref = TreeRef::from_bytes(&fixture)?; + let tree_ref = TreeRef::from_bytes(&fixture, gix_testtools::hash_kind_from_env().unwrap_or_default())?; assert_eq!( tree_ref, TreeRef { @@ -64,14 +64,18 @@ fn everything() -> crate::Result { fn invalid() { let fixture = fixture_name("tree", "definitely-special.tree"); let partial_tree = &fixture[..fixture.len() / 2]; - let 
err = TreeRef::from_bytes(partial_tree).unwrap_err().to_string(); + let err = TreeRef::from_bytes(partial_tree, gix_testtools::hash_kind_from_env().unwrap_or_default()) + .unwrap_err() + .to_string(); if cfg!(feature = "verbose-object-parsing-errors") { assert!(err.starts_with("object parsing failed at `100644"), "{err}"); } else { assert_eq!(err, "object parsing failed"); } assert_eq!( - TreeRefIter::from_bytes(partial_tree).take_while(Result::is_ok).count(), + TreeRefIter::from_bytes(partial_tree, gix_testtools::hash_kind_from_env().unwrap_or_default()) + .take_while(Result::is_ok) + .count(), 9, "we can decode about half of it before failing" ); @@ -79,7 +83,10 @@ fn invalid() { #[test] fn fuzzed() { - assert!(gix_object::TreeRef::from_bytes(b"2").is_err(), "fail, but don't crash"); + assert!( + gix_object::TreeRef::from_bytes(b"2", gix_testtools::hash_kind_from_env().unwrap_or_default()).is_err(), + "fail, but don't crash" + ); } #[test] @@ -94,10 +101,12 @@ fn special_trees() -> crate::Result { ("special-5", 17), ] { let fixture = fixture_name("tree", &format!("{name}.tree")); - let actual = TreeRef::from_bytes(&fixture)?; + let actual = TreeRef::from_bytes(&fixture, gix_testtools::hash_kind_from_env().unwrap_or_default())?; assert_eq!(actual.entries.len(), expected_entry_count, "{name}"); assert_eq!( - TreeRefIter::from_bytes(&fixture).map(Result::unwrap).count(), + TreeRefIter::from_bytes(&fixture, gix_testtools::hash_kind_from_env().unwrap_or_default()) + .map(Result::unwrap) + .count(), expected_entry_count, "{name}" ); diff --git a/gix-object/tests/object/tree/iter.rs b/gix-object/tests/object/tree/iter.rs index 264d0f08e39..b9e42d77e74 100644 --- a/gix-object/tests/object/tree/iter.rs +++ b/gix-object/tests/object/tree/iter.rs @@ -9,13 +9,20 @@ use crate::{fixture_name, hex_to_id}; #[test] fn empty() { - assert_eq!(TreeRefIter::from_bytes(&[]).count(), 0, "empty trees are definitely ok"); + assert_eq!( + TreeRefIter::from_bytes(&[], 
gix_testtools::hash_kind_from_env().unwrap_or_default()).count(), + 0, + "empty trees are definitely ok" + ); } #[test] fn error_handling() { let data = fixture_name("tree", "everything.tree"); - let iter = TreeRefIter::from_bytes(&data[..data.len() / 2]); + let iter = TreeRefIter::from_bytes( + &data[..data.len() / 2], + gix_testtools::hash_kind_from_env().unwrap_or_default(), + ); let entries = iter.collect::>(); assert!( entries.last().expect("at least one token").is_err(), @@ -26,14 +33,14 @@ fn error_handling() { #[test] fn offset_to_next_entry() { let buf = fixture_name("tree", "everything.tree"); - let mut iter = TreeRefIter::from_bytes(&buf); + let mut iter = TreeRefIter::from_bytes(&buf, gix_testtools::hash_kind_from_env().unwrap_or_default()); assert_eq!(iter.offset_to_next_entry(&buf), 0, "first entry is always at 0"); iter.next(); let actual = iter.offset_to_next_entry(&buf); assert_eq!(actual, 31, "now the offset increases"); assert_eq!( - TreeRefIter::from_bytes(&buf[actual..]) + TreeRefIter::from_bytes(&buf[actual..], gix_testtools::hash_kind_from_env().unwrap_or_default()) .next() .map(|e| e.unwrap().filename), iter.next().map(|e| e.unwrap().filename), @@ -44,7 +51,11 @@ fn offset_to_next_entry() { #[test] fn everything() -> crate::Result { assert_eq!( - TreeRefIter::from_bytes(&fixture_name("tree", "everything.tree")).collect::, _>>()?, + TreeRefIter::from_bytes( + &fixture_name("tree", "everything.tree"), + gix_testtools::hash_kind_from_env().unwrap_or_default() + ) + .collect::, _>>()?, vec![ EntryRef { mode: tree::EntryKind::BlobExecutable.into(), diff --git a/gix-odb/src/cache.rs b/gix-odb/src/cache.rs index 77022a09661..b869e285a01 100644 --- a/gix-odb/src/cache.rs +++ b/gix-odb/src/cache.rs @@ -218,7 +218,7 @@ mod impls { ) -> Result, Option)>, gix_object::find::Error> { if let Some(mut obj_cache) = self.object_cache.as_ref().map(RefCell::borrow_mut) { if let Some(kind) = obj_cache.get(&id.as_ref().to_owned(), buffer) { - return 
Ok(Some((Data::new(kind, buffer), None))); + return Ok(Some((Data::new(kind, id.kind(), buffer), None))); } } let possibly_obj = self.inner.try_find_cached(id.as_ref(), buffer, pack_cache)?; diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs index 6862437f1fc..ba5b4ef9faf 100644 --- a/gix-odb/src/memory.rs +++ b/gix-odb/src/memory.rs @@ -152,6 +152,7 @@ where buffer.extend_from_slice(data); return Ok(Some(Data { kind: *kind, + hash_kind: id.kind(), data: &*buffer, })); } diff --git a/gix-odb/src/store_impls/dynamic/find.rs b/gix-odb/src/store_impls/dynamic/find.rs index 13608b7c876..4713615a2e5 100644 --- a/gix-odb/src/store_impls/dynamic/find.rs +++ b/gix-odb/src/store_impls/dynamic/find.rs @@ -164,6 +164,7 @@ where Ok(r) => Ok(( gix_object::Data { kind: r.kind, + hash_kind: pack.object_hash(), data: buffer.as_slice(), }, Some(gix_pack::data::entry::Location { @@ -265,6 +266,7 @@ where ( gix_object::Data { kind: r.kind, + hash_kind: pack.object_hash(), data: buffer.as_slice(), }, Some(gix_pack::data::entry::Location { diff --git a/gix-odb/src/store_impls/loose/find.rs b/gix-odb/src/store_impls/loose/find.rs index 6ac6d26de67..5306f2b3688 100644 --- a/gix-odb/src/store_impls/loose/find.rs +++ b/gix-odb/src/store_impls/loose/find.rs @@ -261,6 +261,10 @@ impl Store { .expect("BUG: here the size is already confirmed to fit into memory"), 0, ); - Ok(gix_object::Data { kind, data: buf }) + Ok(gix_object::Data { + kind, + hash_kind: id.kind(), + data: buf, + }) } } diff --git a/gix-pack/src/bundle/find.rs b/gix-pack/src/bundle/find.rs index 4a7fd23129d..bfd128c551e 100644 --- a/gix-pack/src/bundle/find.rs +++ b/gix-pack/src/bundle/find.rs @@ -58,6 +58,7 @@ impl crate::Bundle { gix_object::Data { kind: r.kind, data: out.as_slice(), + hash_kind: self.pack.object_hash(), }, crate::data::entry::Location { pack_id: self.pack.id, diff --git a/gix-pack/src/data/output/count/objects/mod.rs b/gix-pack/src/data/output/count/objects/mod.rs index a057d45a986..05f06f58663 
100644 --- a/gix-pack/src/data/output/count/objects/mod.rs +++ b/gix-pack/src/data/output/count/objects/mod.rs @@ -204,7 +204,7 @@ mod expand { push_obj_count_unique( &mut out, seen_objs, &tree_id, location, objects, stats, true, ); - gix_object::TreeRefIter::from_bytes(obj.data) + gix_object::TreeRefIter::from_bytes(obj.data, obj.hash_kind) }; let objects_ref = if parent_commit_ids.is_empty() { @@ -242,7 +242,10 @@ mod expand { stats, true, ); - gix_object::TreeRefIter::from_bytes(parent_tree_obj.data) + gix_object::TreeRefIter::from_bytes( + parent_tree_obj.data, + parent_tree_obj.hash_kind, + ) }; changes_delegate.clear(); @@ -279,7 +282,7 @@ mod expand { { let objects = ExpandedCountingObjects::new(db, out, objects); gix_traverse::tree::breadthfirst( - gix_object::TreeRefIter::from_bytes(obj.0.data), + gix_object::TreeRefIter::from_bytes(obj.0.data, obj.0.hash_kind), &mut tree_traversal_state, &objects, &mut traverse_delegate, diff --git a/gix-pack/src/index/traverse/mod.rs b/gix-pack/src/index/traverse/mod.rs index d6b3c5cac0b..bcf8b40c5fe 100644 --- a/gix-pack/src/index/traverse/mod.rs +++ b/gix-pack/src/index/traverse/mod.rs @@ -207,7 +207,7 @@ where E: std::error::Error + Send + Sync + 'static, { if check.object_checksum() { - gix_object::Data::new(object_kind, decompressed) + gix_object::Data::new(object_kind, index_entry.oid.kind(), decompressed) .verify_checksum(&index_entry.oid) .map_err(|source| Error::PackObjectVerify { offset: index_entry.pack_offset, diff --git a/gix-pack/src/index/verify.rs b/gix-pack/src/index/verify.rs index c0ed279152a..372210964b4 100644 --- a/gix-pack/src/index/verify.rs +++ b/gix-pack/src/index/verify.rs @@ -241,13 +241,14 @@ impl index::File { use gix_object::Kind::*; match object_kind { Tree | Commit | Tag => { - let object = gix_object::ObjectRef::from_bytes(object_kind, buf).map_err(|err| { - integrity::Error::ObjectDecode { - source: err, - kind: object_kind, - id: index_entry.oid, - } - })?; + let object = + 
gix_object::ObjectRef::from_bytes(object_kind, index_entry.oid.kind(), buf).map_err(|err| { + integrity::Error::ObjectDecode { + source: err, + kind: object_kind, + id: index_entry.oid, + } + })?; if let Mode::HashCrc32DecodeEncode = verify_mode { encode_buf.clear(); object.write_to(&mut *encode_buf)?; diff --git a/gix-pack/tests/pack/data/input.rs b/gix-pack/tests/pack/data/input.rs index e8997526963..0db4962d9d4 100644 --- a/gix-pack/tests/pack/data/input.rs +++ b/gix-pack/tests/pack/data/input.rs @@ -32,6 +32,7 @@ mod lookup_ref_delta_objects { fn entry(header: Header, data: &'static [u8]) -> input::Entry { let obj = gix_object::Data { kind: header.as_kind().unwrap_or(gix_object::Kind::Blob), + hash_kind: gix_testtools::hash_kind_from_env().unwrap_or_default(), data, }; let mut entry = input::Entry::from_data_obj(&obj, 0).expect("valid object"); @@ -78,13 +79,14 @@ mod lookup_ref_delta_objects { } impl gix_object::Find for FindData<'_> { - fn try_find<'a>(&self, _id: &oid, buf: &'a mut Vec) -> Result>, Error> { + fn try_find<'a>(&self, id: &oid, buf: &'a mut Vec) -> Result>, Error> { self.calls.fetch_add(1, Ordering::Relaxed); if let Some(data) = self.data { buf.resize(data.len(), 0); buf.copy_from_slice(data); Ok(Some(gix_object::Data { kind: gix_object::Kind::Blob, + hash_kind: id.kind(), data: buf.as_slice(), })) } else { diff --git a/gix-ref/src/store/file/raw_ext.rs b/gix-ref/src/store/file/raw_ext.rs index fc0a3a9f378..760cceb9ecb 100644 --- a/gix-ref/src/store/file/raw_ext.rs +++ b/gix-ref/src/store/file/raw_ext.rs @@ -160,13 +160,16 @@ impl ReferenceExt for Reference { let mut oid = self.follow_to_object_packed(store, packed)?; let mut buf = Vec::new(); let peeled_id = loop { - let gix_object::Data { kind, data } = - objects - .try_find(&oid, &mut buf)? - .ok_or_else(|| peel::to_id::Error::NotFound { - oid, - name: self.name.0.clone(), - })?; + let gix_object::Data { + kind, + data, + hash_kind: _, + } = objects + .try_find(&oid, &mut buf)? 
+ .ok_or_else(|| peel::to_id::Error::NotFound { + oid, + name: self.name.0.clone(), + })?; match kind { gix_object::Kind::Tag => { oid = gix_object::TagRefIter::from_bytes(data).target_id().map_err(|_err| { diff --git a/gix-ref/tests/refs/file/mod.rs b/gix-ref/tests/refs/file/mod.rs index c2e7b201b09..f9087dd3a6b 100644 --- a/gix-ref/tests/refs/file/mod.rs +++ b/gix-ref/tests/refs/file/mod.rs @@ -37,11 +37,12 @@ struct EmptyCommit; impl gix_object::Find for EmptyCommit { fn try_find<'a>( &self, - _id: &gix_hash::oid, + id: &gix_hash::oid, _buffer: &'a mut Vec, ) -> Result>, gix_object::find::Error> { Ok(Some(gix_object::Data { kind: gix_object::Kind::Commit, + hash_kind: id.kind(), data: &[], })) } diff --git a/gix-traverse/src/tree/depthfirst.rs b/gix-traverse/src/tree/depthfirst.rs index 6fbc5743f41..7c34f5d71ed 100644 --- a/gix-traverse/src/tree/depthfirst.rs +++ b/gix-traverse/src/tree/depthfirst.rs @@ -72,7 +72,7 @@ pub(super) mod function { tree_buf: buf, byte_offset_to_next_entry, } => { - let mut iter = TreeRefIter::from_bytes(&buf[byte_offset_to_next_entry..]); + let mut iter = TreeRefIter::from_bytes(&buf[byte_offset_to_next_entry..], root.kind()); delegate.pop_back_tracked_path_and_set_current(); while let Some(entry) = iter.next() { let entry = entry?; diff --git a/gix/src/object/mod.rs b/gix/src/object/mod.rs index d640e737d9b..d3b003e24ad 100644 --- a/gix/src/object/mod.rs +++ b/gix/src/object/mod.rs @@ -172,7 +172,7 @@ impl<'repo> Object<'repo> { /// Obtain a fully parsed commit whose fields reference our data buffer. pub fn try_to_commit_ref(&self) -> Result, conversion::Error> { - gix_object::Data::new(self.kind, &self.data) + gix_object::Data::new(self.kind, self.id.kind(), &self.data) .decode()? 
.into_commit() .ok_or(conversion::Error::UnexpectedType { @@ -187,14 +187,14 @@ impl<'repo> Object<'repo> { /// /// - this object is not a commit pub fn to_commit_ref_iter(&self) -> gix_object::CommitRefIter<'_> { - gix_object::Data::new(self.kind, &self.data) + gix_object::Data::new(self.kind, self.id.kind(), &self.data) .try_into_commit_iter() .expect("BUG: This object must be a commit") } /// Obtain a commit token iterator from the data in this instance, if it is a commit. pub fn try_to_commit_ref_iter(&self) -> Option> { - gix_object::Data::new(self.kind, &self.data).try_into_commit_iter() + gix_object::Data::new(self.kind, self.id.kind(), &self.data).try_into_commit_iter() } /// Obtain a tag token iterator from the data in this instance. @@ -203,7 +203,7 @@ impl<'repo> Object<'repo> { /// /// - this object is not a tag pub fn to_tag_ref_iter(&self) -> gix_object::TagRefIter<'_> { - gix_object::Data::new(self.kind, &self.data) + gix_object::Data::new(self.kind, self.id.kind(), &self.data) .try_into_tag_iter() .expect("BUG: this object must be a tag") } @@ -214,7 +214,7 @@ impl<'repo> Object<'repo> { /// /// - this object is not a tag pub fn try_to_tag_ref_iter(&self) -> Option> { - gix_object::Data::new(self.kind, &self.data).try_into_tag_iter() + gix_object::Data::new(self.kind, self.id.kind(), &self.data).try_into_tag_iter() } /// Obtain a tag object from the data in this instance. @@ -229,7 +229,7 @@ impl<'repo> Object<'repo> { /// Obtain a fully parsed tag object whose fields reference our data buffer. pub fn try_to_tag_ref(&self) -> Result, conversion::Error> { - gix_object::Data::new(self.kind, &self.data) + gix_object::Data::new(self.kind, self.id.kind(), &self.data) .decode()? 
.into_tag() .ok_or(conversion::Error::UnexpectedType { diff --git a/gix/src/object/tree/diff/for_each.rs b/gix/src/object/tree/diff/for_each.rs index e8874303f4a..c94f3a4820d 100644 --- a/gix/src/object/tree/diff/for_each.rs +++ b/gix/src/object/tree/diff/for_each.rs @@ -76,8 +76,8 @@ impl<'old> Platform<'_, 'old> { }; let opts = self.options.into(); Ok(gix_diff::tree_with_rewrites( - TreeRefIter::from_bytes(&self.lhs.data), - TreeRefIter::from_bytes(&other.data), + TreeRefIter::from_bytes(&self.lhs.data, self.lhs.id.kind()), + TreeRefIter::from_bytes(&other.data, other.id.kind()), cache, &mut self.state, &repo.objects, diff --git a/gix/src/object/tree/mod.rs b/gix/src/object/tree/mod.rs index e50d617b60b..95abd1c0f80 100644 --- a/gix/src/object/tree/mod.rs +++ b/gix/src/object/tree/mod.rs @@ -37,7 +37,7 @@ impl<'repo> Tree<'repo> { /// Parse our tree data and return the parse tree for direct access to its entries. pub fn decode(&self) -> Result, gix_object::decode::Error> { - gix_object::TreeRef::from_bytes(&self.data) + gix_object::TreeRef::from_bytes(&self.data, self.repo.object_hash()) } /// Find the entry named `name` by iteration, or return `None` if it wasn't found. 
@@ -55,7 +55,7 @@ impl<'repo> Tree<'repo> { /// # Ok(()) } /// ``` pub fn find_entry(&self, name: impl PartialEq) -> Option> { - TreeRefIter::from_bytes(&self.data) + TreeRefIter::from_bytes(&self.data, self.id.kind()) .filter_map(Result::ok) .find(|entry| name.eq(entry.filename)) .map(|entry| EntryRef { @@ -82,7 +82,7 @@ impl<'repo> Tree<'repo> { buf.extend_from_slice(&self.data); let mut iter = path.into_iter().peekable(); - let mut data = gix_object::Data::new(gix_object::Kind::Tree, buf); + let mut data = gix_object::Data::new(gix_object::Kind::Tree, self.id.kind(), buf); loop { data = match next_entry(&mut iter, data) { @@ -115,7 +115,7 @@ impl<'repo> Tree<'repo> { P: PartialEq, { let mut iter = path.into_iter().peekable(); - let mut data = gix_object::Data::new(gix_object::Kind::Tree, &self.data); + let mut data = gix_object::Data::new(gix_object::Kind::Tree, self.id.kind(), &self.data); let mut data_id = self.id; loop { @@ -222,7 +222,8 @@ mod iter { /// Return an iterator over tree entries to obtain information about files and directories this tree contains. 
pub fn iter(&self) -> impl Iterator, gix_object::decode::Error>> { let repo = self.repo; - gix_object::TreeRefIter::from_bytes(&self.data).map(move |e| e.map(|entry| EntryRef { inner: entry, repo })) + gix_object::TreeRefIter::from_bytes(&self.data, self.id.kind()) + .map(move |e| e.map(|entry| EntryRef { inner: entry, repo })) } } } diff --git a/gix/src/object/tree/traverse.rs b/gix/src/object/tree/traverse.rs index 3c66ac56953..444bd859a2e 100644 --- a/gix/src/object/tree/traverse.rs +++ b/gix/src/object/tree/traverse.rs @@ -50,7 +50,7 @@ impl Platform<'_, '_> { where V: gix_traverse::tree::Visit, { - let root = gix_object::TreeRefIter::from_bytes(&self.root.data); + let root = gix_object::TreeRefIter::from_bytes(&self.root.data, self.root.id.kind()); let state = gix_traverse::tree::breadthfirst::State::default(); gix_traverse::tree::breadthfirst(root, state, &self.root.repo.objects, delegate) } diff --git a/gix/src/repository/diff.rs b/gix/src/repository/diff.rs index ff45f01cd2a..8a04e2ac3f5 100644 --- a/gix/src/repository/diff.rs +++ b/gix/src/repository/diff.rs @@ -64,8 +64,8 @@ impl Repository { let new_tree = new_tree.into().unwrap_or(&empty_tree); let mut out = Vec::new(); gix_diff::tree_with_rewrites( - TreeRefIter::from_bytes(&old_tree.data), - TreeRefIter::from_bytes(&new_tree.data), + TreeRefIter::from_bytes(&old_tree.data, old_tree.id.kind()), + TreeRefIter::from_bytes(&new_tree.data, new_tree.id.kind()), &mut cache, &mut Default::default(), &self.objects, diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index 3b81a778064..e8a25403159 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -151,6 +151,7 @@ impl gix_object::Find for crate::Repository { buffer.clear(); return Ok(Some(gix_object::Data { kind: gix_object::Kind::Tree, + hash_kind: self.object_hash(), data: &[], })); }