GitoxideLabs · Christoph Rüßler (cruessler) · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs
@@ -190,7 +190,11 @@ where
             };
             buf.clear();
             // …but write nothing
-            Ok(Some(gix::objs::Data { kind, data: buf }))
+            Ok(Some(gix::objs::Data {
+                kind,
+                hash_len: id.kind().len_in_bytes(),
+                data: buf,
+            }))
         } else {
             self.db.try_find(id, buf)
         }
@@ -201,10 +205,11 @@ where
 struct Empty;
 
 impl gix::objs::Find for Empty {
-    fn try_find<'a>(&self, _id: &gix::oid, buffer: &'a mut Vec<u8>) -> Result<Option<gix::objs::Data<'a>>, Error> {
+    fn try_find<'a>(&self, id: &gix::oid, buffer: &'a mut Vec<u8>) -> Result<Option<gix::objs::Data<'a>>, Error> {
         buffer.clear();
         Ok(Some(gix::objs::Data {
             kind: gix::object::Kind::Blob,
+            hash_len: id.kind().len_in_bytes(),
             data: buffer,
         }))
     }

diff --git a/gix-diff/tests/diff/main.rs b/gix-diff/tests/diff/main.rs
@@ -41,6 +41,7 @@ mod util {
                     buffer.extend_from_slice(data);
                     Ok(Some(gix_object::Data {
                         kind: gix_object::Kind::Blob,
+                        hash_len: id.kind().len_in_bytes(),
                         data: buffer.as_slice(),
                     }))
                 }

diff --git a/gix-diff/tests/diff/tree_with_rewrites.rs b/gix-diff/tests/diff/tree_with_rewrites.rs
@@ -107,8 +107,14 @@ fn empty_to_new_tree_without_rename_tracking() -> crate::Result {
     {
         let (lhs, rhs, mut cache, odb) = repo_with_trees(None, "c1 - initial")?;
         let err = gix_diff::tree_with_rewrites(
-            TreeRefIter::from_bytes(&lhs),
-            TreeRefIter::from_bytes(&rhs),
+            TreeRefIter::from_bytes(
+                &lhs,
+                gix_testtools::hash_kind_from_env().unwrap_or_default().len_in_bytes(),
+            ),
+            TreeRefIter::from_bytes(
+                &rhs,
+                gix_testtools::hash_kind_from_env().unwrap_or_default().len_in_bytes(),
+            ),
             &mut cache,
             &mut Default::default(),
             &odb,
@@ -1843,8 +1849,11 @@ mod util {
         let (from, to, mut cache, odb) = repo_with_trees(lhs, rhs)?;
         let mut out = Vec::new();
         let rewrites_info = gix_diff::tree_with_rewrites(
-            TreeRefIter::from_bytes(&from),
-            TreeRefIter::from_bytes(&to),
+            // TODO(SHA256):
+            // Get hash from env. This requires updating the snapshot at the top of the file as
+            // well.
+            TreeRefIter::from_bytes(&from, gix_hash::Kind::Sha1.len_in_bytes()),
+            TreeRefIter::from_bytes(&to, gix_hash::Kind::Sha1.len_in_bytes()),
             &mut cache,
             &mut Default::default(),
             &odb,

diff --git a/gix-merge/src/tree/function.rs b/gix-merge/src/tree/function.rs
@@ -77,7 +77,7 @@ where
     let (mut base_buf, mut side_buf) = (Vec::new(), Vec::new());
     let ancestor_tree = objects.find_tree(base_tree, &mut base_buf)?;
     let mut editor = tree::Editor::new(ancestor_tree.to_owned(), objects, base_tree.kind());
-    let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf);
+    let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf, base_tree.kind().len_in_bytes());
     let tree_conflicts = options.tree_conflicts;
 
     let mut our_changes = Vec::new();

diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs
@@ -33,6 +33,7 @@ mod util {
                     buffer.extend_from_slice(data);
                     Ok(Some(gix_object::Data {
                         kind: gix_object::Kind::Blob,
+                        hash_len: id.kind().len_in_bytes(),
                         data: buffer.as_slice(),
                     }))
                 }

diff --git a/gix-object/benches/decode_objects.rs b/gix-object/benches/decode_objects.rs
@@ -21,10 +21,24 @@ fn parse_tag(c: &mut Criterion) {
 
 fn parse_tree(c: &mut Criterion) {
     c.bench_function("TreeRef()", |b| {
-        b.iter(|| black_box(gix_object::TreeRef::from_bytes(TREE)).unwrap());
+        b.iter(|| {
+            black_box(gix_object::TreeRef::from_bytes(
+                TREE,
+                gix_testtools::hash_kind_from_env().unwrap_or_default().len_in_bytes(),
+            ))
+            .unwrap()
+        });
     });
     c.bench_function("TreeRefIter()", |b| {
-        b.iter(|| black_box(gix_object::TreeRefIter::from_bytes(TREE).count()));
+        b.iter(|| {
+            black_box(
+                gix_object::TreeRefIter::from_bytes(
+                    TREE,
+                    gix_testtools::hash_kind_from_env().unwrap_or_default().len_in_bytes(),
+                )
+                .count(),
+            )
+        });
     });
 }
 

diff --git a/gix-object/benches/edit_tree.rs b/gix-object/benches/edit_tree.rs
@@ -173,6 +173,7 @@ impl gix_object::Find for StorageOdb {
                 tree.write_to(buffer).expect("valid trees can always be serialized");
                 Ok(Some(gix_object::Data {
                     kind: gix_object::Kind::Tree,
+                    hash_len: id.kind().len_in_bytes(),
                     data: &*buffer,
                 }))
             }

diff --git a/gix-object/src/data.rs b/gix-object/src/data.rs
@@ -4,8 +4,8 @@ use crate::{BlobRef, CommitRef, CommitRefIter, Data, Kind, ObjectRef, TagRef, Ta
 
 impl<'a> Data<'a> {
     /// Constructs a new data object from `kind` and `data`.
-    pub fn new(kind: Kind, data: &'a [u8]) -> Data<'a> {
-        Data { kind, data }
+    pub fn new(kind: Kind, hash_len: usize, data: &'a [u8]) -> Data<'a> {
+        Data { kind, hash_len, data }
     }
     /// Decodes the data in the backing slice into a [`ObjectRef`], allowing to access all of its data
     /// conveniently. The cost of parsing an object is negligible.
@@ -14,7 +14,7 @@ impl<'a> Data<'a> {
     /// using [`crate::ObjectRef::into_owned()`].
     pub fn decode(&self) -> Result<ObjectRef<'a>, crate::decode::Error> {
         Ok(match self.kind {
-            Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data)?),
+            Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data, self.hash_len)?),
             Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }),
             Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?),
             Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?),
@@ -25,7 +25,7 @@ impl<'a> Data<'a> {
     /// `None` if this is not a tree object.
     pub fn try_into_tree_iter(self) -> Option<TreeRefIter<'a>> {
         match self.kind {
-            Kind::Tree => Some(TreeRefIter::from_bytes(self.data)),
+            Kind::Tree => Some(TreeRefIter::from_bytes(self.data, self.hash_len)),
             _ => None,
         }
     }

diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs
@@ -263,6 +263,9 @@ pub struct TreeRef<'a> {
 /// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated.
 #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
 pub struct TreeRefIter<'a> {
+    /// TODO:
+    /// Document.
+    hash_len: usize,
     /// The directories and files contained in this tree.
     data: &'a [u8],
 }
@@ -289,6 +292,9 @@ impl Tree {
 pub struct Data<'a> {
     /// kind of object
     pub kind: Kind,
+    /// TODO:
+    /// Document.
+    pub hash_len: usize,
     /// decoded, decompressed data, owned by a backing store.
     pub data: &'a [u8],
 }

diff --git a/gix-object/src/object/mod.rs b/gix-object/src/object/mod.rs
@@ -191,7 +191,7 @@ pub enum LooseDecodeError {
 
 impl<'a> ObjectRef<'a> {
     /// Deserialize an object from a loose serialisation
-    pub fn from_loose(data: &'a [u8]) -> Result<ObjectRef<'a>, LooseDecodeError> {
+    pub fn from_loose(data: &'a [u8], hash_len: usize) -> Result<ObjectRef<'a>, LooseDecodeError> {
         let (kind, size, offset) = loose_header(data)?;
 
         let body = &data[offset..]
@@ -200,13 +200,13 @@ impl<'a> ObjectRef<'a> {
                 message: "object data was shorter than its size declared in the header",
             })?;
 
-        Ok(Self::from_bytes(kind, body)?)
+        Ok(Self::from_bytes(kind, hash_len, body)?)
     }
 
     /// Deserialize an object of `kind` from the given `data`.
-    pub fn from_bytes(kind: Kind, data: &'a [u8]) -> Result<ObjectRef<'a>, crate::decode::Error> {
+    pub fn from_bytes(kind: Kind, hash_len: usize, data: &'a [u8]) -> Result<ObjectRef<'a>, crate::decode::Error> {
         Ok(match kind {
-            Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data)?),
+            Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data, hash_len)?),
             Kind::Blob => ObjectRef::Blob(BlobRef { data }),
             Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?),
             Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?),

diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs
@@ -39,7 +39,7 @@ where
         return ControlFlow::Break(None);
     };
 
-    let Some(entry) = TreeRefIter::from_bytes(tree.data)
+    let Some(entry) = TreeRefIter::from_bytes(tree.data, tree.hash_len)
         .filter_map(Result::ok)
         .find(|entry| component.eq(entry.filename))
     else {
@@ -55,8 +55,8 @@ where
 
 impl<'a> TreeRefIter<'a> {
     /// Instantiate an iterator from the given tree data.
-    pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> {
-        TreeRefIter { data }
+    pub fn from_bytes(data: &'a [u8], hash_len: usize) -> TreeRefIter<'a> {
+        TreeRefIter { data, hash_len }
     }
 
     /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
@@ -81,7 +81,7 @@ impl<'a> TreeRefIter<'a> {
         buffer.extend_from_slice(self.data);
 
         let mut iter = path.into_iter().peekable();
-        let mut data = crate::Data::new(crate::Kind::Tree, buffer);
+        let mut data = crate::Data::new(crate::Kind::Tree, self.hash_len, buffer);
 
         loop {
             data = match next_entry(&mut iter, data) {
@@ -123,11 +123,12 @@ impl<'a> TreeRefIter<'a> {
 
 impl<'a> TreeRef<'a> {
     /// Deserialize a Tree from `data`.
-    pub fn from_bytes(mut data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> {
-        let input = &mut data;
-        match decode::tree.parse_next(input) {
+    pub fn from_bytes(data: &'a [u8], hash_len: usize) -> Result<TreeRef<'a>, crate::decode::Error> {
+        let state = decode::State { hash_len };
+        let mut input = decode::Stream { input: data, state };
+        match decode::tree.parse_next(&mut input) {
             Ok(tag) => Ok(tag),
-            Err(err) => Err(crate::decode::Error::with_err(err, input)),
+            Err(err) => Err(crate::decode::Error::with_err(err, &input)),
         }
     }
 
@@ -190,7 +191,7 @@ impl<'a> Iterator for TreeRefIter<'a> {
         if self.data.is_empty() {
             return None;
         }
-        match decode::fast_entry(self.data) {
+        match decode::fast_entry(self.data, self.hash_len) {
             Some((data_left, entry)) => {
                 self.data = data_left;
                 Some(Ok(entry))
@@ -218,47 +219,52 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
 
 mod decode {
     use bstr::ByteSlice;
-    use winnow::{error::ParserError, prelude::*};
+    use winnow::{error::ParserError, prelude::*, Stateful};
 
     use crate::{tree, tree::EntryRef, TreeRef};
 
-    pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
+    pub fn fast_entry(i: &[u8], hash_len: usize) -> Option<(&[u8], EntryRef<'_>)> {
         let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
         let (filename, i) = i.split_at(i.find_byte(0)?);
         let i = &i[1..];
-        const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot
         let (oid, i) = match i.len() {
-            len if len < HASH_LEN_FIXME => return None,
-            _ => i.split_at(20),
+            len if len < hash_len => return None,
+            _ => i.split_at(hash_len),
         };
         Some((
             i,
             EntryRef {
                 mode,
                 filename: filename.as_bstr(),
-                oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"),
+                oid: gix_hash::oid::try_from_bytes(oid)
+                    .unwrap_or_else(|_| panic!("we counted exactly {hash_len} bytes")),
             },
         ))
     }
 
-    pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<TreeRef<'a>, E> {
-        let mut i = &**i;
+    #[derive(Debug)]
+    pub struct State {
+        pub hash_len: usize,
+    }
+
+    pub type Stream<'is> = Stateful<&'is [u8], State>;
+
+    pub fn tree<'a, E: ParserError<&'a [u8]>>(stream: &mut Stream<'a>) -> ModalResult<TreeRef<'a>, E> {
+        let mut i = stream.input;
 
         // Calculate an estimate of the amount of entries to reduce
         // the amount of allocations necessary.
         // Note that this assumes that we want speed over fitting Vecs, this is a trade-off.
-        // TODO(SHA256): know actual/desired length for reduced overallocation
-        const HASH_LEN_FIXME: usize = 20;
         const AVERAGE_FILENAME_LEN: usize = 24;
         const AVERAGE_MODE_LEN: usize = 6;
         const ENTRY_DELIMITER_LEN: usize = 2; // space + trailing zero
         const AVERAGE_TREE_ENTRIES: usize = 16 * 2; // prevent overallocation beyond what's meaningful or what could be dangerous
-        let average_entry_len = ENTRY_DELIMITER_LEN + HASH_LEN_FIXME + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN;
+        let average_entry_len = ENTRY_DELIMITER_LEN + stream.state.hash_len + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN;
         let upper_bound = i.len() / average_entry_len;
         let mut out = Vec::with_capacity(upper_bound.min(AVERAGE_TREE_ENTRIES));
 
         while !i.is_empty() {
-            let Some((rest, entry)) = fast_entry(i) else {
+            let Some((rest, entry)) = fast_entry(i, stream.state.hash_len) else {
                 #[allow(clippy::unit_arg)]
                 return Err(winnow::error::ErrMode::from_input(&i));
             };