Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions gitoxide-core/src/index/checkout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,11 @@ where
};
buf.clear();
// …but write nothing
Ok(Some(gix::objs::Data { kind, data: buf }))
Ok(Some(gix::objs::Data {
kind,
hash_kind: id.kind(),
data: buf,
}))
} else {
self.db.try_find(id, buf)
}
Expand All @@ -201,10 +205,11 @@ where
struct Empty;

impl gix::objs::Find for Empty {
fn try_find<'a>(&self, _id: &gix::oid, buffer: &'a mut Vec<u8>) -> Result<Option<gix::objs::Data<'a>>, Error> {
fn try_find<'a>(&self, id: &gix::oid, buffer: &'a mut Vec<u8>) -> Result<Option<gix::objs::Data<'a>>, Error> {
buffer.clear();
Ok(Some(gix::objs::Data {
kind: gix::object::Kind::Blob,
hash_kind: id.kind(),
data: buffer,
}))
}
Expand Down
1 change: 1 addition & 0 deletions gix-diff/tests/diff/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ mod util {
buffer.extend_from_slice(data);
Ok(Some(gix_object::Data {
kind: gix_object::Kind::Blob,
hash_kind: id.kind(),
data: buffer.as_slice(),
}))
}
Expand Down
11 changes: 7 additions & 4 deletions gix-diff/tests/diff/tree_with_rewrites.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ fn empty_to_new_tree_without_rename_tracking() -> crate::Result {
{
let (lhs, rhs, mut cache, odb) = repo_with_trees(None, "c1 - initial")?;
let err = gix_diff::tree_with_rewrites(
TreeRefIter::from_bytes(&lhs),
TreeRefIter::from_bytes(&rhs),
TreeRefIter::from_bytes(&lhs, gix_testtools::hash_kind_from_env().unwrap_or_default()),
TreeRefIter::from_bytes(&rhs, gix_testtools::hash_kind_from_env().unwrap_or_default()),
&mut cache,
&mut Default::default(),
&odb,
Expand Down Expand Up @@ -1843,8 +1843,11 @@ mod util {
let (from, to, mut cache, odb) = repo_with_trees(lhs, rhs)?;
let mut out = Vec::new();
let rewrites_info = gix_diff::tree_with_rewrites(
TreeRefIter::from_bytes(&from),
TreeRefIter::from_bytes(&to),
// TODO(SHA256):
// Get hash from env. This requires updating the snapshot at the top of the file as
// well.
TreeRefIter::from_bytes(&from, gix_hash::Kind::Sha1),
TreeRefIter::from_bytes(&to, gix_hash::Kind::Sha1),
&mut cache,
&mut Default::default(),
&odb,
Expand Down
2 changes: 1 addition & 1 deletion gix-merge/src/tree/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ where
let (mut base_buf, mut side_buf) = (Vec::new(), Vec::new());
let ancestor_tree = objects.find_tree(base_tree, &mut base_buf)?;
let mut editor = tree::Editor::new(ancestor_tree.to_owned(), objects, base_tree.kind());
let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf);
let ancestor_tree = gix_object::TreeRefIter::from_bytes(&base_buf, base_tree.kind());
let tree_conflicts = options.tree_conflicts;

let mut our_changes = Vec::new();
Expand Down
1 change: 1 addition & 0 deletions gix-merge/tests/merge/blob/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod util {
buffer.extend_from_slice(data);
Ok(Some(gix_object::Data {
kind: gix_object::Kind::Blob,
hash_kind: id.kind(),
data: buffer.as_slice(),
}))
}
Expand Down
15 changes: 13 additions & 2 deletions gix-object/benches/decode_objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,21 @@ fn parse_tag(c: &mut Criterion) {

fn parse_tree(c: &mut Criterion) {
c.bench_function("TreeRef()", |b| {
b.iter(|| black_box(gix_object::TreeRef::from_bytes(TREE)).unwrap());
b.iter(|| {
black_box(gix_object::TreeRef::from_bytes(
TREE,
gix_testtools::hash_kind_from_env().unwrap_or_default(),
))
.unwrap()
});
});
c.bench_function("TreeRefIter()", |b| {
b.iter(|| black_box(gix_object::TreeRefIter::from_bytes(TREE).count()));
b.iter(|| {
black_box(
gix_object::TreeRefIter::from_bytes(TREE, gix_testtools::hash_kind_from_env().unwrap_or_default())
.count(),
)
});
});
}

Expand Down
1 change: 1 addition & 0 deletions gix-object/benches/edit_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ impl gix_object::Find for StorageOdb {
tree.write_to(buffer).expect("valid trees can always be serialized");
Ok(Some(gix_object::Data {
kind: gix_object::Kind::Tree,
hash_kind: id.kind(),
data: &*buffer,
}))
}
Expand Down
4 changes: 4 additions & 0 deletions gix-object/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"

[dependencies.gix-hash]
path = "../../gix-hash"
features = ["sha1", "sha256"]

[dependencies.gix-object]
path = ".."
features = ["sha1"]
Expand Down
3 changes: 2 additions & 1 deletion gix-object/fuzz/fuzz_targets/fuzz_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ use libfuzzer_sys::fuzz_target;
use std::hint::black_box;

fuzz_target!(|tree: &[u8]| {
let _ = black_box(gix_object::TreeRef::from_bytes(tree));
let _ = black_box(gix_object::TreeRef::from_bytes(tree, gix_hash::Kind::Sha1));
let _ = black_box(gix_object::TreeRef::from_bytes(tree, gix_hash::Kind::Sha256));
});
10 changes: 5 additions & 5 deletions gix-object/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
use crate::{BlobRef, CommitRef, CommitRefIter, Data, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter};

impl<'a> Data<'a> {
/// Constructs a new data object from `kind` and `data`.
pub fn new(kind: Kind, data: &'a [u8]) -> Data<'a> {
Data { kind, data }
/// Constructs a new data object from `kind`, `hash_kind` and `data`.
pub fn new(kind: Kind, hash_kind: gix_hash::Kind, data: &'a [u8]) -> Data<'a> {
Data { kind, hash_kind, data }
}
/// Decodes the data in the backing slice into a [`ObjectRef`], allowing to access all of its data
/// conveniently. The cost of parsing an object is negligible.
Expand All @@ -14,7 +14,7 @@ impl<'a> Data<'a> {
/// using [`crate::ObjectRef::into_owned()`].
pub fn decode(&self) -> Result<ObjectRef<'a>, crate::decode::Error> {
Ok(match self.kind {
Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data)?),
Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data, self.hash_kind)?),
Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }),
Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?),
Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?),
Expand All @@ -25,7 +25,7 @@ impl<'a> Data<'a> {
/// `None` if this is not a tree object.
pub fn try_into_tree_iter(self) -> Option<TreeRefIter<'a>> {
match self.kind {
Kind::Tree => Some(TreeRefIter::from_bytes(self.data)),
Kind::Tree => Some(TreeRefIter::from_bytes(self.data, self.hash_kind)),
_ => None,
}
}
Expand Down
8 changes: 6 additions & 2 deletions gix-object/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! ## Decode Borrowed Objects
//!
//! ```
//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello").unwrap();
//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello", gix_hash::Kind::Sha1).unwrap();
//! let blob = object.as_blob().unwrap();
//!
//! assert_eq!(blob.data, b"hello");
Expand All @@ -16,7 +16,7 @@
//! ```
//! use gix_object::WriteTo;
//!
//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello")
//! let object = gix_object::ObjectRef::from_loose(b"blob 5\0hello", gix_hash::Kind::Sha1)
//! .unwrap()
//! .into_owned()
//! .unwrap();
Expand Down Expand Up @@ -263,6 +263,8 @@ pub struct TreeRef<'a> {
/// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated.
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
pub struct TreeRefIter<'a> {
/// The hash kind to use in this tree.
hash_kind: gix_hash::Kind,
/// The directories and files contained in this tree.
data: &'a [u8],
}
Expand All @@ -289,6 +291,8 @@ impl Tree {
pub struct Data<'a> {
/// kind of object
pub kind: Kind,
/// The hash kind to use for this piece of data.
pub hash_kind: gix_hash::Kind,
/// decoded, decompressed data, owned by a backing store.
pub data: &'a [u8],
}
Expand Down
12 changes: 8 additions & 4 deletions gix-object/src/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ pub enum LooseDecodeError {

impl<'a> ObjectRef<'a> {
/// Deserialize an object from a loose serialisation
pub fn from_loose(data: &'a [u8]) -> Result<ObjectRef<'a>, LooseDecodeError> {
pub fn from_loose(data: &'a [u8], hash_kind: gix_hash::Kind) -> Result<ObjectRef<'a>, LooseDecodeError> {
let (kind, size, offset) = loose_header(data)?;

let body = &data[offset..]
Expand All @@ -200,13 +200,17 @@ impl<'a> ObjectRef<'a> {
message: "object data was shorter than its size declared in the header",
})?;

Ok(Self::from_bytes(kind, body)?)
Ok(Self::from_bytes(kind, hash_kind, body)?)
}

/// Deserialize an object of `kind` from the given `data`.
pub fn from_bytes(kind: Kind, data: &'a [u8]) -> Result<ObjectRef<'a>, crate::decode::Error> {
pub fn from_bytes(
kind: Kind,
hash_kind: gix_hash::Kind,
data: &'a [u8],
) -> Result<ObjectRef<'a>, crate::decode::Error> {
Ok(match kind {
Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data)?),
Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data, hash_kind)?),
Kind::Blob => ObjectRef::Blob(BlobRef { data }),
Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?),
Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?),
Expand Down
50 changes: 29 additions & 21 deletions gix-object/src/tree/ref_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ where
return ControlFlow::Break(None);
};

let Some(entry) = TreeRefIter::from_bytes(tree.data)
let Some(entry) = TreeRefIter::from_bytes(tree.data, tree.hash_kind)
.filter_map(Result::ok)
.find(|entry| component.eq(entry.filename))
else {
Expand All @@ -55,8 +55,8 @@ where

impl<'a> TreeRefIter<'a> {
/// Instantiate an iterator from the given tree data.
pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> {
TreeRefIter { data }
pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> TreeRefIter<'a> {
TreeRefIter { data, hash_kind }
}

/// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
Expand All @@ -81,7 +81,7 @@ impl<'a> TreeRefIter<'a> {
buffer.extend_from_slice(self.data);

let mut iter = path.into_iter().peekable();
let mut data = crate::Data::new(crate::Kind::Tree, buffer);
let mut data = crate::Data::new(crate::Kind::Tree, self.hash_kind, buffer);

loop {
data = match next_entry(&mut iter, data) {
Expand Down Expand Up @@ -123,11 +123,14 @@ impl<'a> TreeRefIter<'a> {

impl<'a> TreeRef<'a> {
/// Deserialize a Tree from `data`.
pub fn from_bytes(mut data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> {
let input = &mut data;
match decode::tree.parse_next(input) {
pub fn from_bytes(data: &'a [u8], hash_kind: gix_hash::Kind) -> Result<TreeRef<'a>, crate::decode::Error> {
let state = decode::State {
hash_len: hash_kind.len_in_bytes(),
};
let mut input = decode::Stream { input: data, state };
match decode::tree.parse_next(&mut input) {
Ok(tag) => Ok(tag),
Err(err) => Err(crate::decode::Error::with_err(err, input)),
Err(err) => Err(crate::decode::Error::with_err(err, &input)),
}
}

Expand Down Expand Up @@ -190,7 +193,7 @@ impl<'a> Iterator for TreeRefIter<'a> {
if self.data.is_empty() {
return None;
}
match decode::fast_entry(self.data) {
match decode::fast_entry(self.data, self.hash_kind.len_in_bytes()) {
Some((data_left, entry)) => {
self.data = data_left;
Some(Ok(entry))
Expand Down Expand Up @@ -218,47 +221,52 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {

mod decode {
use bstr::ByteSlice;
use winnow::{error::ParserError, prelude::*};
use winnow::{error::ParserError, prelude::*, Stateful};

use crate::{tree, tree::EntryRef, TreeRef};

pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
pub fn fast_entry(i: &[u8], hash_len: usize) -> Option<(&[u8], EntryRef<'_>)> {
let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
let (filename, i) = i.split_at(i.find_byte(0)?);
let i = &i[1..];
const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot
let (oid, i) = match i.len() {
len if len < HASH_LEN_FIXME => return None,
_ => i.split_at(20),
len if len < hash_len => return None,
_ => i.split_at(hash_len),
};
Some((
i,
EntryRef {
mode,
filename: filename.as_bstr(),
oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"),
oid: gix_hash::oid::try_from_bytes(oid)
.unwrap_or_else(|_| panic!("we counted exactly {hash_len} bytes")),
},
))
}

pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<TreeRef<'a>, E> {
let mut i = &**i;
#[derive(Debug)]
pub struct State {
pub hash_len: usize,
}

pub type Stream<'is> = Stateful<&'is [u8], State>;

pub fn tree<'a, E: ParserError<&'a [u8]>>(stream: &mut Stream<'a>) -> ModalResult<TreeRef<'a>, E> {
Comment on lines +247 to +254
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd love it if object_hash: gix_hash::Kind could be a parameter here with everything else unchanged.
Probably this was first attempted and this is what it needs to be for Winnow, but… honestly, maybe the slow version can even be removed in favor of fast_decode. I wonder if anything speaks against that.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you suspected, that’s what I tried first. :-) But winnow places certain constraints on what it accepts as a parsing function. Just looking at tree locally, I don’t see why it shouldn’t be possible to rewrite it not to need winnow anymore. I haven’t tried yet, but could, either in this PR or in a follow-up.

let mut i = stream.input;

// Calculate an estimate of the amount of entries to reduce
// the amount of allocations necessary.
// Note that this assumes that we want speed over fitting Vecs, this is a trade-off.
// TODO(SHA256): know actual/desired length for reduced overallocation
const HASH_LEN_FIXME: usize = 20;
const AVERAGE_FILENAME_LEN: usize = 24;
const AVERAGE_MODE_LEN: usize = 6;
const ENTRY_DELIMITER_LEN: usize = 2; // space + trailing zero
const AVERAGE_TREE_ENTRIES: usize = 16 * 2; // prevent overallocation beyond what's meaningful or what could be dangerous
let average_entry_len = ENTRY_DELIMITER_LEN + HASH_LEN_FIXME + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN;
let average_entry_len = ENTRY_DELIMITER_LEN + stream.state.hash_len + AVERAGE_MODE_LEN + AVERAGE_FILENAME_LEN;
let upper_bound = i.len() / average_entry_len;
let mut out = Vec::with_capacity(upper_bound.min(AVERAGE_TREE_ENTRIES));

while !i.is_empty() {
let Some((rest, entry)) = fast_entry(i) else {
let Some((rest, entry)) = fast_entry(i, stream.state.hash_len) else {
#[allow(clippy::unit_arg)]
return Err(winnow::error::ErrMode::from_input(&i));
};
Expand Down
Loading
Loading