Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crate-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,7 @@ A git directory walk.
* [x] pathspec based filtering
* [ ] multi-threaded initialization of icase hash table is always used to accelerate index lookups, even if ignoreCase = false for performance
* [ ] special handling of submodules (for now, submodules or nested repositories are detected, but they can't be walked into naturally)
* [ ] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`)
* [x] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`)

### gix-index

Expand Down
6 changes: 6 additions & 0 deletions gix-dir/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ test = false
[features]
## Enable support for the SHA-1 hash by forwarding the feature to dependencies.
sha1 = ["gix-discover/sha1", "gix-index/sha1", "gix-object/sha1", "gix-worktree/sha1"]
## Enable support for handling attributes, forwarding the feature to dependencies.
attributes = ["gix-worktree/attributes"]

[dependencies]
gix-trace = { version = "^0.1.18", path = "../gix-trace" }
Expand All @@ -33,6 +35,10 @@ gix-utils = { version = "^0.3.1", path = "../gix-utils", features = ["bstr"] }
bstr = { version = "1.12.0", default-features = false }
thiserror = "2.0.18"

[target.'cfg(unix)'.dependencies]
libc = { version = "0.2.182" }
rustix = { version = "1.1.2", default-features = false, features = ["std", "system"] }

[dev-dependencies]
gix-testtools = { path = "../tests/tools" }
gix-fs = { path = "../gix-fs" }
Expand Down
10 changes: 9 additions & 1 deletion gix-dir/src/walk/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use bstr::{BStr, BString, ByteSlice};

use crate::{
entry,
walk::{classify, readdir, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome},
walk::{classify, readdir, untracked_cache, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome},
EntryRef,
};

Expand Down Expand Up @@ -106,6 +106,10 @@ pub fn walk(
}

let mut state = readdir::State::new(worktree_root, ctx.current_dir, options.for_deletion.is_some());
let untracked_cache = options
.use_untracked_cache
.then(|| untracked_cache::validate(worktree_root, ctx.index, &ctx, options))
.flatten();
let may_collapse = root != worktree_root && state.may_collapse(&current);
let (action, _) = readdir::recursive(
may_collapse,
Expand All @@ -117,6 +121,10 @@ pub fn walk(
delegate,
&mut out,
&mut state,
untracked_cache.as_ref(),
untracked_cache
.as_ref()
.map(|cache: &untracked_cache::Validated<'_>| cache.root_dir()),
)?;
if action.is_continue() {
state.emit_remaining(may_collapse, options, &mut out, delegate);
Expand Down
32 changes: 31 additions & 1 deletion gix-dir/src/walk/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub enum ForDeletionMode {
}

/// Options for use in [`walk()`](function::walk()) function.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub struct Options<'a> {
/// If `true`, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that
/// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
Expand Down Expand Up @@ -188,13 +188,36 @@ pub struct Options<'a> {
///
/// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
pub symlinks_to_directories_are_ignored_like_directories: bool,
/// If `true`, consult the untracked cache if it is present and otherwise applicable.
pub use_untracked_cache: bool,
/// A set of all git worktree checkouts that are located within the main worktree directory.
///
/// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
/// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
}

impl Default for Options<'_> {
fn default() -> Self {
Self {
precompose_unicode: false,
ignore_case: false,
recurse_repositories: false,
emit_pruned: false,
emit_ignored: None,
for_deletion: None,
classify_untracked_bare_repositories: false,
emit_tracked: false,
emit_untracked: Default::default(),
emit_empty_directories: false,
emit_collapsed: None,
symlinks_to_directories_are_ignored_like_directories: false,
use_untracked_cache: true,
worktree_relative_worktree_dirs: None,
}
}
}

/// All information that is required to perform a dirwalk, and classify paths properly.
pub struct Context<'a> {
/// If not `None`, it will be checked before entering any directory to trigger early interruption.
Expand Down Expand Up @@ -269,6 +292,12 @@ pub struct Outcome {
pub returned_entries: usize,
/// The amount of entries, prior to pathspecs filtering them out or otherwise excluding them.
pub seen_entries: u32,
/// The number of directories whose contents were served entirely from the untracked cache,
/// avoiding a `read_dir` syscall.
pub untracked_cache_hits: u32,
/// The number of directories skipped by the untracked cache due to a failed per-directory
/// stat validation, falling back to a real `read_dir` call instead.
pub untracked_cache_misses: u32,
}

/// The error returned by [`walk()`](function::walk()).
Expand Down Expand Up @@ -306,3 +335,4 @@ pub enum Error {
mod classify;
pub(crate) mod function;
mod readdir;
mod untracked_cache;
174 changes: 174 additions & 0 deletions gix-dir/src/walk/readdir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,38 @@ pub(super) fn recursive(
delegate: &mut dyn Delegate,
out: &mut Outcome,
state: &mut State,
untracked_cache: Option<&walk::untracked_cache::Validated<'_>>,
untracked_cache_dir: Option<usize>,
) -> Result<(Action, bool), Error> {
if ctx.should_interrupt.is_some_and(|flag| flag.load(Ordering::Relaxed)) {
return Err(Error::Interrupted);
}
let cache_attempted = untracked_cache.zip(untracked_cache_dir);
let cache_valid = cache_attempted.filter(|(cache, dir)| cache.is_dir_valid(*dir, current));
if cache_attempted.is_some() && cache_valid.is_none() {
out.untracked_cache_misses += 1;
}
if let Some((action, prevent_collapse)) = cache_valid
.map(|(cache, dir)| {
recursive_from_untracked_cache(
dir,
may_collapse,
current,
current_bstr,
current_info,
ctx,
opts,
delegate,
out,
state,
cache,
)
})
.transpose()?
{
out.untracked_cache_hits += 1;
return Ok((action, prevent_collapse));
}
out.read_dir_calls += 1;
let entries = gix_fs::read_dir(current, opts.precompose_unicode).map_err(|err| Error::ReadDir {
path: current.to_owned(),
Expand Down Expand Up @@ -96,6 +124,15 @@ pub(super) fn recursive(
delegate,
out,
state,
untracked_cache,
untracked_cache_dir.and_then(|dir| {
untracked_cache.and_then(|cache| {
let component = current_bstr
.rfind_byte(b'/')
.map_or(current_bstr.as_bstr(), |pos| current_bstr[pos + 1..].as_bstr());
cache.child_dir(dir, component)
})
}),
)?;
prevent_collapse |= subdir_prevent_collapse;
if action.is_break() {
Expand Down Expand Up @@ -141,6 +178,143 @@ pub(super) fn recursive(
Ok((res, prevent_collapse))
}

/// Serve the contents of `current` from the validated untracked cache entry at `cache_dir`
/// instead of performing a `read_dir` call, emitting untracked entries and recursing into
/// cached sub-directories via [`recursive()`] (which re-validates each child before use).
///
/// Returns the delegate's [`Action`] together with a flag indicating whether `current`
/// must be prevented from collapsing into a single directory entry.
///
/// If `cache_dir` does not resolve to cached directory data, nothing is emitted and
/// `Continue` is returned with `prevent_collapse = false`.
#[allow(clippy::too_many_arguments)]
fn recursive_from_untracked_cache(
    cache_dir: usize,
    may_collapse: bool,
    current: &mut PathBuf,
    current_bstr: &mut BString,
    current_info: classify::Outcome,
    ctx: &mut Context<'_>,
    opts: Options<'_>,
    delegate: &mut dyn Delegate,
    out: &mut Outcome,
    state: &mut State,
    untracked_cache: &walk::untracked_cache::Validated<'_>,
) -> Result<(Action, bool), Error> {
    let Some(cached) = untracked_cache.directory(cache_dir) else {
        return Ok((std::ops::ControlFlow::Continue(()), false));
    };

    let mut num_entries = 0;
    // Begin a collapse-tracking scope for this directory; it is resolved at the end
    // via `reduce_held_entries`.
    let mark = state.mark(may_collapse);
    // A tracked directory starts out as non-collapsible.
    let mut prevent_collapse = current_info.status == Status::Tracked;

    // Build the set of sub-directory names so we can skip their `"<name>/"` entries in
    // `untracked_entries` — those are handled (with proper stat validation) below.
    let subdir_names: std::collections::HashSet<&[u8]> = cached
        .sub_directories()
        .iter()
        .filter_map(|&i| untracked_cache.directory(i))
        .map(|d| d.name().as_bytes())
        .collect();

    for &subdir_idx in cached.sub_directories() {
        let Some(subdir) = untracked_cache.directory(subdir_idx) else {
            continue;
        };
        // Append `"/<name>"` to the relative path (no leading slash at the worktree root)
        // and the corresponding component to the filesystem path; both are undone below.
        let prev_len = current_bstr.len();
        if prev_len != 0 {
            current_bstr.push(b'/');
        }
        current_bstr.extend_from_slice(subdir.name());
        current.push(gix_path::from_bstr(subdir.name()));

        // Cached entries are directories by construction, so the kind is known up-front.
        let info = classify::path(
            current,
            current_bstr,
            if prev_len == 0 { 0 } else { prev_len + 1 },
            Some(entry::Kind::Directory),
            || Some(entry::Kind::Directory),
            opts,
            ctx,
        )?;
        if can_recurse(current_bstr.as_bstr(), info, opts.for_deletion, false, delegate) {
            num_entries += 1;
            let subdir_may_collapse = state.may_collapse(current);
            // Recurse through the normal entry point so the child's cache entry gets its
            // own validity check (and hit/miss accounting) before being used.
            let (action, subdir_prevent_collapse) = recursive(
                subdir_may_collapse,
                current,
                current_bstr,
                info,
                ctx,
                opts,
                delegate,
                out,
                state,
                Some(untracked_cache),
                Some(subdir_idx),
            )?;
            prevent_collapse |= subdir_prevent_collapse;
            if action.is_break() {
                return Ok((action, prevent_collapse));
            }
        }
        current_bstr.truncate(prev_len);
        current.pop();
    }

    for file in cached.untracked_entries() {
        // Git stores collapsed untracked directories in BOTH `sub_directories` AND as
        // `"<name>/"` in `untracked_entries`. Skip the `untracked_entries` copy — the
        // sub_directories loop above handles it (with proper per-directory stat
        // validation via `recursive()`). Emitting from here would bypass the stat check
        // and serve stale cache entries (e.g. if files inside were deleted).
        let (file_name, is_collapsed_dir) = file
            .as_slice()
            .strip_suffix(b"/")
            .map_or((file.as_slice(), false), |s| (s, true));
        if is_collapsed_dir && subdir_names.contains(file_name) {
            continue;
        }

        num_entries += 1;
        let prev_len = current_bstr.len();
        if prev_len != 0 {
            current_bstr.push(b'/');
        }
        current_bstr.extend_from_slice(file_name);
        current.push(gix_path::from_bstr(bstr::BStr::new(file_name)));
        // Clone the path: `current` is mutably borrowed by `classify::path` below while
        // the lazy metadata closure also needs access to it.
        let current_path = current.clone();

        let info = classify::path(
            current,
            current_bstr,
            if prev_len == 0 { 0 } else { prev_len + 1 },
            None,
            // The cache doesn't record the entry kind — determine it lazily on disk,
            // without following symlinks; yields `None` if the entry no longer exists.
            || {
                std::fs::symlink_metadata(&current_path)
                    .ok()
                    .map(|ft| ft.file_type().into())
            },
            opts,
            ctx,
        )?;
        if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) {
            // Not held back for possible directory collapsing — emit immediately.
            let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate);
            if action.is_break() {
                return Ok((action, prevent_collapse));
            }
        }
        current_bstr.truncate(prev_len);
        current.pop();
    }

    // Resolve the entries held back within this directory's scope into the final action,
    // mirroring the tail of `recursive()`.
    let res = mark.reduce_held_entries(
        num_entries,
        state,
        &mut prevent_collapse,
        current,
        current_bstr.as_bstr(),
        current_info,
        opts,
        out,
        ctx,
        delegate,
    );
    Ok((res, prevent_collapse))
}

pub(super) struct State {
/// The entries to hold back until it's clear what to do with them.
pub on_hold: Vec<Entry>,
Expand Down
Loading
Loading