diff --git a/Cargo.lock b/Cargo.lock index f9733b38bfa..aab2eaf5551 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1715,7 +1715,9 @@ dependencies = [ "gix-trace", "gix-utils", "gix-worktree", + "libc", "pretty_assertions", + "rustix", "thiserror 2.0.18", ] diff --git a/crate-status.md b/crate-status.md index 5cb879ce228..be0eed6f16a 100644 --- a/crate-status.md +++ b/crate-status.md @@ -843,7 +843,7 @@ A git directory walk. * [x] pathspec based filtering * [ ] multi-threaded initialization of icase hash table is always used to accelerate index lookups, even if ignoreCase = false for performance * [ ] special handling of submodules (for now, submodules or nested repositories are detected, but they can't be walked into naturally) -* [ ] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`) +* [x] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`) ### gix-index diff --git a/gix-dir/Cargo.toml b/gix-dir/Cargo.toml index 02a0db69c90..e098ad0d50e 100644 --- a/gix-dir/Cargo.toml +++ b/gix-dir/Cargo.toml @@ -17,6 +17,8 @@ test = false [features] ## Enable support for the SHA-1 hash by forwarding the feature to dependencies. sha1 = ["gix-discover/sha1", "gix-index/sha1", "gix-object/sha1", "gix-worktree/sha1"] +## Enable support for handling attributes, forwarding the feature to dependencies. 
+attributes = ["gix-worktree/attributes"] [dependencies] gix-trace = { version = "^0.1.18", path = "../gix-trace" } @@ -33,6 +35,10 @@ gix-utils = { version = "^0.3.1", path = "../gix-utils", features = ["bstr"] } bstr = { version = "1.12.0", default-features = false } thiserror = "2.0.18" +[target.'cfg(unix)'.dependencies] +libc = { version = "0.2.182" } +rustix = { version = "1.1.2", default-features = false, features = ["std", "system"] } + [dev-dependencies] gix-testtools = { path = "../tests/tools" } gix-fs = { path = "../gix-fs" } diff --git a/gix-dir/src/walk/function.rs b/gix-dir/src/walk/function.rs index d9cf7205320..a1375ebaeeb 100644 --- a/gix-dir/src/walk/function.rs +++ b/gix-dir/src/walk/function.rs @@ -7,7 +7,7 @@ use bstr::{BStr, BString, ByteSlice}; use crate::{ entry, - walk::{classify, readdir, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome}, + walk::{classify, readdir, untracked_cache, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome}, EntryRef, }; @@ -106,6 +106,10 @@ pub fn walk( } let mut state = readdir::State::new(worktree_root, ctx.current_dir, options.for_deletion.is_some()); + let untracked_cache = options + .use_untracked_cache + .then(|| untracked_cache::validate(worktree_root, ctx.index, &ctx, options)) + .flatten(); let may_collapse = root != worktree_root && state.may_collapse(¤t); let (action, _) = readdir::recursive( may_collapse, @@ -117,6 +121,10 @@ pub fn walk( delegate, &mut out, &mut state, + untracked_cache.as_ref(), + untracked_cache + .as_ref() + .map(|cache: &untracked_cache::Validated<'_>| cache.root_dir()), )?; if action.is_continue() { state.emit_remaining(may_collapse, options, &mut out, delegate); diff --git a/gix-dir/src/walk/mod.rs b/gix-dir/src/walk/mod.rs index 84f3dfde24a..926f5c9460d 100644 --- a/gix-dir/src/walk/mod.rs +++ b/gix-dir/src/walk/mod.rs @@ -138,7 +138,7 @@ pub enum ForDeletionMode { } /// Options for use in [`walk()`](function::walk()) function. 
-#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct Options<'a> { /// If `true`, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that /// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally @@ -188,6 +188,8 @@ pub struct Options<'a> { /// /// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`. pub symlinks_to_directories_are_ignored_like_directories: bool, + /// If `true`, consult the untracked cache if it is present and otherwise applicable. + pub use_untracked_cache: bool, /// A set of all git worktree checkouts that are located within the main worktree directory. /// /// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry). @@ -195,6 +197,27 @@ pub struct Options<'a> { pub worktree_relative_worktree_dirs: Option<&'a BTreeSet>, } +impl Default for Options<'_> { + fn default() -> Self { + Self { + precompose_unicode: false, + ignore_case: false, + recurse_repositories: false, + emit_pruned: false, + emit_ignored: None, + for_deletion: None, + classify_untracked_bare_repositories: false, + emit_tracked: false, + emit_untracked: Default::default(), + emit_empty_directories: false, + emit_collapsed: None, + symlinks_to_directories_are_ignored_like_directories: false, + use_untracked_cache: true, + worktree_relative_worktree_dirs: None, + } + } +} + /// All information that is required to perform a dirwalk, and classify paths properly. pub struct Context<'a> { /// If not `None`, it will be checked before entering any directory to trigger early interruption. 
@@ -269,6 +292,12 @@ pub struct Outcome { pub returned_entries: usize, /// The amount of entries, prior to pathspecs filtering them out or otherwise excluding them. pub seen_entries: u32, + /// The number of directories whose contents were served entirely from the untracked cache, + /// avoiding a `read_dir` syscall. + pub untracked_cache_hits: u32, + /// The number of directories skipped by the untracked cache due to a failed per-directory + /// stat validation, falling back to a real `read_dir` call instead. + pub untracked_cache_misses: u32, } /// The error returned by [`walk()`](function::walk()). @@ -306,3 +335,4 @@ pub enum Error { mod classify; pub(crate) mod function; mod readdir; +mod untracked_cache; diff --git a/gix-dir/src/walk/readdir.rs b/gix-dir/src/walk/readdir.rs index 178a032951e..930de5f072f 100644 --- a/gix-dir/src/walk/readdir.rs +++ b/gix-dir/src/walk/readdir.rs @@ -34,10 +34,38 @@ pub(super) fn recursive( delegate: &mut dyn Delegate, out: &mut Outcome, state: &mut State, + untracked_cache: Option<&walk::untracked_cache::Validated<'_>>, + untracked_cache_dir: Option, ) -> Result<(Action, bool), Error> { if ctx.should_interrupt.is_some_and(|flag| flag.load(Ordering::Relaxed)) { return Err(Error::Interrupted); } + let cache_attempted = untracked_cache.zip(untracked_cache_dir); + let cache_valid = cache_attempted.filter(|(cache, dir)| cache.is_dir_valid(*dir, current)); + if cache_attempted.is_some() && cache_valid.is_none() { + out.untracked_cache_misses += 1; + } + if let Some((action, prevent_collapse)) = cache_valid + .map(|(cache, dir)| { + recursive_from_untracked_cache( + dir, + may_collapse, + current, + current_bstr, + current_info, + ctx, + opts, + delegate, + out, + state, + cache, + ) + }) + .transpose()? 
+ { + out.untracked_cache_hits += 1; + return Ok((action, prevent_collapse)); + } out.read_dir_calls += 1; let entries = gix_fs::read_dir(current, opts.precompose_unicode).map_err(|err| Error::ReadDir { path: current.to_owned(), @@ -96,6 +124,15 @@ pub(super) fn recursive( delegate, out, state, + untracked_cache, + untracked_cache_dir.and_then(|dir| { + untracked_cache.and_then(|cache| { + let component = current_bstr + .rfind_byte(b'/') + .map_or(current_bstr.as_bstr(), |pos| current_bstr[pos + 1..].as_bstr()); + cache.child_dir(dir, component) + }) + }), )?; prevent_collapse |= subdir_prevent_collapse; if action.is_break() { @@ -141,6 +178,148 @@ pub(super) fn recursive( Ok((res, prevent_collapse)) } +#[allow(clippy::too_many_arguments)] +fn recursive_from_untracked_cache( + cache_dir: usize, + may_collapse: bool, + current: &mut PathBuf, + current_bstr: &mut BString, + current_info: classify::Outcome, + ctx: &mut Context<'_>, + opts: Options<'_>, + delegate: &mut dyn Delegate, + out: &mut Outcome, + state: &mut State, + untracked_cache: &walk::untracked_cache::Validated<'_>, +) -> Result<(Action, bool), Error> { + let Some(cached) = untracked_cache.directory(cache_dir) else { + return Ok((std::ops::ControlFlow::Continue(()), false)); + }; + + let mut num_entries = 0; + let mark = state.mark(may_collapse); + let mut prevent_collapse = current_info.status == Status::Tracked; + + // Build the set of sub-directory names so we can skip their `"/"` entries in + // `untracked_entries` — those are handled (with proper stat validation) below. 
+ let subdir_names: std::collections::HashSet<&[u8]> = cached + .sub_directories() + .iter() + .filter_map(|&i| untracked_cache.directory(i)) + .map(|d| d.name().as_bytes()) + .collect(); + + for &subdir_idx in cached.sub_directories() { + let Some(subdir) = untracked_cache.directory(subdir_idx) else { + continue; + }; + let prev_len = current_bstr.len(); + if prev_len != 0 { + current_bstr.push(b'/'); + } + current_bstr.extend_from_slice(subdir.name()); + current.push(gix_path::from_bstr(subdir.name())); + + let info = classify::path( + current, + current_bstr, + if prev_len == 0 { 0 } else { prev_len + 1 }, + Some(entry::Kind::Directory), + || Some(entry::Kind::Directory), + opts, + ctx, + )?; + num_entries += 1; + if can_recurse(current_bstr.as_bstr(), info, opts.for_deletion, false, delegate) { + let subdir_may_collapse = state.may_collapse(current); + let (action, subdir_prevent_collapse) = recursive( + subdir_may_collapse, + current, + current_bstr, + info, + ctx, + opts, + delegate, + out, + state, + Some(untracked_cache), + Some(subdir_idx), + )?; + prevent_collapse |= subdir_prevent_collapse; + if action.is_break() { + return Ok((action, prevent_collapse)); + } + } else if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) { + let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate); + if action.is_break() { + return Ok((action, prevent_collapse)); + } + } + current_bstr.truncate(prev_len); + current.pop(); + } + + for file in cached.untracked_entries() { + // Git stores collapsed untracked directories in BOTH `sub_directories` AND as + // `"/"` in `untracked_entries`. Skip the `untracked_entries` copy — the + // sub_directories loop above handles it (with proper per-directory stat + // validation via `recursive()`). Emitting from here would bypass the stat check + // and serve stale cache entries (e.g. if files inside were deleted). 
+ let (file_name, is_collapsed_dir) = file + .as_slice() + .strip_suffix(b"/") + .map_or((file.as_slice(), false), |s| (s, true)); + if is_collapsed_dir && subdir_names.contains(file_name) { + continue; + } + + num_entries += 1; + let prev_len = current_bstr.len(); + if prev_len != 0 { + current_bstr.push(b'/'); + } + current_bstr.extend_from_slice(file_name); + current.push(gix_path::from_bstr(bstr::BStr::new(file_name))); + let current_path = current.clone(); + + let info = classify::path( + current, + current_bstr, + if prev_len == 0 { 0 } else { prev_len + 1 }, + None, + || { + std::fs::symlink_metadata(¤t_path) + .ok() + .map(|ft| ft.file_type().into()) + }, + opts, + ctx, + )?; + if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) { + let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate); + if action.is_break() { + return Ok((action, prevent_collapse)); + } + } + current_bstr.truncate(prev_len); + current.pop(); + } + + let res = mark.reduce_held_entries( + num_entries, + state, + &mut prevent_collapse, + current, + current_bstr.as_bstr(), + current_info, + opts, + out, + ctx, + delegate, + ); + Ok((res, prevent_collapse)) +} + pub(super) struct State { /// The entries to hold back until it's clear what to do with them. 
pub on_hold: Vec, diff --git a/gix-dir/src/walk/untracked_cache.rs b/gix-dir/src/walk/untracked_cache.rs new file mode 100644 index 00000000000..7311c56b7df --- /dev/null +++ b/gix-dir/src/walk/untracked_cache.rs @@ -0,0 +1,259 @@ +use std::path::Path; + +use bstr::{BStr, ByteSlice}; + +use crate::walk::{Context, Options}; + +const DIR_SHOW_OTHER_DIRECTORIES: u32 = 1 << 1; +const DIR_HIDE_EMPTY_DIRECTORIES: u32 = 1 << 2; + +pub(crate) struct Validated<'a> { + cache: &'a gix_index::extension::UntrackedCache, + object_hash: gix_index::hash::Kind, +} + +impl<'a> Validated<'a> { + pub(crate) fn root_dir(&self) -> usize { + 0 + } + + pub(crate) fn child_dir(&self, parent: usize, name: &BStr) -> Option { + self.directory(parent)? + .sub_directories() + .iter() + .copied() + .find(|idx| self.directory(*idx).is_some_and(|dir| dir.name() == name)) + } + + pub(crate) fn directory(&self, idx: usize) -> Option<&'a gix_index::extension::untracked_cache::Directory> { + self.cache.directories().get(idx) + } + + pub(crate) fn is_dir_valid(&self, idx: usize, absolute_dir: &Path) -> bool { + let Some(dir) = self.directory(idx) else { + return false; + }; + let Some(expected) = dir.stat() else { + return false; + }; + let actual = match gix_index::fs::Metadata::from_path_no_follow(absolute_dir) + .and_then(|meta| gix_index::entry::Stat::from_fs(&meta).map_err(std::io::Error::other)) + { + Ok(s) => s, + Err(_) => return false, + }; + let use_nsec = expected.mtime.nsecs != 0; + let opts = gix_index::entry::stat::Options { + use_nsec, + ..Default::default() + }; + if !expected.matches(&actual, opts) { + return false; + } + // If the IOUC recorded a .gitignore OID, verify the current file matches it. + // If no OID was recorded, trust the directory stat — git skips the .gitignore + // check entirely when exclude_oid is null and the directory stat is valid (see + // valid_cached_dir() / prep_exclude() in git's dir.c). 
A newly-added .gitignore + // would change the directory stat, which is already checked above. + if let Some(expected_oid) = dir.exclude_file_oid() { + let ignore_path = absolute_dir.join(gix_path::from_bstr(self.cache.exclude_filename_per_dir())); + gitignore_matches(expected_oid, &ignore_path, self.object_hash) + } else { + true + } + } +} + +pub(crate) fn validate<'a>( + worktree_root: &Path, + index: &'a gix_index::State, + ctx: &Context<'_>, + opts: Options<'_>, +) -> Option> { + let cache = index.untracked()?; + if !cache_is_applicable(worktree_root, opts, ctx)? { + return None; + } + + let ident = cache.identifier().split_str("\0").next().unwrap_or(cache.identifier()); + if !ident.starts_with(expected_ident(worktree_root, ctx.current_dir).as_bytes()) { + return None; + } + + #[allow(unreachable_patterns)] + let ignore = match ctx.excludes.as_deref()?.state() { + gix_worktree::stack::State::IgnoreStack(ignore) => ignore, + #[cfg(feature = "attributes")] + gix_worktree::stack::State::AttributesAndIgnoreStack { ignore, .. } => ignore, + _ => return None, + }; + if !matches!( + ignore.source(), + gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped + ) { + return None; + } + if !ignore.overrides().patterns.is_empty() { + return None; + } + if ignore.exclude_file_name_for_directories() != cache.exclude_filename_per_dir() { + return None; + } + let info_exclude_path = ctx.git_dir_realpath.join("info").join("exclude"); + let excludes_file = ignore + .globals() + .patterns + .iter() + .filter_map(|list| list.source.as_deref()) + .find(|path| gix_path::realpath(*path).ok().as_deref() != Some(info_exclude_path.as_path())); + let object_hash = index.object_hash(); + match (cache.excludes_file(), excludes_file) { + (Some(expected), Some(path)) if validate_cached_stat(expected, path) => {} + (None, None) => {} + _ => return None, + } + // Also validate the cached .git/info/exclude stat and OID. 
If info/exclude changed since + // the UNTR snapshot was written, cached ignore decisions for directories could be stale. + // We verify the content hash in addition to the stat to catch same-second, same-size edits. + match cache.info_exclude() { + Some(expected) + if !validate_cached_stat(expected, &info_exclude_path) + || !gitignore_matches(expected.id(), &info_exclude_path, object_hash) => + { + return None + } + _ => {} + } + + Some(Validated { + cache, + object_hash: index.object_hash(), + }) +} + +fn cache_is_applicable(worktree_root: &Path, opts: Options<'_>, ctx: &Context<'_>) -> Option { + if opts.emit_ignored.is_some() + || opts.for_deletion.is_some() + || opts.emit_tracked + || opts.recurse_repositories + || opts.classify_untracked_bare_repositories + || opts.symlinks_to_directories_are_ignored_like_directories + || opts.worktree_relative_worktree_dirs.is_some() + || ctx.explicit_traversal_root.is_some_and(|root| root != worktree_root) + || ctx.pathspec.patterns().len() != 0 + { + return Some(false); + } + let _dir_flags_hint = match (opts.emit_untracked, opts.emit_empty_directories) { + (crate::walk::EmissionMode::CollapseDirectory, false) => { + DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES + } + (crate::walk::EmissionMode::Matching, _) => 0, + (crate::walk::EmissionMode::CollapseDirectory, true) => return Some(false), + }; + Some(true) +} + +/// Check whether the `.gitignore` file at `path` matches the `expected_oid` stored in the IOUC. +/// +/// Git's `add_patterns()` in `dir.c` always appends `'\n'` to the buffer before hashing +/// (to ensure the last pattern is terminated), but it uses the index entry OID directly +/// when the file is tracked and uptodate in the index. This means the IOUC may contain +/// either `hash(content)` or `hash(content + '\n')` depending on whether the file's +/// index stat was current at the time of the last `git status` run. We accept both. 
+fn gitignore_matches( + expected_oid: &gix_index::hash::ObjectId, + path: &Path, + object_hash: gix_index::hash::Kind, +) -> bool { + let Ok(data) = std::fs::read(path) else { return false }; + if let Ok(oid) = gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &data) { + if oid == *expected_oid { + return true; + } + } + // Also try with appended '\n' (git's condition-3 hashing path) + let mut data_plus_nl = data; + data_plus_nl.push(b'\n'); + gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &data_plus_nl).is_ok_and(|oid| oid == *expected_oid) +} + +fn validate_cached_stat(expected: &gix_index::extension::untracked_cache::OidStat, path: &Path) -> bool { + let Ok(actual) = gix_index::fs::Metadata::from_path_no_follow(path) + .and_then(|meta| gix_index::entry::Stat::from_fs(&meta).map_err(std::io::Error::other)) + else { + return false; + }; + expected.stat().matches(&actual, Default::default()) +} + +fn expected_ident(worktree_root: &Path, _current_dir: &Path) -> String { + let path = normalize_ident_path(worktree_root); + format!("Location {path}, system {}", system_name()) +} + +#[cfg(not(windows))] +fn normalize_ident_path(path: &Path) -> String { + gix_path::realpath(path) + .unwrap_or_else(|_| path.to_owned()) + .display() + .to_string() +} + +#[cfg(windows)] +fn normalize_ident_path(path: &Path) -> String { + // Use canonicalize to resolve symlinks and expand 8.3 short names (via + // GetFinalPathNameByHandleW), matching how git normalizes paths in the IOUC + // identifier on Windows. + let canonical = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_owned()); + // canonicalize on Windows may return a verbatim path (\\?\C:\...), strip it. + let s = canonical.to_string_lossy(); + let s = s.strip_prefix("\\\\?\\").unwrap_or(&*s); + // git uses forward slashes in the ident. 
+ s.replace('\\', "/") +} + +#[cfg(unix)] +fn system_name() -> String { + rustix::system::uname().sysname().to_string_lossy().into_owned() +} + +#[cfg(not(unix))] +fn system_name() -> String { + // std::env::consts::OS returns "windows" (lowercase), but git writes "Windows". + let os = std::env::consts::OS; + let mut chars = os.chars(); + match chars.next() { + None => String::new(), + Some(c) => c.to_uppercase().collect::() + chars.as_str(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use gix_testtools::tempfile; + use std::fs; + + #[test] + #[cfg(unix)] + fn expected_ident_resolves_symlinks() -> Result<(), Box> { + let tmp = tempfile::tempdir()?; + let real_dir = tmp.path().join("real"); + fs::create_dir(&real_dir)?; + let symlink_dir = tmp.path().join("symlink"); + std::os::unix::fs::symlink(&real_dir, &symlink_dir)?; + + let current_dir = std::env::current_dir()?; + let ident_real = expected_ident(&real_dir, ¤t_dir); + let ident_symlink = expected_ident(&symlink_dir, ¤t_dir); + + assert_eq!( + ident_real, ident_symlink, + "identifiers must be identical for the same physical location" + ); + assert!(ident_real.contains("real"), "it must contain the resolved path"); + assert!(!ident_real.contains("symlink"), "it must not contain the symlink path"); + Ok(()) + } +} diff --git a/gix-dir/tests/dir/walk.rs b/gix-dir/tests/dir/walk.rs index 09ff80b1708..7cfa8aa04b6 100644 --- a/gix-dir/tests/dir/walk.rs +++ b/gix-dir/tests/dir/walk.rs @@ -1,5 +1,6 @@ -use std::{collections::BTreeSet, sync::atomic::AtomicBool}; +use std::{collections::BTreeSet, process::Command, sync::atomic::AtomicBool}; +use bstr::ByteSlice; use gix_dir::{ entry, entry::{Kind::*, PathspecMatch::*, Property::*, Status::*}, @@ -65,6 +66,7 @@ fn one_top_level_fifo() { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 2, + ..Default::default() } ); @@ -97,6 +99,7 @@ fn fifo_in_traversal() { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 5, + 
..Default::default() } ); @@ -133,6 +136,7 @@ fn symlink_to_dir_can_be_excluded() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 9, + ..Default::default() } ); @@ -168,6 +172,7 @@ fn symlink_to_dir_can_be_excluded() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 9, + ..Default::default() } ); @@ -266,6 +271,7 @@ fn assume_unchanged_submodule_replaced_with_symlink_is_hidden() -> crate::Result read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert!( @@ -296,6 +302,7 @@ fn submodule_replaced_with_symlink_without_assume_unchanged_is_untracked() -> cr read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( @@ -334,6 +341,7 @@ fn empty_root() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -359,6 +367,7 @@ fn empty_root() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -379,6 +388,7 @@ fn complex_empty() -> crate::Result { read_dir_calls: 9, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -410,6 +420,7 @@ fn complex_empty() -> crate::Result { read_dir_calls: 9, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -436,6 +447,7 @@ fn complex_empty() -> crate::Result { read_dir_calls: 9, returned_entries: entries.len(), seen_entries: 9, + ..Default::default() } ); assert_eq!( @@ -478,6 +490,7 @@ fn ignored_with_prefix_pathspec_collapses_just_like_untracked() -> crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 6, + ..Default::default() } ); assert_eq!( @@ -511,6 +524,7 @@ fn ignored_with_prefix_pathspec_collapses_just_like_untracked() -> crate::Result read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 8, + 
..Default::default() } ); assert_eq!( @@ -552,6 +566,7 @@ fn ignored_dir_with_cwd_handling() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( @@ -586,6 +601,7 @@ fn ignored_dir_with_cwd_handling() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 2, + ..Default::default() } ); assert_eq!( @@ -622,7 +638,8 @@ fn ignored_dir_with_cwd_handling() -> crate::Result { walk::Outcome { read_dir_calls: 8, returned_entries: entries.len(), - seen_entries: 26 + seen_entries: 26, + ..Default::default() } ); assert_eq!( @@ -667,6 +684,7 @@ fn ignored_with_cwd_handling() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); @@ -702,6 +720,7 @@ fn ignored_with_cwd_handling() -> crate::Result { read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() } ); @@ -746,6 +765,7 @@ fn only_untracked_with_cwd_handling() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 9, + ..Default::default() } ); assert_eq!( @@ -784,6 +804,7 @@ fn only_untracked_with_cwd_handling() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -818,6 +839,7 @@ fn only_untracked_with_cwd_handling() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 8, + ..Default::default() } ); assert_eq!( @@ -858,6 +880,7 @@ fn only_untracked_with_cwd_handling() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); assert_eq!( @@ -900,6 +923,7 @@ fn only_untracked_with_pathspec() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -932,6 +956,7 @@ fn only_untracked_with_pathspec() -> crate::Result { read_dir_calls: 2, returned_entries: 
entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -964,6 +989,7 @@ fn only_untracked_with_prefix_deletion() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -990,6 +1016,7 @@ fn only_untracked_with_prefix_deletion() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -1010,6 +1037,7 @@ fn only_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() } ); assert_eq!( @@ -1032,6 +1060,7 @@ fn only_untracked() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( @@ -1060,6 +1089,7 @@ fn only_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 7 + 2, + ..Default::default() }, "There are 2 extra directories that we fold into, but ultimately discard" ); @@ -1100,6 +1130,7 @@ fn only_untracked_explicit_pathspec_selection() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() }, ); assert_eq!( @@ -1134,6 +1165,7 @@ fn only_untracked_explicit_pathspec_selection() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() }, "no collapsing happens" ); @@ -1169,6 +1201,7 @@ fn only_untracked_explicit_pathspec_selection() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 2 + 3, + ..Default::default() }, "collapsing happens just like Git" ); @@ -1190,6 +1223,7 @@ fn expendable_and_precious() { read_dir_calls: 6, returned_entries: entries.len(), seen_entries: 18, + ..Default::default() } ); assert_eq!( @@ -1235,6 +1269,7 @@ fn expendable_and_precious() { read_dir_calls: 6, returned_entries: entries.len(), seen_entries: 18 + 2, + ..Default::default() } ); @@ -1280,6 
+1315,7 @@ fn expendable_and_precious() { read_dir_calls: 6, returned_entries: entries.len(), seen_entries: 16 + 2, + ..Default::default() } ); @@ -1303,6 +1339,7 @@ fn subdir_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() } ); assert_eq!(entries, [entry("d/d/a", Untracked, File)]); @@ -1322,6 +1359,7 @@ fn subdir_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() }, "pruning has no actual effect here as there is no extra directories that could be avoided" ); @@ -1344,6 +1382,7 @@ fn subdir_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 7 + 1, + ..Default::default() }, "there is a folded directory we added" ); @@ -1362,6 +1401,7 @@ fn only_untracked_from_subdir() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -1399,6 +1439,7 @@ fn untracked_and_ignored_pathspec_guidance() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() }, "we have to read the parent directory, just like git, as we can't assume a directory" ); @@ -1439,6 +1480,7 @@ fn untracked_and_ignored_for_deletion_negative_wildcard_spec() -> crate::Result read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 23, + ..Default::default() }, ); assert_eq!( @@ -1494,6 +1536,7 @@ fn untracked_and_ignored_for_deletion_positive_wildcard_spec() -> crate::Result read_dir_calls: 8, returned_entries: entries.len(), seen_entries: 27, + ..Default::default() }, ); assert_eq!( @@ -1547,6 +1590,7 @@ fn untracked_and_ignored_for_deletion_nonmatching_wildcard_spec() -> crate::Resu read_dir_calls: 8, returned_entries: entries.len(), seen_entries: 28, + ..Default::default() }, ); assert_eq!( @@ -1693,6 +1737,7 @@ fn expendable_and_precious_in_ignored_dir_with_pathspec() -> 
crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() }, ); @@ -1735,6 +1780,7 @@ fn expendable_and_precious_in_ignored_dir_with_pathspec() -> crate::Result { read_dir_calls: 9, returned_entries: entries.len(), seen_entries: 19, + ..Default::default() }, ); @@ -1789,6 +1835,7 @@ fn expendable_and_precious_in_ignored_dir_with_pathspec() -> crate::Result { read_dir_calls: 9, returned_entries: entries.len(), seen_entries: 19, + ..Default::default() }, ); @@ -1837,6 +1884,7 @@ fn untracked_and_ignored() -> crate::Result { read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 21, + ..Default::default() }, "some untracked ones are hidden by default" ); @@ -1883,6 +1931,7 @@ fn untracked_and_ignored() -> crate::Result { read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 21, + ..Default::default() }, "basically the same result…" ); @@ -1915,6 +1964,7 @@ fn untracked_and_ignored() -> crate::Result { read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 21 + 1, + ..Default::default() }, "we still encounter the same amount of entries, and 1 folded directory" ); @@ -1941,6 +1991,7 @@ fn untracked_and_ignored() -> crate::Result { read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 21 + 2, + ..Default::default() }, "some untracked ones are hidden by default, folded directories" ); @@ -1983,6 +2034,7 @@ fn untracked_and_ignored() -> crate::Result { read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 21 + 3, + ..Default::default() }, "some untracked ones are hidden by default, and folded directories" ); @@ -2038,6 +2090,7 @@ fn untracked_and_ignored_collapse_handling_mixed() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() }, "it has to read 'd/d' as 'd/d/b.o' isn't a directory candidate" ); @@ -2076,6 +2129,7 @@ fn untracked_and_ignored_collapse_handling_mixed() -> crate::Result { read_dir_calls: 4, 
returned_entries: entries.len(), seen_entries: 21, + ..Default::default() }, ); @@ -2126,7 +2180,8 @@ fn untracked_and_ignored_collapse_handling_mixed_with_prefix() -> crate::Result walk::Outcome { read_dir_calls: 3, returned_entries: entries.len(), - seen_entries: 11 + seen_entries: 11, + ..Default::default() }, "this is not a directory, so the prefix is only 'd', not 'd/d'" ); @@ -2168,6 +2223,7 @@ fn untracked_and_ignored_collapse_handling_mixed_with_prefix() -> crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 6, + ..Default::default() }, ); @@ -2218,7 +2274,8 @@ fn untracked_and_ignored_collapse_handling_for_deletion_with_wildcards() -> crat walk::Outcome { read_dir_calls: 8, returned_entries: entries.len(), - seen_entries: 26 + seen_entries: 26, + ..Default::default() }, ); assert_eq!( @@ -2264,7 +2321,8 @@ fn untracked_and_ignored_collapse_handling_for_deletion_with_wildcards() -> crat walk::Outcome { read_dir_calls: 8, returned_entries: entries.len(), - seen_entries: 28 + seen_entries: 28, + ..Default::default() }, ); assert_eq!( @@ -2322,6 +2380,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_with_prefix_wildcards() read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 2, + ..Default::default() }, ); assert_eq!( @@ -2354,6 +2413,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> crate::Result read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 21, + ..Default::default() }, ); @@ -2383,6 +2443,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> crate::Result read_dir_calls: 5, returned_entries: entries.len(), seen_entries: 24, + ..Default::default() }, ); @@ -2431,6 +2492,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 6, + ..Default::default() }, ); @@ -2473,6 +2535,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> 
crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() }, ); @@ -2512,6 +2575,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() }, ); @@ -2549,6 +2613,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 6, + ..Default::default() }, ); @@ -2588,6 +2653,7 @@ fn untracked_and_ignored_collapse_handling_for_deletion_mixed() -> crate::Result read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() }, ); @@ -2648,6 +2714,7 @@ fn precious_are_not_expendable() { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 10, + ..Default::default() }, ); @@ -2690,6 +2757,7 @@ fn precious_are_not_expendable() { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 10, + ..Default::default() }, "'d' is assumed to be a file, hence it's stripped to its base '', yielding one more call." 
); @@ -2729,6 +2797,7 @@ fn precious_are_not_expendable() { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() }, "{equivalent_pathspec}: should yield same result, they also see the 'd' prefix directory" ); @@ -2764,6 +2833,7 @@ fn precious_are_not_expendable() { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 9, + ..Default::default() }, ); @@ -2812,6 +2882,7 @@ fn decomposed_unicode_in_directory_is_returned_precomposed() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -2838,6 +2909,7 @@ fn decomposed_unicode_in_directory_is_returned_precomposed() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() }, "note how it starts directly in the right repository" ); @@ -2872,6 +2944,7 @@ fn worktree_root_can_be_symlink() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -2882,6 +2955,571 @@ fn worktree_root_can_be_symlink() -> crate::Result { Ok(()) } +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn untracked_cache_can_avoid_read_dir_calls() -> crate::Result { + let root = repo_with_untracked_cache()?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + let ((uncached_out, _root), uncached_entries) = collect_with_repo_globals(&root, opts, false)?; + + assert_eq!( + out.read_dir_calls, 0, + "a valid UNTR cache should satisfy the walk without opening directories" + ); + assert_ne!( + uncached_out.read_dir_calls, 0, + "the fallback implementation should still hit the filesystem" + ); + assert_eq!( + entries, uncached_entries, + "cached and uncached walks should produce identical output" + ); + assert_eq!( + 
untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "collapsed untracked entries should match git status" + ); + Ok(()) +} + +#[test] +fn invalidated_untracked_cache_falls_back_to_the_filesystem() -> crate::Result { + let root = repo_with_untracked_cache()?; + std::fs::write(root.join("later"), "later")?; + + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + + assert_ne!( + out.read_dir_calls, 0, + "changing the root directory contents must invalidate the cache" + ); + assert!( + entries.iter().any(|(entry, _)| entry.rela_path.as_bstr() == "later"), + "the fallback traversal should see newly added files" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "fallback output should still match git status after invalidation" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn global_excludes_change_disables_untracked_cache() -> crate::Result { + let root = repo_with_untracked_cache()?; + let excludes_file = root.join("global-excludes"); + std::fs::write(&excludes_file, "global-ignored/\n")?; + git( + &root, + [ + std::ffi::OsStr::new("config"), + std::ffi::OsStr::new("core.excludesFile"), + excludes_file.as_os_str(), + ], + )?; + std::fs::create_dir_all(root.join("global-ignored"))?; + std::fs::write(root.join("global-ignored/file"), "ignored")?; + refresh_untracked_cache(&root)?; + + std::fs::write(&excludes_file, "")?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals_opts(&root, opts, true, Some(&excludes_file), &[])?; + + assert_ne!( + out.read_dir_calls, 0, + "changing core.excludesFile contents must disable the UNTR fast path" + ); + assert!( + entries + .iter() + 
.any(|(entry, _)| entry.rela_path.as_bstr() == "global-ignored"), + "the filesystem fallback should see entries that were formerly hidden by a global exclude" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "global exclude changes should still produce git-compatible output" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn info_exclude_change_disables_untracked_cache() -> crate::Result { + let root = repo_with_untracked_cache()?; + let info_dir = root.join(".git/info"); + std::fs::create_dir_all(&info_dir)?; + let info_exclude = info_dir.join("exclude"); + std::fs::write(&info_exclude, "info-excluded/\n")?; + std::fs::create_dir_all(root.join("info-excluded"))?; + std::fs::write(root.join("info-excluded/file"), "excluded")?; + refresh_untracked_cache(&root)?; + + // Now change info/exclude so the cache's recorded stat+OID no longer matches. + std::fs::write(&info_exclude, "")?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + + assert_ne!( + out.read_dir_calls, 0, + "changing .git/info/exclude contents must disable the UNTR fast path" + ); + assert!( + entries + .iter() + .any(|(entry, _)| entry.rela_path.as_bstr() == "info-excluded"), + "the filesystem fallback should see entries that were formerly hidden by info/exclude" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "info/exclude changes should still produce git-compatible output" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn global_excludes_file_present_and_unchanged_allows_untracked_cache() -> crate::Result { + let root = repo_with_untracked_cache()?; + let excludes_file = root.join("global-excludes"); + 
std::fs::write(&excludes_file, "global-ignored/\n")?; + std::fs::create_dir_all(root.join("global-ignored"))?; + std::fs::write(root.join("global-ignored/file"), "ignored")?; + git( + &root, + [ + std::ffi::OsStr::new("config"), + std::ffi::OsStr::new("core.excludesFile"), + excludes_file.as_os_str(), + ], + )?; + refresh_untracked_cache(&root)?; + + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals_opts(&root, opts, true, Some(&excludes_file), &[])?; + + assert_eq!( + out.read_dir_calls, 0, + "cache must serve all directories without read_dir when excludes_file is present but unchanged" + ); + assert_ne!( + out.untracked_cache_hits, + 0, // a walk served from the cache must record at least one hit + "cache hits should be non-zero" + ); + assert!( + out.untracked_cache_hits > 0, + "expected cache hits but got 0 — the UNTR decode or excludes_file stat validation is broken" + ); + assert!( + !entries + .iter() + .any(|(entry, _)| entry.rela_path.as_bstr() == "global-ignored"), + "globally-ignored directory must not appear in output" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "output with unchanged global excludes should match git status" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn nested_gitignore_change_invalidates_cached_subtree() -> crate::Result { + let root = repo_with_untracked_cache()?; + std::fs::write(root.join("tracked/.gitignore"), "")?; + git(&root, ["add", "tracked/.gitignore"])?; + git(&root, ["commit", "-m", "tracked ignore"])?; + refresh_untracked_cache(&root)?; + + std::fs::write(root.join("tracked/.gitignore"), "new/\n")?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + + assert_ne!( +
out.read_dir_calls, 0, + "changing a nested .gitignore should invalidate the cached subtree" + ); + assert!( + !entries + .iter() + .any(|(entry, _)| entry.rela_path.as_bstr() == "tracked/new"), + "the fallback traversal should honor the updated ignore file" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "nested .gitignore invalidation should still agree with git status" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn non_empty_pathspec_never_uses_untracked_cache() -> crate::Result { + let root = repo_with_untracked_cache()?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals_opts(&root, opts, true, None, &["tracked/"])?; + let ((uncached_out, _root), uncached_entries) = + collect_with_repo_globals_opts(&root, opts, false, None, &["tracked/"])?; + + assert_ne!( + out.read_dir_calls, 0, + "a non-empty pathspec should disable the UNTR fast path" + ); + assert_ne!( + uncached_out.read_dir_calls, 0, + "the uncached comparison should still traverse the filesystem" + ); + assert_eq!( + entries, uncached_entries, + "pathspec filtering should match the uncached traversal" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &["tracked/"])?, + "pathspec-filtered collapsed output should match git status" + ); + Ok(()) +} + +#[test] +// On Windows, NTFS flushes directory metadata asynchronously. A directory that was +// recently modified (like `tracked` here, after `tracked/new` was created) may report +// a different `LastWriteTime` via different APIs or at different instants. This causes +// the IOUC stat check for `tracked` to fail even after two `git status` runs, making +// `read_dir_calls` flaky. Skip rather than accept a racy assertion. 
+#[cfg_attr(windows, ignore)] +fn matching_mode_with_tracked_intermediate_dirs_matches_uncached() -> crate::Result { + let root = repo_with_untracked_cache()?; + let opts = gix_dir::walk::Options { + emit_untracked: Matching, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + let ((uncached_out, _root), uncached_entries) = collect_with_repo_globals(&root, opts, false)?; + + assert_eq!( + out.read_dir_calls, 0, + "matching mode should still use a valid UNTR cache" + ); + assert_ne!( + uncached_out.read_dir_calls, 0, + "the comparison path should still hit the filesystem" + ); + assert_eq!( + entries, uncached_entries, + "matching mode output should match the uncached traversal" + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Matching, &[])?, + "matching-mode untracked entries should match git status -uall" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn emit_tracked_true_bypasses_untracked_cache() -> crate::Result { + let root = repo_with_untracked_cache()?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + emit_tracked: true, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + + assert_ne!( + out.read_dir_calls, 0, + "emit_tracked=true must disable the UNTR fast path since the cache only records untracked entries" + ); + assert!( + entries + .iter() + .any(|(entry, _)| entry.rela_path.as_bstr() == "tracked/keep"), + "tracked file must appear in output when emit_tracked=true" + ); + Ok(()) +} + +#[test] +#[cfg_attr(windows, ignore)] // NTFS async metadata flush causes flaky mtime mismatches +fn cached_subdir_becoming_repository_is_emitted() -> crate::Result { + let root = repo_with_untracked_cache()?; + // Turn `new/` (a regular untracked dir in the cache) into a nested repo. 
+ // This changes `new/`'s mtime but not root's, so root's cache entry stays valid. + git(&root, ["init", "new"])?; + let opts = gix_dir::walk::Options { + emit_untracked: CollapseDirectory, + ..options() + }; + let ((out, _root), entries) = collect_with_repo_globals(&root, opts, true)?; + + assert!( + entries.iter().any(|(entry, _)| { + entry.rela_path.as_bstr() == "new" && entry.disk_kind == Some(gix_dir::entry::Kind::Repository) + }), + "a cached subdir that became a repository must still appear in output, but entries were: {:?}", + entries + .iter() + .map(|(e, _)| e.rela_path.as_bstr().to_owned()) + .collect::>() + ); + assert_eq!( + untracked_paths(&entries), + git_untracked_paths(&root, GitUntrackedMode::Collapsed, &[])?, + "output must match git status after subdir becomes a repository" + ); + let _ = out; + Ok(()) +} + +fn repo_with_untracked_cache() -> crate::Result { + let tmp = gix_testtools::tempfile::tempdir()?; + let base = tmp.path().to_path_buf(); + std::mem::forget(tmp); + let root = base.join("repo"); + std::fs::create_dir(&root)?; + git(&root, ["init"])?; + git(&root, ["config", "status.showUntrackedFiles", "all"])?; + git(&root, ["config", "user.name", "a"])?; + git(&root, ["config", "user.email", "a@example.com"])?; + std::fs::create_dir(root.join("tracked"))?; + std::fs::write(root.join("tracked/keep"), "keep")?; + git(&root, ["add", "tracked/keep"])?; + git(&root, ["commit", "-m", "init"])?; + std::fs::create_dir_all(root.join("tracked/new"))?; + std::fs::create_dir_all(root.join("new"))?; + std::fs::write(root.join("tracked/new/file"), "tracked-new")?; + std::fs::write(root.join("new/file"), "new")?; + refresh_untracked_cache(&root)?; + Ok(root) +} + +fn git(cwd: &std::path::Path, args: impl IntoIterator>) -> crate::Result { + let status = Command::new("git").args(args).current_dir(cwd).status()?; + assert!(status.success()); + Ok(()) +} + +fn git_output( + cwd: &std::path::Path, + args: impl IntoIterator>, +) -> crate::Result { + 
Ok(Command::new("git").args(args).current_dir(cwd).output()?) +} + +fn effective_excludes_file(root: &std::path::Path) -> crate::Result> { + let output = git_output( + root, + [ + std::ffi::OsStr::new("config"), + std::ffi::OsStr::new("--path"), + std::ffi::OsStr::new("core.excludesFile"), + ], + )?; + if output.status.success() { + let path = gix_path::try_from_bstr(output.stdout.as_bstr().trim().as_bstr()) + .ok() + .map(std::borrow::Cow::into_owned); + return Ok(path); + } + // No core.excludesFile configured — fall back to the XDG default, matching gix's + // `assemble_exclude_globals` which calls `xdg_config_path("ignore")`. + let xdg_ignore = std::env::var_os("XDG_CONFIG_HOME") + .map(std::path::PathBuf::from) + .or_else(|| std::env::var_os("HOME").map(|h| std::path::PathBuf::from(h).join(".config"))) + .map(|base| base.join("git").join("ignore")); + Ok(xdg_ignore.filter(|p| p.exists())) +} + +fn refresh_untracked_cache(root: &std::path::Path) -> crate::Result { + git(root, ["update-index", "--force-untracked-cache"])?; + git(root, ["status", "--porcelain"])?; + // Run a second time so git validates the recorded directory stats and sets the valid + // bitmap. Some git versions only populate the structure on the first run and mark + // entries valid on the second. The double-run also lets the filesystem settle so the + // recorded stats match what gix will read. 
+ git(root, ["status", "--porcelain"])?; + assert!( + index_has_untracked_cache(root), + "test repository must have a UNTR extension" + ); + Ok(()) +} + +fn index_has_untracked_cache(root: &std::path::Path) -> bool { + std::fs::read(root.join(".git/index")) + .ok() + .and_then(|bytes| { + gix_index::State::from_bytes( + &bytes, + std::time::UNIX_EPOCH.into(), + gix_index::hash::Kind::Sha1, + Default::default(), + ) + .ok() + .map(|(state, _)| state.untracked().is_some()) + }) + .unwrap_or(false) +} + +type CollectedEntries = Vec<(gix_dir::Entry, Option)>; +type CollectOutcome = ((gix_dir::walk::Outcome, std::path::PathBuf), CollectedEntries); + +#[derive(Clone, Copy)] +enum GitUntrackedMode { + Collapsed, + Matching, +} + +fn git_untracked_paths( + root: &std::path::Path, + mode: GitUntrackedMode, + pathspecs: &[&str], +) -> crate::Result> { + let mut cmd = Command::new("git"); + cmd.current_dir(root).arg("status").arg("--porcelain").arg(match mode { + GitUntrackedMode::Collapsed => "--untracked-files=normal", + GitUntrackedMode::Matching => "--untracked-files=all", + }); + if !pathspecs.is_empty() { + cmd.arg("--"); + cmd.args(pathspecs); + } + let output = cmd.output()?; + assert!(output.status.success()); + Ok(String::from_utf8_lossy(&output.stdout) + .lines() + .filter_map(|line| line.strip_prefix("?? 
")) + .map(|path| path.trim_end_matches('/').to_owned()) + .collect()) +} + +fn untracked_paths(entries: &[(gix_dir::Entry, Option)]) -> BTreeSet { + entries + .iter() + .filter(|(entry, _)| entry.status == Untracked) + .map(|(entry, _)| entry.rela_path.to_string()) + .collect() +} + +fn collect_with_repo_globals( + root: &std::path::Path, + opts: gix_dir::walk::Options<'static>, + use_cache: bool, +) -> crate::Result { + collect_with_repo_globals_opts(root, opts, use_cache, None, &[]) +} + +fn collect_with_repo_globals_opts( + root: &std::path::Path, + opts: gix_dir::walk::Options<'static>, + use_cache: bool, + excludes_file: Option<&std::path::Path>, + pathspecs: &[&str], +) -> crate::Result { + let git_dir = root.join(".git"); + let bytes = std::fs::read(git_dir.join("index"))?; + let (mut index, _) = gix_index::State::from_bytes( + &bytes, + std::time::UNIX_EPOCH.into(), + gix_index::hash::Kind::Sha1, + Default::default(), + ) + .expect("valid index"); + for entry in index + .entries_mut() + .iter_mut() + .filter(|entry| !entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE)) + { + entry.flags |= gix_index::entry::Flags::UPTODATE; + } + + let parse = gix_ignore::search::Ignore { support_precious: true }; + let mut buf = Vec::new(); + let excludes_file = match excludes_file { + Some(path) => Some(path.to_owned()), + None => effective_excludes_file(root)?, + }; + let globals = gix_ignore::Search::from_git_dir(&git_dir, excludes_file, &mut buf, parse)?; + let mut stack = gix_worktree::Stack::from_state_and_ignore_case( + root, + false, + gix_worktree::stack::State::IgnoreStack(gix_worktree::stack::state::Ignore::new( + Default::default(), + globals, + None, + gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, + parse, + )), + &index, + index.path_backing(), + ); + let pathspecs = pathspecs + .iter() + .map(|pattern| { + gix_pathspec::Pattern::from_bytes(pattern.as_bytes(), Default::default()).expect("valid pathspec") + }) + 
.collect::>(); + let mut search = + gix_pathspec::Search::from_specs(pathspecs, None::<&std::path::Path>, root).expect("empty pathspec is valid"); + let git_dir_realpath = gix_path::realpath_opts(&git_dir, root, gix_path::realpath::MAX_SYMLINKS)?; + let lookup = index.prepare_icase_backing(); + let mut collect = gix_dir::walk::delegate::Collect::default(); + let opts = gix_dir::walk::Options { + use_untracked_cache: use_cache, + ..opts + }; + let out = walk( + root, + gix_dir::walk::Context { + should_interrupt: None, + git_dir_realpath: &git_dir_realpath, + current_dir: root, + index: &index, + ignore_case_index_lookup: Some(&lookup), + pathspec: &mut search, + pathspec_attributes: &mut |_, _, _, _| false, + excludes: Some(&mut stack), + objects: &gix_object::find::Never, + explicit_traversal_root: None, + }, + opts, + &mut collect, + )?; + Ok((out, collect.into_entries_by_path())) +} + #[test] fn root_may_not_go_through_dot_git() -> crate::Result { let root = fixture("with-nested-dot-git"); @@ -2896,6 +3534,7 @@ fn root_may_not_go_through_dot_git() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -2941,6 +3580,7 @@ fn root_at_submodule_repository_allows_walk() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); @@ -2982,6 +3622,7 @@ fn root_in_submodule_repository_allows_walk() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3023,6 +3664,7 @@ fn root_in_submodule_from_superproject_repository_allows_walk() -> crate::Result read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3064,6 +3706,7 @@ fn root_enters_directory_with_dot_git_in_reconfigured_worktree_tracked() -> crat read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3098,6 +3741,7 @@ fn 
root_enters_directory_with_dot_git_in_reconfigured_worktree_tracked() -> crat read_dir_calls: 0, returned_entries: 0, seen_entries: 1, + ..Default::default() } ); @@ -3168,6 +3812,7 @@ fn root_may_not_go_through_nested_repository_unless_enabled() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -3196,6 +3841,7 @@ fn root_may_not_go_through_submodule() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() }, ); assert_eq!( @@ -3217,6 +3863,7 @@ fn walk_with_submodule() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); assert_eq!( @@ -3250,6 +3897,7 @@ fn root_that_is_tracked_file_is_returned() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3279,6 +3927,7 @@ fn root_that_is_untracked_file_is_returned() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3315,6 +3964,7 @@ fn root_can_be_pruned_early_with_pathspec() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3336,6 +3986,7 @@ fn submodules() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); let expected_content = [ @@ -3473,6 +4124,7 @@ fn file_root_is_shown_if_pathspec_matches_exactly() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() }, ); @@ -3504,6 +4156,7 @@ fn root_that_is_tracked_and_ignored_is_considered_tracked() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3538,6 +4191,7 @@ fn root_with_dir_that_is_tracked_and_ignored() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), 
seen_entries: 3, + ..Default::default() } ); @@ -3579,6 +4233,7 @@ fn empty_and_nested_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 2, + ..Default::default() } ); @@ -3609,6 +4264,7 @@ fn empty_and_nested_untracked() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); @@ -3655,6 +4311,7 @@ fn root_that_is_ignored_is_listed_for_files_and_directories() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); @@ -3817,6 +4474,7 @@ fn nested_repos_in_ignored_directories() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); @@ -3848,6 +4506,7 @@ fn nested_repos_in_ignored_directories() -> crate::Result { read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 6, + ..Default::default() } ); @@ -3881,6 +4540,7 @@ fn nested_repos_in_ignored_directories() -> crate::Result { read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() } ); @@ -3936,6 +4596,7 @@ fn decomposed_unicode_in_root_is_returned_precomposed() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -3993,6 +4654,7 @@ fn untracked_and_ignored_collapse_mix() { read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 7, + ..Default::default() } ); assert_eq!( @@ -4027,6 +4689,7 @@ fn untracked_and_ignored_collapse_mix() { read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 8, + ..Default::default() } ); assert_eq!( @@ -4061,6 +4724,7 @@ fn untracked_and_ignored_collapse_mix() { read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 8, + ..Default::default() } ); assert_eq!( @@ -4101,6 +4765,7 @@ fn root_cannot_pass_through_case_altered_capital_dot_git_if_case_insensitive() - read_dir_calls: 0, returned_entries: 
entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -4163,6 +4828,7 @@ fn partial_checkout_cone_and_non_one() -> crate::Result { read_dir_calls: 0, returned_entries: entries.len(), seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -4208,6 +4874,7 @@ fn type_mismatch() { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); @@ -4250,6 +4917,7 @@ fn type_mismatch() { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3 + 1, + ..Default::default() } ); @@ -4295,6 +4963,7 @@ fn type_mismatch_ignore_case() { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( @@ -4335,6 +5004,7 @@ fn type_mismatch_ignore_case() { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3 + 1, + ..Default::default() } ); assert_eq!( @@ -4379,6 +5049,7 @@ fn type_mismatch_ignore_case_clash_dir_is_file() { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 2, + ..Default::default() } ); assert_eq!( @@ -4420,6 +5091,7 @@ fn type_mismatch_ignore_case_clash_file_is_dir() { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 2, + ..Default::default() } ); assert_eq!( @@ -4441,6 +5113,7 @@ fn top_level_slash_with_negations() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -4473,6 +5146,7 @@ fn top_level_slash_with_negations() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -4498,6 +5172,7 @@ fn subdir_slash_with_negations() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -4530,6 +5205,7 @@ fn subdir_slash_with_negations() -> crate::Result { read_dir_calls: 3, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -4554,6 +5230,7 
@@ fn one_ignored_submodule() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( @@ -4577,7 +5254,8 @@ fn one_ignored_submodule() -> crate::Result { walk::Outcome { read_dir_calls: 0, returned_entries: entries.len(), - seen_entries: 1 + seen_entries: 1, + ..Default::default() } ); assert_eq!( @@ -4598,6 +5276,7 @@ fn ignored_sub_repo() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( @@ -4632,6 +5311,7 @@ fn ignored_sub_repo() -> crate::Result { read_dir_calls: 1, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( @@ -4657,6 +5337,7 @@ fn in_repo_worktree() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); assert_eq!( @@ -4687,6 +5368,7 @@ fn in_repo_worktree() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); assert_eq!( @@ -4713,6 +5395,7 @@ fn in_repo_hidden_worktree() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); assert_eq!( @@ -4744,6 +5427,7 @@ fn in_repo_hidden_worktree() -> crate::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 4, + ..Default::default() } ); assert_eq!( @@ -4782,6 +5466,7 @@ fn in_repo_hidden_worktree() -> crate::Result { read_dir_calls: 4, returned_entries: entries.len(), seen_entries: 5, + ..Default::default() } ); assert_eq!( diff --git a/gix-dir/tests/dir_cwd.rs b/gix-dir/tests/dir_cwd.rs index 6b01b74501a..2d2d9ae84c7 100644 --- a/gix-dir/tests/dir_cwd.rs +++ b/gix-dir/tests/dir_cwd.rs @@ -24,6 +24,7 @@ fn prefixes_work_as_expected() -> gix_testtools::Result { read_dir_calls: 2, returned_entries: entries.len(), seen_entries: 3, + ..Default::default() } ); assert_eq!( diff --git a/gix-dir/tests/walk_utils/mod.rs 
b/gix-dir/tests/walk_utils/mod.rs index 878819c433e..9f1f8c66071 100644 --- a/gix-dir/tests/walk_utils/mod.rs +++ b/gix-dir/tests/walk_utils/mod.rs @@ -36,6 +36,7 @@ pub fn options_emit_all() -> walk::Options<'static> { emit_empty_directories: true, emit_collapsed: None, symlinks_to_directories_are_ignored_like_directories: false, + use_untracked_cache: true, worktree_relative_worktree_dirs: None, } } diff --git a/gix-index/src/decode/mod.rs b/gix-index/src/decode/mod.rs index 4872c7e23fc..8fe39eefe11 100644 --- a/gix-index/src/decode/mod.rs +++ b/gix-index/src/decode/mod.rs @@ -304,11 +304,11 @@ pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> { let (size, data) = read_u32(data)?; Some(( entry::Stat { - mtime: entry::stat::Time { + ctime: entry::stat::Time { secs: ctime_secs, nsecs: ctime_nsecs, }, - ctime: entry::stat::Time { + mtime: entry::stat::Time { secs: mtime_secs, nsecs: mtime_nsecs, }, diff --git a/gix-index/src/extension/mod.rs b/gix-index/src/extension/mod.rs index 33b91c5517b..1c807840bf4 100644 --- a/gix-index/src/extension/mod.rs +++ b/gix-index/src/extension/mod.rs @@ -60,6 +60,38 @@ pub struct UntrackedCache { directories: Vec, } +impl UntrackedCache { + /// Return an identifier tying this cache to the worktree location and host system. + pub fn identifier(&self) -> &bstr::BStr { + self.identifier.as_ref() + } + + /// Return stat information for `$GIT_DIR/info/exclude`, along with its object id if available. + pub fn info_exclude(&self) -> Option<&untracked_cache::OidStat> { + self.info_exclude.as_ref() + } + + /// Return stat information for `core.excludesFile`, along with its object id if available. + pub fn excludes_file(&self) -> Option<&untracked_cache::OidStat> { + self.excludes_file.as_ref() + } + + /// Return the filename used for per-directory ignore files, typically `.gitignore`. 
+ pub fn exclude_filename_per_dir(&self) -> &bstr::BStr { + self.exclude_filename_per_dir.as_ref() + } + + /// Return flags that describe how the cache contents were recorded. + pub fn dir_flags(&self) -> u32 { + self.dir_flags + } + + /// Return all cached directories, with index `0` representing the repository root. + pub fn directories(&self) -> &[untracked_cache::Directory] { + &self.directories + } +} + /// The extension for keeping state on recent information provided by the filesystem monitor. #[allow(dead_code)] #[derive(Clone)] diff --git a/gix-index/src/extension/untracked_cache.rs b/gix-index/src/extension/untracked_cache.rs index 6da198c4479..a18ae64a3b1 100644 --- a/gix-index/src/extension/untracked_cache.rs +++ b/gix-index/src/extension/untracked_cache.rs @@ -16,6 +16,18 @@ pub struct OidStat { pub id: ObjectId, } +impl OidStat { + /// Return filesystem stat information for the tracked file. + pub fn stat(&self) -> &entry::Stat { + &self.stat + } + + /// Return the object id associated with the tracked file contents. + pub fn id(&self) -> &ObjectId { + &self.id + } +} + /// A directory with information about its untracked files, and its sub-directories #[derive(Clone)] pub struct Directory { @@ -34,6 +46,38 @@ pub struct Directory { pub check_only: bool, } +impl Directory { + /// Return the directory name, or an empty string for the root directory. + pub fn name(&self) -> &bstr::BStr { + self.name.as_ref() + } + + /// Return all cached untracked entries contained directly in this directory. + pub fn untracked_entries(&self) -> &[BString] { + &self.untracked_entries + } + + /// Return indices pointing at cached child directories. + pub fn sub_directories(&self) -> &[usize] { + &self.sub_directories + } + + /// Return the cached stat information for this directory, if available. + pub fn stat(&self) -> Option<&entry::Stat> { + self.stat.as_ref() + } + + /// Return the cached object id of this directory's ignore file, if available. 
+ pub fn exclude_file_oid(&self) -> Option<&ObjectId> { + self.exclude_file_oid.as_ref() + } + + /// Return whether this directory was cached in `check_only` mode. + pub fn check_only(&self) -> bool { + self.check_only + } +} + /// Only used as an indicator pub const SIGNATURE: Signature = *b"UNTR"; @@ -46,10 +90,29 @@ pub fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Option(data: &'a [u8], directories: &mut Vec) data.into() } - -fn decode_oid_stat(data: &[u8], hash_len: usize) -> Option<(OidStat, &[u8])> { - let (stat, data) = crate::decode::stat(data)?; - let (hash, data) = data.split_at_checked(hash_len)?; - Some(( - OidStat { - stat, - id: ObjectId::from_bytes_or_panic(hash), - }, - data, - )) -} diff --git a/gix-status/tests/status/index_as_worktree_with_renames.rs b/gix-status/tests/status/index_as_worktree_with_renames.rs index cc8e5468315..e8b2f0699e7 100644 --- a/gix-status/tests/status/index_as_worktree_with_renames.rs +++ b/gix-status/tests/status/index_as_worktree_with_renames.rs @@ -198,6 +198,7 @@ fn changed_and_untracked() { read_dir_calls: 3, returned_entries: 2, seen_entries: 8, + ..Default::default() } ); assert_eq!(out.rewrites, None, "rewrites are still not configured"); @@ -245,6 +246,7 @@ fn unreadable_untracked() { read_dir_calls: 1, returned_entries: 1, seen_entries: 3, + ..Default::default() } ); } diff --git a/gix-worktree/src/stack/state/ignore.rs b/gix-worktree/src/stack/state/ignore.rs index 23a0b3a315c..0518045d5cc 100644 --- a/gix-worktree/src/stack/state/ignore.rs +++ b/gix-worktree/src/stack/state/ignore.rs @@ -85,6 +85,27 @@ impl Ignore { self.matched_directory_patterns_stack.pop().expect("something to pop"); self.stack.patterns.pop().expect("something to pop"); } + + /// Return the override patterns that are consulted last and typically originate from explicit user input. 
+ pub fn overrides(&self) -> &IgnoreMatchGroup { + &self.overrides + } + + /// Return the global ignore patterns, usually loaded from `core.excludesFile` and `$GIT_DIR/info/exclude`. + pub fn globals(&self) -> &IgnoreMatchGroup { + &self.globals + } + + /// Return the per-directory ignore filename, typically `.gitignore`. + pub fn exclude_file_name_for_directories(&self) -> &BStr { + self.exclude_file_name_for_directories.as_ref() + } + + /// Return where per-directory ignore files are loaded from. + pub fn source(&self) -> Source { + self.source + } + /// The match groups from lowest priority to highest. pub(crate) fn match_groups(&self) -> [&IgnoreMatchGroup; 3] { [&self.globals, &self.stack, &self.overrides] diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 77f63f1d383..e07a8c5cd69 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -143,7 +143,7 @@ interrupt = ["dep:signal-hook", "gix-tempfile/signals", "dep:parking_lot"] index = ["dep:gix-index"] ## Support directory walks with Git-style annoations. -dirwalk = ["dep:gix-dir", "attributes", "excludes"] +dirwalk = ["dep:gix-dir", "gix-dir?/attributes", "attributes", "excludes"] ## Access to credential helpers, which provide credentials for URLs. # Note that `gix-negotiate` just piggibacks here, as 'credentials' is equivalent to 'fetch & push' right now. diff --git a/gix/src/config/mod.rs b/gix/src/config/mod.rs index 0c48bd35268..bd92b3cf112 100644 --- a/gix/src/config/mod.rs +++ b/gix/src/config/mod.rs @@ -472,6 +472,12 @@ pub mod boolean { pub type Error = super::key::Error; } +/// +pub mod untracked_cache { + /// The error produced when failing to parse `core.untrackedCache` from configuration. + pub type Error = super::key::GenericErrorWithValue; +} + /// pub mod unsigned_integer { /// The error produced when failing to parse a signed integer from configuration. 
diff --git a/gix/src/config/tree/sections/core.rs b/gix/src/config/tree/sections/core.rs index 2e0ece2af24..9d7ffa86035 100644 --- a/gix/src/config/tree/sections/core.rs +++ b/gix/src/config/tree/sections/core.rs @@ -55,6 +55,9 @@ impl Core { pub const SYMLINKS: keys::Boolean = keys::Boolean::new_boolean("symlinks", &config::Tree::CORE); /// The `core.trustCTime` key. pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE); + /// The `core.untrackedCache` key. + pub const UNTRACKED_CACHE: UntrackedCache = + UntrackedCache::new_with_validate("untrackedCache", &config::Tree::CORE, validate::UntrackedCache); /// The `core.worktree` key. pub const WORKTREE: keys::Any = keys::Any::new("worktree", &config::Tree::CORE) .with_environment_override("GIT_WORK_TREE") @@ -119,6 +122,7 @@ impl Section for Core { &Self::REPOSITORY_FORMAT_VERSION, &Self::SYMLINKS, &Self::TRUST_C_TIME, + &Self::UNTRACKED_CACHE, &Self::WORKTREE, &Self::PROTECT_HFS, &Self::PROTECT_NTFS, @@ -152,6 +156,9 @@ pub type LogAllRefUpdates = keys::Any; /// The `core.disambiguate` key. pub type Disambiguate = keys::Any; +/// The `core.untrackedCache` key. +pub type UntrackedCache = keys::Any; + #[cfg(feature = "attributes")] mod filter { use super::validate; @@ -357,6 +364,33 @@ mod log_all_ref_updates { } } +mod untracked_cache { + use crate::{config, config::tree::core::UntrackedCache}; + + impl UntrackedCache { + /// Returns `Some(true)` to use the untracked cache, `Some(false)` to disable it, + /// or `None` when the value is `keep` (preserve existing state) or absent. + /// + /// `value` is expected to be provided by [`gix_config::File::boolean()`]. 
+ pub fn try_into_untracked_cache( + &'static self, + value: Option>, + ) -> Result, config::key::GenericErrorWithValue> { + match value { + Some(Ok(b)) => Ok(Some(b)), + Some(Err(err)) => { + if err.input.eq_ignore_ascii_case(b"keep") { + Ok(None) + } else { + Err(config::key::GenericErrorWithValue::from_value(self, err.input)) + } + } + None => Ok(None), + } + } + } +} + mod check_stat { use std::borrow::Cow; @@ -472,6 +506,16 @@ mod validate { } } + #[derive(Clone, Copy)] + pub struct UntrackedCache; + impl keys::Validate for UntrackedCache { + fn validate(&self, value: &BStr) -> Result<(), Box> { + super::Core::UNTRACKED_CACHE + .try_into_untracked_cache(Some(gix_config::Boolean::try_from(value).map(|b| b.0)))?; + Ok(()) + } + } + #[derive(Clone, Copy)] pub struct Abbrev; impl keys::Validate for Abbrev { diff --git a/gix/src/dirwalk/mod.rs b/gix/src/dirwalk/mod.rs index 4bec889a3e3..0e99b88efaa 100644 --- a/gix/src/dirwalk/mod.rs +++ b/gix/src/dirwalk/mod.rs @@ -71,6 +71,16 @@ pub struct Outcome<'repo> { pub dirwalk: gix_dir::walk::Outcome, } +/// Control whether the untracked cache should be consulted during directory walks. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub enum UntrackedCache { + /// Consult the untracked cache when it is present and otherwise applicable. + #[default] + Use, + /// Ignore the untracked cache even if it is present. + Ignore, +} + /// Options for use in the [`Repository::dirwalk()`](crate::Repository::dirwalk()) function. /// /// Note that all values start out disabled. 
@@ -89,5 +99,6 @@ pub struct Options { classify_untracked_bare_repositories: bool, emit_collapsed: Option, symlinks_to_directories_are_ignored_like_directories: bool, + untracked_cache: UntrackedCache, pub(crate) empty_patterns_match_prefix: bool, } diff --git a/gix/src/dirwalk/options.rs b/gix/src/dirwalk/options.rs index 8c292821719..9bd5e934784 100644 --- a/gix/src/dirwalk/options.rs +++ b/gix/src/dirwalk/options.rs @@ -1,6 +1,6 @@ use gix_dir::walk::{CollapsedEntriesEmissionMode, EmissionMode, ForDeletionMode}; -use crate::dirwalk::Options; +use crate::dirwalk::{Options, UntrackedCache}; /// Construction impl Options { @@ -19,6 +19,7 @@ impl Options { emit_collapsed: None, empty_patterns_match_prefix: false, symlinks_to_directories_are_ignored_like_directories: false, + untracked_cache: UntrackedCache::Ignore, } } } @@ -39,6 +40,7 @@ impl From for gix_dir::walk::Options<'static> { emit_collapsed: v.emit_collapsed, symlinks_to_directories_are_ignored_like_directories: v .symlinks_to_directories_are_ignored_like_directories, + use_untracked_cache: v.untracked_cache == UntrackedCache::Use, worktree_relative_worktree_dirs: None, } } @@ -185,4 +187,16 @@ impl Options { self.symlinks_to_directories_are_ignored_like_directories = toggle; self } + + /// Control whether to consult the untracked cache if it is present and applicable. + pub fn untracked_cache(mut self, value: UntrackedCache) -> Self { + self.untracked_cache = value; + self + } + + /// Like [`untracked_cache()`](Self::untracked_cache), but only requires a mutably borrowed instance. + pub fn set_untracked_cache(&mut self, value: UntrackedCache) -> &mut Self { + self.untracked_cache = value; + self + } } diff --git a/gix/src/repository/dirwalk.rs b/gix/src/repository/dirwalk.rs index b71cd567247..4328d688f0a 100644 --- a/gix/src/repository/dirwalk.rs +++ b/gix/src/repository/dirwalk.rs @@ -12,8 +12,22 @@ impl Repository { /// Return default options suitable for performing a directory walk on this repository. 
/// /// Used in conjunction with [`dirwalk()`](Self::dirwalk()) - pub fn dirwalk_options(&self) -> Result { - Ok(dirwalk::Options::from_fs_caps(self.filesystem_options()?)) + pub fn dirwalk_options(&self) -> Result { + let use_untracked_cache = config::tree::Core::UNTRACKED_CACHE + .try_into_untracked_cache(self.config.resolved.boolean(config::tree::Core::UNTRACKED_CACHE))?; + let fs_caps = self.filesystem_options().map_err(|e| config::key::Error { + key: e.key, + value: e.value, + environment_override: e.environment_override, + source: e.source, + })?; + Ok( + dirwalk::Options::from_fs_caps(fs_caps).untracked_cache(if use_untracked_cache.unwrap_or(false) { + dirwalk::UntrackedCache::Use + } else { + dirwalk::UntrackedCache::Ignore + }), + ) } /// Perform a directory walk configured with `options` under control of the `delegate`. Use `patterns` to @@ -59,6 +73,7 @@ impl Repository { let fs_caps = self.filesystem_options()?; let accelerate_lookup = fs_caps.ignore_case.then(|| index.prepare_icase_backing()); let mut opts = gix_dir::walk::Options::from(options); + let has_pathspecs = pathspec.search.patterns().len() != 0; let worktree_relative_worktree_dirs_storage; if let Some(workdir) = self.workdir().filter(|_| opts.for_deletion.is_some()) { let linked_worktrees = self.worktrees()?; @@ -100,7 +115,7 @@ impl Repository { }, excludes: Some(&mut excludes.inner), objects: &self.objects, - explicit_traversal_root: (!options.empty_patterns_match_prefix).then_some(workdir), + explicit_traversal_root: (!options.empty_patterns_match_prefix && has_pathspecs).then_some(workdir), }, opts, delegate, diff --git a/gix/src/status/mod.rs b/gix/src/status/mod.rs index b2770478f55..68c7f504bd4 100644 --- a/gix/src/status/mod.rs +++ b/gix/src/status/mod.rs @@ -68,9 +68,7 @@ impl Default for Submodule { #[allow(missing_docs)] pub enum Error { #[error(transparent)] - DirwalkOptions(#[from] config::boolean::Error), - #[error(transparent)] - ConfigureUntrackedFiles(#[from] 
config::key::GenericErrorWithValue), + DirwalkOptions(#[from] config::untracked_cache::Error), } /// Status diff --git a/gix/tests/gix/repository/mod.rs b/gix/tests/gix/repository/mod.rs index e83d874ed34..f455a6d416f 100644 --- a/gix/tests/gix/repository/mod.rs +++ b/gix/tests/gix/repository/mod.rs @@ -74,9 +74,11 @@ mod index { #[cfg(feature = "dirwalk")] mod dirwalk { - use std::sync::atomic::AtomicBool; + use std::{process::Command, sync::atomic::AtomicBool}; + use gix::config::tree::Core; use gix_dir::{entry::Kind::*, walk::EmissionMode}; + use gix_testtools::tempfile; #[test] fn basics() -> crate::Result { @@ -131,6 +133,136 @@ mod dirwalk { ); Ok(()) } + + #[test] + fn untracked_cache_keep_config_does_not_error() -> crate::Result { + let mut repo = repo_with_untracked_cache()?; + // `core.untrackedCache=keep` is git's documented default and a valid tri-state + // value. Parsing it as a boolean returned an error, making `dirwalk_options()` + // fail on any repo with this setting. + repo.config_snapshot_mut() + .set_raw_value_by("core", None, "untrackedCache", "keep")?; + let opts = repo.dirwalk_options(); + assert!( + opts.is_ok(), + "core.untrackedCache=keep must not cause a parse error, got: {:?}", + opts.err() + ); + Ok(()) + } + + #[test] + // On Windows, NTFS flushes directory metadata asynchronously. Directories modified + // very recently can report slightly different `LastWriteTime` values depending on + // when the stat is read, causing the IOUC stat check to fail unpredictably. 
+ #[cfg_attr(windows, ignore)] + fn untracked_cache_respects_config_and_allows_overrides() -> crate::Result { + let mut repo = repo_with_untracked_cache()?; + let index = repo.index()?; + + repo.config_snapshot_mut().set_value(&Core::UNTRACKED_CACHE, "true")?; + let out = run_dirwalk( + &repo, + &index, + repo.dirwalk_options()?.emit_untracked(EmissionMode::CollapseDirectory), + )?; + assert_eq!( + out.dirwalk.read_dir_calls, 0, + "core.untrackedCache=true should enable the fast path" + ); + + let out = run_dirwalk( + &repo, + &index, + repo.dirwalk_options()? + .emit_untracked(EmissionMode::CollapseDirectory) + .untracked_cache(gix::dirwalk::UntrackedCache::Ignore), + )?; + assert_ne!( + out.dirwalk.read_dir_calls, 0, + "callers can explicitly disable the untracked cache" + ); + + repo.config_snapshot_mut().set_value(&Core::UNTRACKED_CACHE, "false")?; + let out = run_dirwalk( + &repo, + &index, + repo.dirwalk_options()?.emit_untracked(EmissionMode::CollapseDirectory), + )?; + assert_ne!( + out.dirwalk.read_dir_calls, 0, + "core.untrackedCache=false should disable the fast path" + ); + + let out = run_dirwalk( + &repo, + &index, + repo.dirwalk_options()? 
+ .emit_untracked(EmissionMode::CollapseDirectory) + .untracked_cache(gix::dirwalk::UntrackedCache::Use), + )?; + assert_eq!( + out.dirwalk.read_dir_calls, 0, + "callers can override config to force use of the untracked cache" + ); + Ok(()) + } + + fn repo_with_untracked_cache() -> crate::Result { + let tmp = tempfile::tempdir()?; + let root = tmp.path().join("repo"); + std::mem::forget(tmp); + std::fs::create_dir(&root)?; + git(&root, ["init"])?; + git(&root, ["config", "status.showUntrackedFiles", "all"])?; + git(&root, ["config", "user.name", "a"])?; + git(&root, ["config", "user.email", "a@example.com"])?; + git(&root, ["config", "core.untrackedCache", "true"])?; + // Pin a local excludesFile so git and gix (isolated mode, reads local config) agree on + // which global-excludes file was used when the UNTR cache was written. Without this, + // users with a core.excludesFile in their ~/.gitconfig would have it written into the + // cache, but gix (isolated) wouldn't know about it, causing cache validation to fail. + let excludes = root.join("global-excludes"); + std::fs::write(&excludes, "")?; + git( + &root, + [ + std::ffi::OsStr::new("config"), + std::ffi::OsStr::new("core.excludesFile"), + excludes.as_os_str(), + ], + )?; + std::fs::create_dir(root.join("tracked"))?; + std::fs::write(root.join("tracked/keep"), "keep")?; + git(&root, ["add", "tracked/keep"])?; + git(&root, ["commit", "-m", "init"])?; + std::fs::create_dir_all(root.join("tracked/new"))?; + std::fs::create_dir_all(root.join("new"))?; + std::fs::write(root.join("tracked/new/file"), "tracked-new")?; + std::fs::write(root.join("new/file"), "new")?; + git(&root, ["update-index", "--force-untracked-cache"])?; + git(&root, ["status", "--porcelain"])?; + // Run status a second time so git validates the recorded directory stats and sets + // the valid bitmap in the IOUC. Some git versions only populate the structure on + // the first run and mark entries valid on the second. 
+ git(&root, ["status", "--porcelain"])?; + Ok(gix::open_opts(&root, gix::open::Options::isolated())?) + } + + fn git(cwd: &std::path::Path, args: impl IntoIterator>) -> crate::Result { + let status = Command::new("git").args(args).current_dir(cwd).status()?; + assert!(status.success()); + Ok(()) + } + + fn run_dirwalk<'repo>( + repo: &'repo gix::Repository, + index: &gix::worktree::Index, + options: gix::dirwalk::Options, + ) -> crate::Result> { + let mut collect = gix::dir::walk::delegate::Collect::default(); + Ok(repo.dirwalk(index, None::<&str>, &AtomicBool::default(), options, &mut collect)?) + } } #[test] diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index ddfa9fde8f6..57b3d6a182d 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -118,7 +118,7 @@ static GIT_CONFIG: &[Record] = &[ }, Record { config: "core.untrackedCache", - usage: Planned("Needed for fast worktree operation") + usage: InUse("Consulted for repository dirwalk defaults, with API overrides available") }, Record { config: "checkout.guess",