diff --git a/src/archive/sevenz.rs b/src/archive/sevenz.rs index a45248e61..9d608a784 100644 --- a/src/archive/sevenz.rs +++ b/src/archive/sevenz.rs @@ -18,7 +18,8 @@ use crate::{ info, list::{FileInArchive, ListFileType}, utils::{ - BytesFmt, FileVisibilityPolicy, PathFmt, cd_into_same_dir_as, ensure_parent_dir_exists, is_same_file_as_output, + BytesFmt, FileVisibilityPolicy, PathFmt, cd_into_same_dir_as, common_ancestor, ensure_parent_dir_exists, + has_duplicate_basenames, is_same_file_as_output, }, warning, }; @@ -134,14 +135,35 @@ where let mut writer = sevenz_rust2::ArchiveWriter::new(writer)?; let output_handle = Handle::from_path(output_path); + // When multiple inputs share the same basename, archive entry names would collide. + // In that case, use the common ancestor of all paths and store entries relative to it, + // including enough parent path components to disambiguate. + let use_common_ancestor = files.len() > 1 && has_duplicate_basenames(files); + let common_ancestor = if use_common_ancestor { + Some(common_ancestor(files)) + } else { + None + }; + for filename in files { - let previous_location = cd_into_same_dir_as(filename)?; + let previous_location = if let Some(ref ancestor) = common_ancestor { + let previous = env::current_dir()?; + env::set_current_dir(ancestor)?; + previous + } else { + cd_into_same_dir_as(filename)? + }; // Unwrap safety: // paths should be canonicalized by now, and the root directory rejected. - let filename = filename.file_name().unwrap(); + // When using common ancestor, include enough parent path to avoid collisions. + let walker_root = if let Some(ref ancestor) = common_ancestor { + filename.strip_prefix(ancestor).unwrap().to_path_buf() + } else { + PathBuf::from(filename.file_name().unwrap()) + }; - for entry in file_visibility_policy.build_walker(filename) { + for entry in file_visibility_policy.build_walker(&walker_root) { let entry = entry?; let path = entry.path(); diff --git a/src/archive/tar.rs b/src/archive/tar.rs index 843cd3cf7..128e496b8 100644 --- a/src/archive/tar.rs +++ b/src/archive/tar.rs @@ -144,14 +144,35 @@ where let output_handle = Handle::from_path(output_path); let mut seen_inode: HashMap<(u64, u64), PathBuf> = HashMap::new(); + // When multiple inputs share the same basename, archive entry names would collide. + // In that case, use the common ancestor of all paths and store entries relative to it, + // including enough parent path components to disambiguate. + let use_common_ancestor = explicit_paths.len() > 1 && utils::has_duplicate_basenames(explicit_paths); + let common_ancestor = if use_common_ancestor { + Some(utils::common_ancestor(explicit_paths)) + } else { + None + }; + for explicit_path in explicit_paths { - let previous_location = utils::cd_into_same_dir_as(explicit_path)?; + let previous_location = if let Some(ref ancestor) = common_ancestor { + let previous = env::current_dir()?; + env::set_current_dir(ancestor)?; + previous + } else { + utils::cd_into_same_dir_as(explicit_path)? + }; // Unwrap expectation: // paths should be canonicalized by now, and the root directory rejected. - let filename = explicit_path.file_name().unwrap(); + // When using common ancestor, include enough parent path to avoid collisions. + let walker_root = if let Some(ref ancestor) = common_ancestor { + explicit_path.strip_prefix(ancestor).unwrap().as_os_str() + } else { + explicit_path.file_name().unwrap() + }; - let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, filename); + let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, walker_root); for entry in iter { let path = entry?; diff --git a/src/archive/zip.rs b/src/archive/zip.rs index eae9d327a..feb47186c 100644 --- a/src/archive/zip.rs +++ b/src/archive/zip.rs @@ -21,9 +21,9 @@ use crate::{ info, info_accessible, list::{FileInArchive, ListFileType}, utils::{ - BytesFmt, FileType, FileVisibilityPolicy, PathFmt, canonicalize, cd_into_same_dir_as, create_symlink, - ensure_parent_dir_exists, get_invalid_utf8_paths, is_same_file_as_output, pretty_format_list_of_paths, - read_file_type, strip_cur_dir, + BytesFmt, FileType, FileVisibilityPolicy, PathFmt, canonicalize, cd_into_same_dir_as, common_ancestor, + create_symlink, ensure_parent_dir_exists, get_invalid_utf8_paths, has_duplicate_basenames, + is_same_file_as_output, pretty_format_list_of_paths, read_file_type, strip_cur_dir, }, warning, }; @@ -180,14 +180,35 @@ where return Err(error.into()); } + // When multiple inputs share the same basename, archive entry names would collide. + // In that case, use the common ancestor of all paths and store entries relative to it, + // including enough parent path components to disambiguate. + let use_common_ancestor = input_filenames.len() > 1 && has_duplicate_basenames(input_filenames); + let common_ancestor = if use_common_ancestor { + Some(common_ancestor(input_filenames)) + } else { + None + }; + for explicit_path in input_filenames { - let previous_location = cd_into_same_dir_as(explicit_path)?; + let previous_location = if let Some(ref ancestor) = common_ancestor { + let previous = env::current_dir()?; + env::set_current_dir(ancestor)?; + previous + } else { + cd_into_same_dir_as(explicit_path)? + }; // Unwrap safety: // paths should be canonicalized by now, and the root directory rejected. - let filename = explicit_path.file_name().unwrap(); + // When using common ancestor, include enough parent path to avoid collisions. + let walker_root = if let Some(ref ancestor) = common_ancestor { + explicit_path.strip_prefix(ancestor).unwrap().as_os_str() + } else { + explicit_path.file_name().unwrap() + }; - let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, filename); + let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, walker_root); for entry in iter { let path = entry?; diff --git a/src/utils/fs.rs b/src/utils/fs.rs index b9df5e219..eefcef6f9 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -138,6 +138,56 @@ pub fn cd_into_same_dir_as(filename: &Path) -> Result { Ok(previous_location) } +/// Check if any of the given paths share the same basename (file_name). +/// +/// Returns `true` if there are duplicate basenames among the paths. +/// Used to detect when archive entry names would collide. +pub fn has_duplicate_basenames(paths: &[PathBuf]) -> bool { + use std::{collections::HashSet, ffi::OsStr}; + + let basenames: Vec<&OsStr> = paths.iter().filter_map(|p| p.file_name()).collect(); + + let mut seen = HashSet::new(); + for name in &basenames { + if !seen.insert(name) { + return true; + } + } + false +} + +/// Compute the longest common ancestor directory shared by all given paths. +/// +/// All paths should be absolute (canonicalized). Returns the longest path +/// prefix that is a common ancestor of all input paths. +/// +/// For example: +/// - `/a/b/Scripts` and `/c/d/Scripts` → `/` +/// - `/home/user/a/Scripts` and `/home/user/b/Scripts` → `/home/user` +pub fn common_ancestor(paths: &[PathBuf]) -> PathBuf { + use std::path::Component; + + if paths.is_empty() { + return PathBuf::new(); + } + + let components: Vec> = paths.iter().map(|p| p.components().collect()).collect(); + + let min_len = components.iter().map(|c| c.len()).min().unwrap_or(0); + + let mut common = PathBuf::new(); + for i in 0..min_len { + let component = components[0][i]; + if components.iter().all(|c| c[i] == component) { + common.push(component.as_os_str()); + } else { + break; + } + } + + common +} + /// Check if a path refers to the same file as the output handle. pub fn is_same_file_as_output(path: &Path, output_handle: &Handle) -> bool { if matches!(Handle::from_path(path), Ok(x) if &x == output_handle) { diff --git a/tests/integration.rs b/tests/integration.rs index 4c30533b7..900b16dd3 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1569,3 +1569,87 @@ fn decompress_concatenated_lz4_frames() { encoder.finish().unwrap() }); } + +/// Test that compressing multiple directories with the same basename works correctly. +/// This is the fix for issue #978: when two input directories share the same name +/// (e.g., `/a/parent1/Scripts` and `/a/parent2/Scripts`), ouch should include enough +/// parent path to disambiguate them in the archive, rather than collapsing both into +/// a single "Scripts" entry. +#[test] +fn compress_dirs_with_same_basename_tar() { + let (_tempdir, dir) = testdir().unwrap(); + + // Create two directories with the same basename but different parents + let dir_a = dir.join("parent1").join("Scripts"); + let dir_b = dir.join("parent2").join("Scripts"); + fs::create_dir_all(&dir_a).unwrap(); + fs::create_dir_all(&dir_b).unwrap(); + + // Create distinct files in each + fs::write(dir_a.join("a.txt"), "content from A").unwrap(); + fs::write(dir_b.join("b.txt"), "content from B").unwrap(); + + let archive = dir.join("archive.tar"); + ouch!("-A", "c", &dir_a, &dir_b, &archive); + + let after = dir.join("after"); + ouch!("-A", "d", &archive, "-d", &after); + + // Both directories should be present with their parent disambiguators + assert!( + after.join("parent1").join("Scripts").join("a.txt").exists(), + "parent1/Scripts/a.txt missing from archive" + ); + assert!( + after.join("parent2").join("Scripts").join("b.txt").exists(), + "parent2/Scripts/b.txt missing from archive" + ); + + // Verify file contents + assert_eq!( + "content from A", + fs::read_to_string(after.join("parent1").join("Scripts").join("a.txt")).unwrap() + ); + assert_eq!( + "content from B", + fs::read_to_string(after.join("parent2").join("Scripts").join("b.txt")).unwrap() + ); +} + +/// Same as above but for zip format +#[test] +fn compress_dirs_with_same_basename_zip() { + let (_tempdir, dir) = testdir().unwrap(); + + let dir_a = dir.join("parent1").join("Scripts"); + let dir_b = dir.join("parent2").join("Scripts"); + fs::create_dir_all(&dir_a).unwrap(); + fs::create_dir_all(&dir_b).unwrap(); + + fs::write(dir_a.join("a.txt"), "content from A").unwrap(); + fs::write(dir_b.join("b.txt"), "content from B").unwrap(); + + let archive = dir.join("archive.zip"); + ouch!("-A", "c", &dir_a, &dir_b, &archive); + + let after = dir.join("after"); + ouch!("-A", "d", &archive, "-d", &after); + + assert!( + after.join("parent1").join("Scripts").join("a.txt").exists(), + "parent1/Scripts/a.txt missing from zip archive" + ); + assert!( + after.join("parent2").join("Scripts").join("b.txt").exists(), + "parent2/Scripts/b.txt missing from zip archive" + ); + + assert_eq!( + "content from A", + fs::read_to_string(after.join("parent1").join("Scripts").join("a.txt")).unwrap() + ); + assert_eq!( + "content from B", + fs::read_to_string(after.join("parent2").join("Scripts").join("b.txt")).unwrap() + ); +}