Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions src/archive/sevenz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ use crate::{
info,
list::{FileInArchive, ListFileType},
utils::{
BytesFmt, FileVisibilityPolicy, PathFmt, cd_into_same_dir_as, ensure_parent_dir_exists, is_same_file_as_output,
BytesFmt, FileVisibilityPolicy, PathFmt, cd_into_same_dir_as, common_ancestor, ensure_parent_dir_exists,
has_duplicate_basenames, is_same_file_as_output,
},
warning,
};
Expand Down Expand Up @@ -134,14 +135,35 @@ where
let mut writer = sevenz_rust2::ArchiveWriter::new(writer)?;
let output_handle = Handle::from_path(output_path);

// When multiple inputs share the same basename, archive entry names would collide.
// In that case, use the common ancestor of all paths and store entries relative to it,
// including enough parent path components to disambiguate.
let use_common_ancestor = files.len() > 1 && has_duplicate_basenames(files);
let common_ancestor = if use_common_ancestor {
Some(common_ancestor(files))
} else {
None
};

for filename in files {
let previous_location = cd_into_same_dir_as(filename)?;
let previous_location = if let Some(ref ancestor) = common_ancestor {
let previous = env::current_dir()?;
env::set_current_dir(ancestor)?;
previous
} else {
cd_into_same_dir_as(filename)?
};

// Unwrap safety:
// paths should be canonicalized by now, and the root directory rejected.
let filename = filename.file_name().unwrap();
// When using common ancestor, include enough parent path to avoid collisions.
let walker_root = if let Some(ref ancestor) = common_ancestor {
filename.strip_prefix(ancestor).unwrap().to_path_buf()
} else {
PathBuf::from(filename.file_name().unwrap())
};

for entry in file_visibility_policy.build_walker(filename) {
for entry in file_visibility_policy.build_walker(&walker_root) {
let entry = entry?;
let path = entry.path();

Expand Down
27 changes: 24 additions & 3 deletions src/archive/tar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,35 @@ where
let output_handle = Handle::from_path(output_path);
let mut seen_inode: HashMap<(u64, u64), PathBuf> = HashMap::new();

// When multiple inputs share the same basename, archive entry names would collide.
// In that case, use the common ancestor of all paths and store entries relative to it,
// including enough parent path components to disambiguate.
let use_common_ancestor = explicit_paths.len() > 1 && utils::has_duplicate_basenames(explicit_paths);
let common_ancestor = if use_common_ancestor {
Some(utils::common_ancestor(explicit_paths))
} else {
None
};

for explicit_path in explicit_paths {
let previous_location = utils::cd_into_same_dir_as(explicit_path)?;
let previous_location = if let Some(ref ancestor) = common_ancestor {
let previous = env::current_dir()?;
env::set_current_dir(ancestor)?;
previous
} else {
utils::cd_into_same_dir_as(explicit_path)?
};

// Unwrap expectation:
// paths should be canonicalized by now, and the root directory rejected.
let filename = explicit_path.file_name().unwrap();
// When using common ancestor, include enough parent path to avoid collisions.
let walker_root = if let Some(ref ancestor) = common_ancestor {
explicit_path.strip_prefix(ancestor).unwrap().as_os_str()
} else {
explicit_path.file_name().unwrap()
};

let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, filename);
let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, walker_root);

for entry in iter {
let path = entry?;
Expand Down
33 changes: 27 additions & 6 deletions src/archive/zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ use crate::{
info, info_accessible,
list::{FileInArchive, ListFileType},
utils::{
BytesFmt, FileType, FileVisibilityPolicy, PathFmt, canonicalize, cd_into_same_dir_as, create_symlink,
ensure_parent_dir_exists, get_invalid_utf8_paths, is_same_file_as_output, pretty_format_list_of_paths,
read_file_type, strip_cur_dir,
BytesFmt, FileType, FileVisibilityPolicy, PathFmt, canonicalize, cd_into_same_dir_as, common_ancestor,
create_symlink, ensure_parent_dir_exists, get_invalid_utf8_paths, has_duplicate_basenames,
is_same_file_as_output, pretty_format_list_of_paths, read_file_type, strip_cur_dir,
},
warning,
};
Expand Down Expand Up @@ -180,14 +180,35 @@ where
return Err(error.into());
}

// When multiple inputs share the same basename, archive entry names would collide.
// In that case, use the common ancestor of all paths and store entries relative to it,
// including enough parent path components to disambiguate.
let use_common_ancestor = input_filenames.len() > 1 && has_duplicate_basenames(input_filenames);
let common_ancestor = if use_common_ancestor {
Some(common_ancestor(input_filenames))
} else {
None
};

for explicit_path in input_filenames {
let previous_location = cd_into_same_dir_as(explicit_path)?;
let previous_location = if let Some(ref ancestor) = common_ancestor {
let previous = env::current_dir()?;
env::set_current_dir(ancestor)?;
previous
} else {
cd_into_same_dir_as(explicit_path)?
};

// Unwrap safety:
// paths should be canonicalized by now, and the root directory rejected.
let filename = explicit_path.file_name().unwrap();
// When using common ancestor, include enough parent path to avoid collisions.
let walker_root = if let Some(ref ancestor) = common_ancestor {
explicit_path.strip_prefix(ancestor).unwrap().as_os_str()
} else {
explicit_path.file_name().unwrap()
};

let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, filename);
let iter = file_visibility_policy.workaround_build_walker_or_broken_link_path(explicit_path, walker_root);

for entry in iter {
let path = entry?;
Expand Down
50 changes: 50 additions & 0 deletions src/utils/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,56 @@ pub fn cd_into_same_dir_as(filename: &Path) -> Result<PathBuf> {
Ok(previous_location)
}

/// Check if any of the given paths share the same basename (file_name).
///
/// Returns `true` if there are duplicate basenames among the paths.
/// Used to detect when archive entry names would collide.
pub fn has_duplicate_basenames(paths: &[PathBuf]) -> bool {
use std::{collections::HashSet, ffi::OsStr};

let basenames: Vec<&OsStr> = paths.iter().filter_map(|p| p.file_name()).collect();

let mut seen = HashSet::new();
for name in &basenames {
if !seen.insert(name) {
return true;
}
}
false
}

/// Compute the longest common ancestor directory shared by all given paths.
///
/// All paths should be absolute (canonicalized). Returns the longest path
/// prefix that is a common ancestor of all input paths.
///
/// For example:
/// - `/a/b/Scripts` and `/c/d/Scripts` → `/`
/// - `/home/user/a/Scripts` and `/home/user/b/Scripts` → `/home/user`
pub fn common_ancestor(paths: &[PathBuf]) -> PathBuf {
use std::path::Component;

if paths.is_empty() {
return PathBuf::new();
}

let components: Vec<Vec<Component>> = paths.iter().map(|p| p.components().collect()).collect();

let min_len = components.iter().map(|c| c.len()).min().unwrap_or(0);

let mut common = PathBuf::new();
for i in 0..min_len {
let component = components[0][i];
if components.iter().all(|c| c[i] == component) {
common.push(component.as_os_str());
} else {
break;
}
}

common
}

/// Check if a path refers to the same file as the output handle.
pub fn is_same_file_as_output(path: &Path, output_handle: &Handle) -> bool {
if matches!(Handle::from_path(path), Ok(x) if &x == output_handle) {
Expand Down
84 changes: 84 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1569,3 +1569,87 @@ fn decompress_concatenated_lz4_frames() {
encoder.finish().unwrap()
});
}

/// Test that compressing multiple directories with the same basename works correctly.
/// This is the fix for issue #978: when two input directories share the same name
/// (e.g., `/a/parent1/Scripts` and `/a/parent2/Scripts`), ouch should include enough
/// parent path to disambiguate them in the archive, rather than collapsing both into
/// a single "Scripts" entry.
#[test]
fn compress_dirs_with_same_basename_tar() {
let (_tempdir, dir) = testdir().unwrap();

// Create two directories with the same basename but different parents
let dir_a = dir.join("parent1").join("Scripts");
let dir_b = dir.join("parent2").join("Scripts");
fs::create_dir_all(&dir_a).unwrap();
fs::create_dir_all(&dir_b).unwrap();

// Create distinct files in each
fs::write(dir_a.join("a.txt"), "content from A").unwrap();
fs::write(dir_b.join("b.txt"), "content from B").unwrap();

let archive = dir.join("archive.tar");
ouch!("-A", "c", &dir_a, &dir_b, &archive);

let after = dir.join("after");
ouch!("-A", "d", &archive, "-d", &after);

// Both directories should be present with their parent disambiguators
assert!(
after.join("parent1").join("Scripts").join("a.txt").exists(),
"parent1/Scripts/a.txt missing from archive"
);
assert!(
after.join("parent2").join("Scripts").join("b.txt").exists(),
"parent2/Scripts/b.txt missing from archive"
);

// Verify file contents
assert_eq!(
"content from A",
fs::read_to_string(after.join("parent1").join("Scripts").join("a.txt")).unwrap()
);
assert_eq!(
"content from B",
fs::read_to_string(after.join("parent2").join("Scripts").join("b.txt")).unwrap()
);
}

/// Same as above but for zip format
#[test]
fn compress_dirs_with_same_basename_zip() {
let (_tempdir, dir) = testdir().unwrap();

let dir_a = dir.join("parent1").join("Scripts");
let dir_b = dir.join("parent2").join("Scripts");
fs::create_dir_all(&dir_a).unwrap();
fs::create_dir_all(&dir_b).unwrap();

fs::write(dir_a.join("a.txt"), "content from A").unwrap();
fs::write(dir_b.join("b.txt"), "content from B").unwrap();

let archive = dir.join("archive.zip");
ouch!("-A", "c", &dir_a, &dir_b, &archive);

let after = dir.join("after");
ouch!("-A", "d", &archive, "-d", &after);

assert!(
after.join("parent1").join("Scripts").join("a.txt").exists(),
"parent1/Scripts/a.txt missing from zip archive"
);
assert!(
after.join("parent2").join("Scripts").join("b.txt").exists(),
"parent2/Scripts/b.txt missing from zip archive"
);

assert_eq!(
"content from A",
fs::read_to_string(after.join("parent1").join("Scripts").join("a.txt")).unwrap()
);
assert_eq!(
"content from B",
fs::read_to_string(after.join("parent2").join("Scripts").join("b.txt")).unwrap()
);
}
Loading