diff --git a/src/gitattributes.rs b/src/gitattributes.rs
new file mode 100644
index 0000000000..1739fb2d4f
--- /dev/null
+++ b/src/gitattributes.rs
@@ -0,0 +1,202 @@
+use std::path::Path;
+use std::process::Command;
+
+/// Corresponds to the diff attribute. See `man gitattributes`,
+/// specifically the **Generating diff text** section.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) enum DiffAttribute {
+    /// The file must be treated as text.
+    Set,
+
+    /// The file must be treated as binary, and no diff will be shown.
+    /// Sometimes used for generated text files as well.
+    Unset,
+
+    /// The file type (text vs binary) will be autodetected.
+    /// This is the default setting.
+    Unspecified,
+
+    /// Diff will be shown using the specified diff driver.
+    /// This is ignored by difft.
+    Other(String),
+}
+
+impl From<&str> for DiffAttribute {
+    /// Converts the `<info>` field printed by `git check-attr` into a [`DiffAttribute`].
+    /// Anything other than `set`, `unset` or `unspecified` is kept as a diff driver name.
+    fn from(s: &str) -> Self {
+        match s {
+            "set" => Self::Set,
+            "unset" => Self::Unset,
+            "unspecified" => Self::Unspecified,
+            s => Self::Other(s.to_owned()),
+        }
+    }
+}
+
+/// Runs `git check-attr diff binary` to get the diff and binary attributes of the path. Returns
+/// [`Option::None`] when either `git` is not available, file is not inside git directory, or
+/// something else went wrong.
+pub(crate) fn check_diff_attr(path: &Path) -> Option<DiffAttribute> {
+    let res = Command::new("git")
+        .args(["check-attr", "diff", "binary", "-z", "--"])
+        .arg(path)
+        .output();
+
+    match res {
+        Ok(output) => {
+            // Running git outside of git repository, or perhaps many other error conditions, will
+            // result in this. Since we run git check-attr eagerly, and can't distinguish the error
+            // conditions easily and reliably, log just a debug message.
+            if !output.status.success() {
+                debug!(
+                    "git check-attr exited with status {}: \"{}\"",
+                    output.status,
+                    output.stderr.escape_ascii()
+                );
+                return None;
+            }
+
+            let output = &output.stdout;
+            match parse_output(output) {
+                Some(res) => {
+                    trace!("git check-attr result: {:?}", res);
+                    return Some(res);
+                }
+                None => {
+                    warn!(
+                        "malformed git check-attr output: \"{}\"",
+                        output.escape_ascii()
+                    );
+                }
+            }
+        }
+        Err(err) => {
+            debug!("failed to execute git: {err}");
+        }
+    }
+
+    None
+}
+
+/// Parses the output of `git check-attr diff binary -z -- PATH` into a [`DiffAttribute`]. A set
+/// `binary` attribute takes precedence and yields [`DiffAttribute::Unset`]. Returns
+/// [`Option::None`] when the NUL-separated fields run out before a record's attribute name and
+/// info have both been read.
+fn parse_output(output: &[u8]) -> Option<DiffAttribute> {
+    // The git check-attr -z output format is repeated
+    // <path> NUL <attribute> NUL <info> NUL
+    //
+    // This function assumes git check-attr diff binary -z -- PATH, so the output contains
+    // attributes only for the specified file, and thus we don't even look at path.
+
+    let mut binary_set = false;
+    let mut result = DiffAttribute::Unspecified;
+
+    let mut it = output.split(|&b| b == b'\0');
+    while let Some(path) = it.next() {
+        if path.is_empty() {
+            // Bogus "path" after the last NUL.
+            // It would be a tad cleaner if split_terminator was available for &[u8]...
+            break;
+        }
+        let attribute = it.next()?;
+        let info = it.next()?;
+
+        match attribute {
+            b"diff" => {
+                // this is only lossy for custom driver variant, which we don't support, and it's
+                // quite unlikely to contain invalid UTF-8 (who puts their binaries in files named
+                // by invalid strings?)
+                let s = String::from_utf8_lossy(info);
+                result = s.as_ref().into();
+            }
+            b"binary" => {
+                if info == b"set" {
+                    binary_set = true;
+                }
+            }
+            _ => {
+                warn!(
+                    "unexpected attribute in git check-attr output: \"{}\"",
+                    attribute.escape_ascii()
+                );
+            }
+        }
+    }
+
+    if binary_set {
+        // assume user doesn't want to see the diff, even if they specify diff=whatever
+        result = DiffAttribute::Unset;
+    }
+
+    Some(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    /// Builds `git check-attr diff binary -z` output for a single path, in the documented
+    /// `<path> NUL <attribute> NUL <info> NUL` record format.
+    fn check_attr_output(path: &str, diff: &str, binary: &str) -> Vec<u8> {
+        format!("{path}\0diff\0{diff}\0{path}\0binary\0{binary}\0").into_bytes()
+    }
+
+    #[test]
+    fn test_parse_output() {
+        // The test case assumes fictional git repository with the following .gitattributes
+        // contents to ignore generated protobuf files, but see generated gRPC files, and uses
+        // imaginary "hex" diff driver for wtf.bin files:
+        //
+        // /protos/*.pb.go -diff
+        // /protos/*_grpc.pb.go diff
+        // wtf.bin binary diff=hex
+        //
+
+        // This is a plaintext file, no attributes, assume regular text diff
+        assert_eq!(
+            parse_output(&check_attr_output("protos/difft.proto", "unspecified", "unspecified")),
+            Some(DiffAttribute::Unspecified)
+        );
+
+        // This is a generated file with diff attribute unset (we don't want to look into its diff)
+        assert_eq!(
+            parse_output(&check_attr_output("protos/difft.pb.go", "unset", "unspecified")),
+            Some(DiffAttribute::Unset)
+        );
+
+        // diff attribute is explicitly re-enabled for this file
+        assert_eq!(
+            parse_output(&check_attr_output("protos/difft_grpc.pb.go", "set", "unspecified")),
+            Some(DiffAttribute::Set)
+        );
+
+        // Although diff=hex basically undoes the effect of binary macro attribute, we assume the
+        // user doesn't want to see them here. difft doesn't run git diff drivers, after all.
+        assert_eq!(
+            parse_output(&check_attr_output("wtf.bin", "hex", "set")),
+            Some(DiffAttribute::Unset)
+        );
+    }
+
+    #[test]
+    fn test_parse_output_custom_driver() {
+        // A custom diff driver without the binary attribute is reported as-is.
+        assert_eq!(
+            parse_output(&check_attr_output("wtf.bin", "hex", "unspecified")),
+            Some(DiffAttribute::Other("hex".to_owned()))
+        );
+    }
+
+    #[test]
+    fn test_parse_output_truncated() {
+        // The output stops before the attribute name and its value.
+        assert_eq!(parse_output(b"wtf.bin"), None);
+
+        // The output stops before the attribute value.
+        assert_eq!(parse_output(b"wtf.bin\x00diff"), None);
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 09f0f665b2..35593cf4f2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -47,6 +47,7 @@ mod diff;
 mod display;
 mod exit_codes;
 mod files;
+mod gitattributes;
 mod hash;
 mod line_parser;
 mod lines;
@@ -75,6 +76,7 @@ use crate::files::{
     guess_content, read_file_or_die, read_files_or_die, read_or_die, relative_paths_in_either,
     ProbableFileKind,
 };
+use crate::gitattributes::{check_diff_attr, DiffAttribute};
 use crate::parse::guess_language::{
     guess, language_globs, language_name, Language, LanguageOverride,
 };
@@ -414,8 +416,11 @@ fn diff_file(
     let (mut lhs_src, mut rhs_src) = match (
         guess_content(&lhs_bytes, lhs_path, binary_overrides),
         guess_content(&rhs_bytes, rhs_path, binary_overrides),
+        check_diff_attr(Path::new(display_path)),
     ) {
-        (ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => {
+        (ProbableFileKind::Binary, _, _)
+        | (_, ProbableFileKind::Binary, _)
+        | (_, _, Some(DiffAttribute::Unset)) => {
             let has_byte_changes = if lhs_bytes == rhs_bytes {
                 None
             } else {
@@ -434,7 +439,7 @@ fn diff_file(
                 has_syntactic_changes: false,
             };
         }
-        (ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src),
+        (ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src), _) => (lhs_src, rhs_src),
     };
 
     if diff_options.strip_cr {