diff --git a/doc/lsd.md b/doc/lsd.md index 78a120465..c42f5ec89 100644 --- a/doc/lsd.md +++ b/doc/lsd.md @@ -131,6 +131,9 @@ lsd is a ls command with a lot of pretty colours and some other stuff to enrich `--sort ...` : Sort by WORD instead of name [possible values: size, time, version, extension, git] +`--respect-locale` +: Respect locale when sorting by name. + `-U`, `--no-sort` : Do not sort. List entries in directory order diff --git a/doc/samples/config-sample.yaml b/doc/samples/config-sample.yaml index 3da640e5d..fd6196688 100644 --- a/doc/samples/config-sample.yaml +++ b/doc/samples/config-sample.yaml @@ -109,6 +109,9 @@ sorting: # When "classic" is set, this is set to "none". # Possible values: first, last, none dir-grouping: none + # Whether to respect locale when sorting by name. + # Possible values: false, true + respect-locale: false # == No Symlink == # Whether to omit showing symlink targets diff --git a/src/app.rs b/src/app.rs index c8d80c7cb..25565bd77 100644 --- a/src/app.rs +++ b/src/app.rs @@ -125,6 +125,10 @@ pub struct Cli { #[arg(short, long)] pub reverse: bool, + /// Use locale-aware sorting for names + #[arg(long)] + pub respect_locale: bool, + /// Sort the directories then the files #[arg(long, value_name = "MODE", value_parser = ["none", "first", "last"])] pub group_dirs: Option, diff --git a/src/config_file.rs b/src/config_file.rs index 3a5dbe53b..0ce782c86 100644 --- a/src/config_file.rs +++ b/src/config_file.rs @@ -71,6 +71,7 @@ pub struct Sorting { pub column: Option, pub reverse: Option, pub dir_grouping: Option, + pub respect_locale: Option, } #[derive(Eq, PartialEq, Debug, Deserialize)] @@ -324,6 +325,9 @@ sorting: # When "classic" is set, this is set to "none". # Possible values: first, last, none dir-grouping: none + # Whether to respect locale when sorting by name. + # Possible values: false, true + respect-locale: false # == No Symlink == # Whether to omit showing symlink targets @@ -421,6 +425,7 @@ mod tests { column: Some(SortColumn::Name), reverse: Some(false), dir_grouping: Some(DirGrouping::None), + respect_locale: Some(false), }), no_symlink: Some(false), total_size: Some(false), diff --git a/src/flags/sorting.rs b/src/flags/sorting.rs index 3e895933d..b60b235cd 100644 --- a/src/flags/sorting.rs +++ b/src/flags/sorting.rs @@ -14,6 +14,7 @@ pub struct Sorting { pub column: SortColumn, pub order: SortOrder, pub dir_grouping: DirGrouping, + pub respect_locale: bool, } impl Sorting { @@ -25,12 +26,40 @@ impl Sorting { let column = SortColumn::configure_from(cli, config); let order = SortOrder::configure_from(cli, config); let dir_grouping = DirGrouping::configure_from(cli, config); + let respect_locale = Self::respect_locale_from(cli, config); Self { column, order, dir_grouping, + respect_locale, } } + + /// Get the "respect_locale" boolean from [Cli], a [Config] or the [Default] value. The first + /// value that is not [None] is used. The order of precedence for the value used is: + /// - [respect_locale_from_cli](Sorting::respect_locale_from_cli) + /// - [Config.sorting.respect_locale] + /// - [Default::default] + fn respect_locale_from(cli: &Cli, config: &Config) -> bool { + if let Some(value) = Self::respect_locale_from_cli(cli) { + return value; + } + if let Some(sorting) = &config.sorting { + if let Some(respect_locale) = sorting.respect_locale { + return respect_locale; + } + } + + Default::default() + } + + /// Get a potential "respect_locale" boolean from [Cli]. + /// + /// If the "respect_locale" argument is passed, this returns `true` in a [Some]. Otherwise this + /// returns [None]. + fn respect_locale_from_cli(cli: &Cli) -> Option { + if cli.respect_locale { Some(true) } else { None } + } } /// The flag showing which column to use for sorting. @@ -293,6 +322,7 @@ mod test_sort_column { column: None, reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(None, SortColumn::from_config(&c)); @@ -305,6 +335,7 @@ mod test_sort_column { column: Some(SortColumn::Extension), reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortColumn::Extension), SortColumn::from_config(&c)); } @@ -316,6 +347,7 @@ mod test_sort_column { column: Some(SortColumn::Name), reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortColumn::Name), SortColumn::from_config(&c)); } @@ -327,6 +359,7 @@ mod test_sort_column { column: Some(SortColumn::Time), reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortColumn::Time), SortColumn::from_config(&c)); } @@ -338,6 +371,7 @@ mod test_sort_column { column: Some(SortColumn::Size), reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortColumn::Size), SortColumn::from_config(&c)); } @@ -349,6 +383,7 @@ mod test_sort_column { column: Some(SortColumn::Version), reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortColumn::Version), SortColumn::from_config(&c)); } @@ -360,6 +395,7 @@ mod test_sort_column { column: Some(SortColumn::GitStatus), reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortColumn::GitStatus), SortColumn::from_config(&c)); } @@ -409,6 +445,7 @@ mod test_sort_order { column: None, reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(None, SortOrder::from_config(&c)); } @@ -420,6 +457,7 @@ mod test_sort_order { column: None, reverse: Some(true), dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortOrder::Reverse), SortOrder::from_config(&c)); } @@ -431,6 +469,7 @@ mod test_sort_order { column: None, reverse: Some(false), dir_grouping: None, + respect_locale: None, }); assert_eq!(Some(SortOrder::Default), SortOrder::from_config(&c)); } @@ -513,6 +552,7 @@ mod test_dir_grouping { column: None, reverse: None, dir_grouping: Some(DirGrouping::First), + respect_locale: None, }); assert_eq!(Some(DirGrouping::First), DirGrouping::from_config(&c)); } @@ -524,6 +564,7 @@ mod test_dir_grouping { column: None, reverse: None, dir_grouping: Some(DirGrouping::Last), + respect_locale: None, }); assert_eq!(Some(DirGrouping::Last), DirGrouping::from_config(&c)); } @@ -535,6 +576,7 @@ mod test_dir_grouping { column: None, reverse: None, dir_grouping: None, + respect_locale: None, }); assert_eq!(None, DirGrouping::from_config(&c)); } @@ -546,6 +588,7 @@ mod test_dir_grouping { column: None, reverse: None, dir_grouping: Some(DirGrouping::Last), + respect_locale: None, }); c.classic = Some(true); assert_eq!(Some(DirGrouping::None), DirGrouping::from_config(&c)); diff --git a/src/meta/name.rs b/src/meta/name.rs index 788c8907e..d5eed84f3 100644 --- a/src/meta/name.rs +++ b/src/meta/name.rs @@ -195,6 +195,26 @@ impl Name { pub fn file_type(&self) -> FileType { self.file_type } + + // Locale-aware comparison using strcoll for matching the behavior of `ls`. + #[cfg(unix)] + pub fn cmp_locale(&self, other: &Self) -> Ordering { + use std::ffi::CString; + use std::sync::Once; + static LOCALE_INIT: Once = Once::new(); + LOCALE_INIT.call_once(|| unsafe { + libc::setlocale(libc::LC_ALL, b"\0".as_ptr() as *const libc::c_char); + }); + let a = CString::new(self.name.as_str()).unwrap_or_default(); + let b = CString::new(other.name.as_str()).unwrap_or_default(); + let result = unsafe { libc::strcoll(a.as_ptr(), b.as_ptr()) }; + result.cmp(&0) + } + + #[cfg(not(unix))] + pub fn cmp_locale(&self, other: &Self) -> Ordering { + self.cmp(other) + } } impl Ord for Name { @@ -230,6 +250,8 @@ mod test { use crate::url::Url; use crossterm::style::{Color, Stylize}; use std::cmp::Ordering; + #[cfg(unix)] + use std::ffi::CString; use std::fs::{self, File}; #[cfg(unix)] use std::os::unix::fs::symlink; @@ -593,6 +615,121 @@ mod test { assert!(name_1 == name_2); } + // Helper function for locale testing, + // triggers the Once inside cmp_locale then override locale for testing + #[cfg(unix)] + fn set_locale_for_cmp(locale: &str) { + // Trigger Once::call_once inside cmp_locale so it won't override later + let dummy = Name::new( + Path::new("x"), + FileType::File { + uid: false, + exec: false, + }, + ); + let _ = dummy.cmp_locale(&dummy); + let loc = CString::new(locale).unwrap(); + unsafe { + libc::setlocale(libc::LC_ALL, loc.as_ptr()); + } + } + + #[test] + #[serial_test::serial] + #[cfg(unix)] + fn test_cmp_locale_c_name_struct() { + set_locale_for_cmp("C"); + + let name_upper = Name::new( + Path::new("B"), + FileType::File { + uid: false, + exec: false, + }, + ); + let name_lower = Name::new( + Path::new("a"), + FileType::File { + uid: false, + exec: false, + }, + ); + let name_dot_lower = Name::new( + Path::new(".a"), + FileType::File { + uid: false, + exec: false, + }, + ); + let name_dot_upper = Name::new( + Path::new(".A"), + FileType::File { + uid: false, + exec: false, + }, + ); + + // In C locale: "B" (0x42) < "a" (0x61) by byte order + assert_eq!(name_upper.cmp_locale(&name_lower), Ordering::Less); + assert_eq!(name_lower.cmp_locale(&name_upper), Ordering::Greater); + + // In C locale: dot (0x2E) < uppercase (0x41+) < lowercase (0x61+) + // ".a" < "a", ".a" < "A", ".A" < "a", ".A" < "A" + assert_eq!(name_dot_lower.cmp_locale(&name_lower), Ordering::Less); + assert_eq!(name_dot_lower.cmp_locale(&name_upper), Ordering::Less); + assert_eq!(name_dot_upper.cmp_locale(&name_lower), Ordering::Less); + assert_eq!(name_dot_upper.cmp_locale(&name_upper), Ordering::Less); + } + + #[test] + #[serial_test::serial] + #[cfg(unix)] + fn test_cmp_locale_en_us_utf8_name_struct() { + set_locale_for_cmp("en_US.UTF-8"); + + let name_upper = Name::new( + Path::new("B"), + FileType::File { + uid: false, + exec: false, + }, + ); + let name_lower = Name::new( + Path::new("a"), + FileType::File { + uid: false, + exec: false, + }, + ); + let name_dot_lower = Name::new( + Path::new(".a"), + FileType::File { + uid: false, + exec: false, + }, + ); + let name_dot_upper = Name::new( + Path::new(".A"), + FileType::File { + uid: false, + exec: false, + }, + ); + + // In en_US.UTF-8: "a" sorts before "B" (alphabetic order) + assert_eq!(name_lower.cmp_locale(&name_upper), Ordering::Less); + assert_eq!(name_upper.cmp_locale(&name_lower), Ordering::Greater); + + // In en_US.UTF-8: dot is mostly ignored for primary sort key + // ".a" < "a", ".a" < "A" (dot as secondary tiebreaker) + assert_eq!(name_dot_lower.cmp_locale(&name_lower), Ordering::Less); + assert_eq!(name_dot_lower.cmp_locale(&name_upper), Ordering::Less); + // ".A" > "a" (primary key: A vs a, A comes after a in en_US) + assert_eq!(name_dot_upper.cmp_locale(&name_lower), Ordering::Greater); + // ".A" < "A" (same letter, dot version sorts first) + assert_eq!(name_dot_upper.cmp_locale(&name_upper), Ordering::Less); + } + #[test] fn test_parent_relative_path() { let name = Name::new( diff --git a/src/sort.rs b/src/sort.rs index dcbb2dc2a..2f5eb0969 100644 --- a/src/sort.rs +++ b/src/sort.rs @@ -18,7 +18,13 @@ pub fn assemble_sorters(flags: &Flags) -> Vec<(SortOrder, SortFn)> { }; match flags.sorting.column { - SortColumn::Name => sorters.push((flags.sorting.order, by_name)), + SortColumn::Name => { + if flags.sorting.respect_locale { + sorters.push((flags.sorting.order, by_name_locale)); + } else { + sorters.push((flags.sorting.order, by_name)); + } + } SortColumn::Size => sorters.push((flags.sorting.order, by_size)), SortColumn::Time => sorters.push((flags.sorting.order, by_date)), SortColumn::Version => sorters.push((flags.sorting.order, by_version)), @@ -61,6 +67,10 @@ fn by_name(a: &Meta, b: &Meta) -> Ordering { a.name.cmp(&b.name) } +fn by_name_locale(a: &Meta, b: &Meta) -> Ordering { + a.name.cmp_locale(&b.name) +} + fn by_date(a: &Meta, b: &Meta) -> Ordering { b.date.cmp(&a.date).then(a.name.cmp(&b.name)) }