From 8015f0c37a703829e024c7f2c7321b514211f8e8 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Sun, 31 Aug 2025 00:52:55 -0400 Subject: [PATCH 01/14] driveby refactor --- src/collection/processes/linux/mod.rs | 7 +------ src/collection/processes/unix/user_table.rs | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index 9c349c392..992722e3e 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -200,12 +200,7 @@ fn read_proc( }; let user = uid - .and_then(|uid| { - user_table - .get_uid_to_username_mapping(uid) - .map(Into::into) - .ok() - }) + .and_then(|uid| user_table.uid_to_username(uid).map(Into::into).ok()) .unwrap_or_else(|| "N/A".into()); let time = if let Ok(ticks_per_sec) = u32::try_from(rustix::param::clock_ticks_per_second()) { diff --git a/src/collection/processes/unix/user_table.rs b/src/collection/processes/unix/user_table.rs index 036fe60e3..a9d483783 100644 --- a/src/collection/processes/unix/user_table.rs +++ b/src/collection/processes/unix/user_table.rs @@ -8,7 +8,7 @@ pub struct UserTable { } impl UserTable { - pub fn get_uid_to_username_mapping(&mut self, uid: libc::uid_t) -> CollectionResult { + pub fn uid_to_username(&mut self, uid: libc::uid_t) -> CollectionResult { if let Some(user) = self.uid_user_mapping.get(&uid) { Ok(user.clone()) } else { From bb741cf6b4780b6726afd7f5db002b43f0effd31 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Sun, 31 Aug 2025 01:20:21 -0400 Subject: [PATCH 02/14] reduce some allocations in amd step --- Cargo.toml | 2 +- src/collection/amd.rs | 15 +++++++++------ src/collection/processes/linux/mod.rs | 4 +--- src/collection/processes/unix/user_table.rs | 4 +++- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3f2df0a1d..4da487544 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,7 +124,7 @@ strum = { version = "0.27.2", features = ["derive"], optional = true } libc = "0.2.175" [target.'cfg(target_os = "linux")'.dependencies] -rustix = { version = "1.0.8", features = ["fs", "param"] } +rustix = { version = "1.0.8", features = ["fs", "param", "alloc"] } [target.'cfg(target_os = "macos")'.dependencies] core-foundation = "0.10.1" diff --git a/src/collection/amd.rs b/src/collection/amd.rs index 18fe5d739..6b8130376 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -162,7 +162,7 @@ fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 { } // from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L13-L27 -fn get_amdgpu_pid_fds(pid: u32, device_path: Vec) -> Option> { +fn get_amdgpu_pid_fds(pid: u32, device_path: &[String]) -> Option> { let Ok(fd_list) = fs::read_dir(format!("/proc/{pid}/fd/")) else { return None; }; @@ -170,10 +170,13 @@ fn get_amdgpu_pid_fds(pid: u32, device_path: Vec) -> Option> { let valid_fds: Vec = fd_list .filter_map(|fd_link| { let dir_entry = fd_link.map(|fd_link| fd_link.path()).ok()?; - let link = fs::read_link(&dir_entry).ok()?; + let link = rustix::fs::readlink(&dir_entry, vec![]).ok()?; // e.g. "/dev/dri/renderD128" or "/dev/dri/card0" - if device_path.iter().any(|path| link.starts_with(path)) { + if device_path + .iter() + .any(|path| link.to_string_lossy().starts_with(path)) + { dir_entry.file_name()?.to_str()?.parse::().ok() } else { None @@ -188,7 +191,7 @@ fn get_amdgpu_pid_fds(pid: u32, device_path: Vec) -> Option> { } } -fn get_amdgpu_drm(device_path: &Path) -> Option> { +fn get_amdgpu_drm(device_path: &Path) -> Option> { let mut drm_devices = Vec::new(); let drm_root = device_path.join("drm"); @@ -212,7 +215,7 @@ fn get_amdgpu_drm(device_path: &Path) -> Option> { continue; } - drm_devices.push(PathBuf::from(format!("/dev/dri/{drm_name}"))); + drm_devices.push(format!("/dev/dri/{drm_name}")); } if drm_devices.is_empty() { @@ -254,7 +257,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option> { for pid in pids { // collect file descriptors that point to our device renderers - let Some(fds) = get_amdgpu_pid_fds(pid, drm_paths.clone()) else { + let Some(fds) = get_amdgpu_pid_fds(pid, &drm_paths) else { continue; }; diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index 992722e3e..a64952f7c 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -137,7 +137,7 @@ fn read_proc( thread_parent: Option, ) -> CollectionResult<(ProcessHarvest, u64)> { let Process { - pid: _pid, + pid: _, uid, stat, io, @@ -366,8 +366,6 @@ pub(crate) fn linux_process_data( prev_non_idle, } = prev_proc; - // TODO: [PROC THREADS] Add threads - let CpuUsage { mut cpu_usage, cpu_fraction, diff --git a/src/collection/processes/unix/user_table.rs b/src/collection/processes/unix/user_table.rs index a9d483783..b2836a51c 100644 --- a/src/collection/processes/unix/user_table.rs +++ b/src/collection/processes/unix/user_table.rs @@ -8,11 +8,13 @@ pub struct UserTable { } impl UserTable { + /// Get the username associated with a UID. On first access of a name, it will + /// be cached for future accesses. pub fn uid_to_username(&mut self, uid: libc::uid_t) -> CollectionResult { if let Some(user) = self.uid_user_mapping.get(&uid) { Ok(user.clone()) } else { - // SAFETY: getpwuid returns a null pointer if no passwd entry is found for the uid. + // SAFETY: getpwuid returns a null pointer if no passwd entry is found for the uid which we check. let passwd = unsafe { libc::getpwuid(uid) }; if passwd.is_null() { From f5833c5bfb02bf151b94525eab0fb4ebe6109207 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Sun, 31 Aug 2025 01:25:54 -0400 Subject: [PATCH 03/14] some todos --- src/collection/amd.rs | 1 + src/collection/processes/linux/process.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/collection/amd.rs b/src/collection/amd.rs index 6b8130376..cf59011ef 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -170,6 +170,7 @@ fn get_amdgpu_pid_fds(pid: u32, device_path: &[String]) -> Option> { let valid_fds: Vec = fd_list .filter_map(|fd_link| { let dir_entry = fd_link.map(|fd_link| fd_link.path()).ok()?; + // TODO: Could we reuse the buffer in some way? let link = rustix::fs::readlink(&dir_entry, vec![]).ok()?; // e.g. "/dev/dri/renderD128" or "/dev/dri/card0" diff --git a/src/collection/processes/linux/process.rs b/src/collection/processes/linux/process.rs index 6d6f5b326..b6c745be9 100644 --- a/src/collection/processes/linux/process.rs +++ b/src/collection/processes/linux/process.rs @@ -250,6 +250,7 @@ impl Process { .next_back() .and_then(|s| s.to_string_lossy().parse::().ok()) .or_else(|| { + // TODO: Could we reuse the buffer in some way? rustix::fs::readlinkat(rustix::fs::CWD, pid_path.as_path(), vec![]) .ok() .and_then(|s| s.to_string_lossy().parse::().ok()) From 85eee5dcdcdf7efd8742c1163739060d03eee6a8 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Sun, 31 Aug 2025 02:12:07 -0400 Subject: [PATCH 04/14] hmmm --- src/collection.rs | 5 +++++ src/collection/processes/linux/mod.rs | 14 ++++++++----- src/collection/processes/linux/process.rs | 25 +++++++++++++++-------- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/collection.rs b/src/collection.rs index 93a16fa10..1d07d8a2d 100644 --- a/src/collection.rs +++ b/src/collection.rs @@ -171,6 +171,8 @@ pub struct DataCollector { prev_idle: f64, #[cfg(target_os = "linux")] prev_non_idle: f64, + #[cfg(target_os = "linux")] + process_buffer: String, #[cfg(feature = "battery")] battery_manager: Option, @@ -224,6 +226,9 @@ impl DataCollector { gpus_total_mem: None, last_list_collection_time: last_collection_time, should_run_less_routine_tasks: true, + #[cfg(target_os = "linux")] + // SAFETY: This is an empty string. + process_buffer: unsafe { String::from_utf8_unchecked(Vec::with_capacity(16_384)) } } } diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index a64952f7c..ac8e84a31 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -404,14 +404,16 @@ pub(crate) fn linux_process_data( }; // TODO: Maybe pre-allocate these buffers in the future w/ routine cleanup. - let mut buffer = String::new(); + // SAFETY: This is safe, we're converting an empty string. let mut process_threads_to_check = HashMap::new(); let mut process_vector: Vec = pids .filter_map(|pid_path| { - if let Ok((process, threads)) = - Process::from_path(pid_path, &mut buffer, args.get_process_threads) - { + if let Ok((process, threads)) = Process::from_path( + pid_path, + &mut collector.process_buffer, + args.get_process_threads, + ) { let pid = process.pid; let prev_proc_details = prev_process_details.entry(pid).or_default(); @@ -455,7 +457,9 @@ pub(crate) fn linux_process_data( // Get thread data. for (pid, tid_paths) in process_threads_to_check { for tid_path in tid_paths { - if let Ok((process, _)) = Process::from_path(tid_path, &mut buffer, false) { + if let Ok((process, _)) = + Process::from_path(tid_path, &mut collector.process_buffer, false) + { let tid = process.pid; let prev_proc_details = prev_process_details.entry(tid).or_default(); diff --git a/src/collection/processes/linux/process.rs b/src/collection/processes/linux/process.rs index b6c745be9..711467603 100644 --- a/src/collection/processes/linux/process.rs +++ b/src/collection/processes/linux/process.rs @@ -3,7 +3,7 @@ use std::{ fs::File, - io::{self, BufRead, BufReader, Read}, + io::{self, BufRead, BufReader}, path::PathBuf, sync::OnceLock, }; @@ -65,10 +65,13 @@ impl Stat { /// Get process stats from a file; this assumes the file is located at /// `/proc//stat`. For documentation, see /// [here](https://manpages.ubuntu.com/manpages/noble/man5/proc_pid_stat.5.html) as a reference. - fn from_file(mut f: File, buffer: &mut String) -> anyhow::Result { + fn from_file(fd: OwnedFd, buffer: &mut String) -> anyhow::Result { // Since this is just one line, we can read it all at once. However, since it // (technically) might have non-utf8 characters, we can't just use read_to_string. - f.read_to_end(unsafe { buffer.as_mut_vec() })?; + // + // TODO: Can we read per delim. token to avoid memory? + // SAFETY: We are only going to be reading strings. + rustix::io::read(&fd, unsafe { buffer.as_mut_vec() })?; // TODO: Is this needed? let line = buffer.trim(); @@ -136,13 +139,15 @@ pub(crate) struct Io { impl Io { #[inline] - fn from_file(f: File, buffer: &mut String) -> anyhow::Result { + fn from_file(fd: OwnedFd, buffer: &mut String) -> anyhow::Result { const NUM_FIELDS: u16 = 0; // Make sure to update this if you want more fields! enum Fields { ReadBytes, WriteBytes, } + let f = File::from(fd); + let mut read_fields = 0; let mut reader = BufReader::new(f); @@ -272,7 +277,7 @@ impl Process { // Stat is pretty long, do this first to pre-allocate up-front. let stat = - open_at(&mut root, "stat", &pid_dir).and_then(|file| Stat::from_file(file, buffer))?; + open_at(&mut root, "stat", &pid_dir).and_then(|fd| Stat::from_file(fd, buffer))?; reset(&mut root, buffer); let cmdline = if cmdline(&mut root, &pid_dir, buffer).is_ok() { @@ -306,20 +311,22 @@ impl Process { #[inline] fn cmdline(root: &mut PathBuf, fd: &OwnedFd, buffer: &mut String) -> anyhow::Result<()> { - let _ = open_at(root, "cmdline", fd).map(|mut file| file.read_to_string(buffer))?; + // SAFETY: This is safe, we are only writing strings. + let _ = open_at(root, "cmdline", fd) + .map(|cmdline_fd| rustix::io::read(cmdline_fd, unsafe { buffer.as_mut_vec() }))?; Ok(()) } -/// Opens a path. Note that this function takes in a mutable root - this will +/// Opens a path and return the file. Note that this function takes in a mutable root - this will /// mutate it to avoid allocations. You probably will want to pop the most /// recent child after if you need to use the buffer again. #[inline] -fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result { +fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result { root.push(child); let new_fd = rustix::fs::openat(fd, &*root, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty())?; - Ok(File::from(new_fd)) + Ok(new_fd) } #[inline] From d510c74ee7c55de78a6c7e36043c637f80eb9fe1 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 04:42:26 -0400 Subject: [PATCH 05/14] try custom read_link --- src/collection.rs | 4 +-- src/collection/amd.rs | 20 ++++++------ src/collection/linux/utils.rs | 30 +++++++++++++++++- src/collection/processes/linux/mod.rs | 6 ++-- src/collection/processes/linux/process.rs | 38 +++++++++++------------ 5 files changed, 64 insertions(+), 34 deletions(-) diff --git a/src/collection.rs b/src/collection.rs index 1d07d8a2d..447af3b63 100644 --- a/src/collection.rs +++ b/src/collection.rs @@ -227,8 +227,8 @@ impl DataCollector { last_list_collection_time: last_collection_time, should_run_less_routine_tasks: true, #[cfg(target_os = "linux")] - // SAFETY: This is an empty string. - process_buffer: unsafe { String::from_utf8_unchecked(Vec::with_capacity(16_384)) } + // TODO: Maybe pre-allocate this? I've tried this before with 16_384 bytes and it was ok? + process_buffer: String::new() } } diff --git a/src/collection/amd.rs b/src/collection/amd.rs index cf59011ef..84bd153de 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -11,7 +11,10 @@ use std::{ use hashbrown::{HashMap, HashSet}; use super::linux::utils::is_device_awake; -use crate::{app::layout_manager::UsedWidgets, collection::memory::MemData}; +use crate::{ + app::layout_manager::UsedWidgets, + collection::{linux::utils::read_link, memory::MemData}, +}; // TODO: May be able to clean up some of these, Option for example is a bit redundant. pub struct AmdGpuData { @@ -162,7 +165,7 @@ fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 { } // from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L13-L27 -fn get_amdgpu_pid_fds(pid: u32, device_path: &[String]) -> Option> { +fn get_amdgpu_pid_fds(pid: u32, device_path: &[String], buffer: &mut Vec) -> Option> { let Ok(fd_list) = fs::read_dir(format!("/proc/{pid}/fd/")) else { return None; }; @@ -170,14 +173,12 @@ fn get_amdgpu_pid_fds(pid: u32, device_path: &[String]) -> Option> { let valid_fds: Vec = fd_list .filter_map(|fd_link| { let dir_entry = fd_link.map(|fd_link| fd_link.path()).ok()?; - // TODO: Could we reuse the buffer in some way? - let link = rustix::fs::readlink(&dir_entry, vec![]).ok()?; + let link = read_link(&dir_entry, buffer).ok()?; + + crate::info!("link: {link}"); // e.g. "/dev/dri/renderD128" or "/dev/dri/card0" - if device_path - .iter() - .any(|path| link.to_string_lossy().starts_with(path)) - { + if device_path.iter().any(|path| link.starts_with(path)) { dir_entry.file_name()?.to_str()?.parse::().ok() } else { None @@ -228,6 +229,7 @@ fn get_amdgpu_drm(device_path: &Path) -> Option> { fn get_amd_fdinfo(device_path: &Path) -> Option> { let mut fdinfo = HashMap::new(); + let mut buffer = Vec::new(); let drm_paths = get_amdgpu_drm(device_path)?; @@ -258,7 +260,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option> { for pid in pids { // collect file descriptors that point to our device renderers - let Some(fds) = get_amdgpu_pid_fds(pid, &drm_paths) else { + let Some(fds) = get_amdgpu_pid_fds(pid, &drm_paths, &mut buffer) else { continue; }; diff --git a/src/collection/linux/utils.rs b/src/collection/linux/utils.rs index ff148a6ff..444cd26c0 100644 --- a/src/collection/linux/utils.rs +++ b/src/collection/linux/utils.rs @@ -1,4 +1,6 @@ -use std::{fs, path::Path}; +use std::{borrow::Cow, fs, os::unix::ffi::OsStrExt, path::Path}; + +use libc::PATH_MAX; /// Whether the temperature should *actually* be read during enumeration. /// Will return false if the state is not D0/unknown, or if it does not support @@ -28,3 +30,29 @@ pub fn is_device_awake(device: &Path) -> bool { true } } + +/// A custom implementation to read a symlink while allowing for buffer reuse. +/// +/// If successful, then a [`Cow`] will be returned referencing the contents of `buffer`. +pub(crate) fn read_link<'a>(path: &Path, buffer: &'a mut Vec) -> std::io::Result> { + let c_path = std::ffi::CString::new(path.as_os_str().as_bytes())?; + + if buffer.len() < PATH_MAX as usize { + buffer.resize(PATH_MAX as usize, 0); + } + + // SAFETY: this is a libc API; we must check the length which we do below. + let len = unsafe { + libc::readlink( + c_path.as_ptr(), + buffer.as_mut_ptr() as *mut libc::c_char, + buffer.len(), + ) + }; + + if len < 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(String::from_utf8_lossy(&buffer[..len as usize])) +} + diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index ac8e84a31..04f59a34c 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -342,6 +342,10 @@ pub(crate) struct ReadProcArgs { pub(crate) fn linux_process_data( collector: &mut DataCollector, time_difference_in_secs: u64, ) -> CollectionResult> { + if collector.should_run_less_routine_tasks { + collector.process_buffer = String::new(); + } + let total_memory = collector.total_memory(); let prev_proc = PrevProc { prev_idle: &mut collector.prev_idle, @@ -403,8 +407,6 @@ pub(crate) fn linux_process_data( get_process_threads: get_threads, }; - // TODO: Maybe pre-allocate these buffers in the future w/ routine cleanup. - // SAFETY: This is safe, we're converting an empty string. let mut process_threads_to_check = HashMap::new(); let mut process_vector: Vec = pids diff --git a/src/collection/processes/linux/process.rs b/src/collection/processes/linux/process.rs index 711467603..94a5b3cff 100644 --- a/src/collection/processes/linux/process.rs +++ b/src/collection/processes/linux/process.rs @@ -3,7 +3,7 @@ use std::{ fs::File, - io::{self, BufRead, BufReader}, + io::{self, BufRead, BufReader, Read}, path::PathBuf, sync::OnceLock, }; @@ -16,7 +16,10 @@ use rustix::{ path::Arg, }; -use crate::collection::processes::{Pid, linux::is_str_numeric}; +use crate::collection::{ + linux::utils::read_link, + processes::{Pid, linux::is_str_numeric}, +}; static PAGESIZE: OnceLock = OnceLock::new(); @@ -65,13 +68,10 @@ impl Stat { /// Get process stats from a file; this assumes the file is located at /// `/proc//stat`. For documentation, see /// [here](https://manpages.ubuntu.com/manpages/noble/man5/proc_pid_stat.5.html) as a reference. - fn from_file(fd: OwnedFd, buffer: &mut String) -> anyhow::Result { + fn from_file(mut f: File, buffer: &mut String) -> anyhow::Result { // Since this is just one line, we can read it all at once. However, since it // (technically) might have non-utf8 characters, we can't just use read_to_string. - // - // TODO: Can we read per delim. token to avoid memory? - // SAFETY: We are only going to be reading strings. - rustix::io::read(&fd, unsafe { buffer.as_mut_vec() })?; + f.read_to_end(unsafe { buffer.as_mut_vec() })?; // TODO: Is this needed? let line = buffer.trim(); @@ -139,15 +139,13 @@ pub(crate) struct Io { impl Io { #[inline] - fn from_file(fd: OwnedFd, buffer: &mut String) -> anyhow::Result { + fn from_file(f: File, buffer: &mut String) -> anyhow::Result { const NUM_FIELDS: u16 = 0; // Make sure to update this if you want more fields! enum Fields { ReadBytes, WriteBytes, } - let f = File::from(fd); - let mut read_fields = 0; let mut reader = BufReader::new(f); @@ -255,10 +253,12 @@ impl Process { .next_back() .and_then(|s| s.to_string_lossy().parse::().ok()) .or_else(|| { - // TODO: Could we reuse the buffer in some way? - rustix::fs::readlinkat(rustix::fs::CWD, pid_path.as_path(), vec![]) + // SAFETY: We can do this safely, we plan to only put a valid string in here. + let buffer = unsafe { buffer.as_mut_vec() }; + + read_link(pid_path.as_path(), buffer) .ok() - .and_then(|s| s.to_string_lossy().parse::().ok()) + .and_then(|s| s.parse::().ok()) }) .ok_or_else(|| anyhow!("PID for {pid_path:?} was not found"))?; @@ -277,7 +277,7 @@ impl Process { // Stat is pretty long, do this first to pre-allocate up-front. let stat = - open_at(&mut root, "stat", &pid_dir).and_then(|fd| Stat::from_file(fd, buffer))?; + open_at(&mut root, "stat", &pid_dir).and_then(|file| Stat::from_file(file, buffer))?; reset(&mut root, buffer); let cmdline = if cmdline(&mut root, &pid_dir, buffer).is_ok() { @@ -311,22 +311,20 @@ impl Process { #[inline] fn cmdline(root: &mut PathBuf, fd: &OwnedFd, buffer: &mut String) -> anyhow::Result<()> { - // SAFETY: This is safe, we are only writing strings. - let _ = open_at(root, "cmdline", fd) - .map(|cmdline_fd| rustix::io::read(cmdline_fd, unsafe { buffer.as_mut_vec() }))?; + let _ = open_at(root, "cmdline", fd).map(|mut file| file.read_to_string(buffer))?; Ok(()) } -/// Opens a path and return the file. Note that this function takes in a mutable root - this will +/// Opens a path. Note that this function takes in a mutable root - this will /// mutate it to avoid allocations. You probably will want to pop the most /// recent child after if you need to use the buffer again. #[inline] -fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result { +fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result { root.push(child); let new_fd = rustix::fs::openat(fd, &*root, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty())?; - Ok(new_fd) + Ok(File::from(new_fd)) } #[inline] From 80d2ef21f0b3e167e64ff01a6588f99a8e716c81 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 04:43:11 -0400 Subject: [PATCH 06/14] fix name --- src/collection/processes/unix/process_ext.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/collection/processes/unix/process_ext.rs b/src/collection/processes/unix/process_ext.rs index d303323b3..9498f0e85 100644 --- a/src/collection/processes/unix/process_ext.rs +++ b/src/collection/processes/unix/process_ext.rs @@ -89,12 +89,7 @@ pub(crate) trait UnixProcessExt { process_state, uid, user: uid - .and_then(|uid| { - user_table - .get_uid_to_username_mapping(uid) - .map(Into::into) - .ok() - }) + .and_then(|uid| user_table.uid_to_username(uid).map(Into::into).ok()) .unwrap_or_else(|| "N/A".into()), time: if process_val.start_time() == 0 { // Workaround for sysinfo occasionally returning a start time equal to UNIX From 7b88dbe27c8ad2304f35b1bc48d9062ba2fcd965 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 04:43:23 -0400 Subject: [PATCH 07/14] fmt --- src/collection/linux/utils.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/collection/linux/utils.rs b/src/collection/linux/utils.rs index 444cd26c0..9358fa629 100644 --- a/src/collection/linux/utils.rs +++ b/src/collection/linux/utils.rs @@ -55,4 +55,3 @@ pub(crate) fn read_link<'a>(path: &Path, buffer: &'a mut Vec) -> std::io::Re } Ok(String::from_utf8_lossy(&buffer[..len as usize])) } - From a226a10be8ccc7f5721a4980591c058c65a2eb45 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 05:13:14 -0400 Subject: [PATCH 08/14] what --- Cargo.toml | 2 +- src/collection/amd.rs | 14 +++++++------- src/collection/linux/utils.rs | 11 ++++++++++- src/collection/processes/linux/process.rs | 1 + 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4da487544..3f2df0a1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,7 +124,7 @@ strum = { version = "0.27.2", features = ["derive"], optional = true } libc = "0.2.175" [target.'cfg(target_os = "linux")'.dependencies] -rustix = { version = "1.0.8", features = ["fs", "param", "alloc"] } +rustix = { version = "1.0.8", features = ["fs", "param"] } [target.'cfg(target_os = "macos")'.dependencies] core-foundation = "0.10.1" diff --git a/src/collection/amd.rs b/src/collection/amd.rs index 84bd153de..0c4729d8e 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -171,15 +171,15 @@ fn get_amdgpu_pid_fds(pid: u32, device_path: &[String], buffer: &mut Vec) -> }; let valid_fds: Vec = fd_list - .filter_map(|fd_link| { - let dir_entry = fd_link.map(|fd_link| fd_link.path()).ok()?; - let link = read_link(&dir_entry, buffer).ok()?; - - crate::info!("link: {link}"); + .flatten() + .filter_map(|entry| { + let path = entry.path(); + let link = read_link(path.as_path(), buffer).ok()?; // e.g. "/dev/dri/renderD128" or "/dev/dri/card0" if device_path.iter().any(|path| link.starts_with(path)) { - dir_entry.file_name()?.to_str()?.parse::().ok() + // TODO: Should we bother parsing here? + path.file_name()?.to_str()?.parse::().ok() } else { None } @@ -265,7 +265,6 @@ fn get_amd_fdinfo(device_path: &Path) -> Option> { }; let mut usage: AmdGpuProc = Default::default(); - let mut observed_ids: HashSet = HashSet::new(); for fd in fds { @@ -277,6 +276,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option> { let mut fdinfo_lines = fdinfo_data .lines() .skip_while(|l| !l.starts_with("drm-client-id")); + if let Some(id) = fdinfo_lines.next().and_then(|fdinfo_line| { const LEN: usize = "drm-client-id:\t".len(); fdinfo_line.get(LEN..)?.parse().ok() diff --git a/src/collection/linux/utils.rs b/src/collection/linux/utils.rs index 9358fa629..46629f95a 100644 --- a/src/collection/linux/utils.rs +++ b/src/collection/linux/utils.rs @@ -31,12 +31,21 @@ pub fn is_device_awake(device: &Path) -> bool { } } -/// A custom implementation to read a symlink while allowing for buffer reuse. +/// A custom implementation to read a symlink while allowing for buffer reuse. If the path is +/// not a symlink, this will also return an error. /// /// If successful, then a [`Cow`] will be returned referencing the contents of `buffer`. pub(crate) fn read_link<'a>(path: &Path, buffer: &'a mut Vec) -> std::io::Result> { + if !path.is_symlink() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "path is not a symlink", + )); + } + let c_path = std::ffi::CString::new(path.as_os_str().as_bytes())?; + buffer.clear(); if buffer.len() < PATH_MAX as usize { buffer.resize(PATH_MAX as usize, 0); } diff --git a/src/collection/processes/linux/process.rs b/src/collection/processes/linux/process.rs index 94a5b3cff..1ca6e76a7 100644 --- a/src/collection/processes/linux/process.rs +++ b/src/collection/processes/linux/process.rs @@ -271,6 +271,7 @@ impl Process { }; let mut root = pid_path; + buffer.clear(); // NB: Whenever you add a new stat, make sure to pop the root and clear the // buffer! From e3398d8f0a0c5dc7eb706e9f5e03e4cf893fd313 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 05:23:24 -0400 Subject: [PATCH 09/14] driveby fix for dash cmdline bin name --- src/collection/processes/linux/mod.rs | 42 ++++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index 04f59a34c..dd5e6bcaf 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -224,12 +224,6 @@ fn read_proc( // If the comm fits then we'll default to whatever is set. // If it doesn't, we need to do some magic to determine what it's // supposed to be. - // - // We follow something similar to how htop does it to identify a valid name based on the cmdline. - // - https://github.com/htop-dev/htop/blob/bcb18ef82269c68d54a160290e5f8b2e939674ec/Process.c#L268 (kinda) - // - https://github.com/htop-dev/htop/blob/bcb18ef82269c68d54a160290e5f8b2e939674ec/Process.c#L573 - // - // Also note that cmdline is (for us) separated by \0. // TODO: We might want to re-evaluate if we want to do it like this, // as it turns out I was dumb and sometimes comm != process name... @@ -241,7 +235,7 @@ fn read_proc( // // Stuff like htop also offers the option to "highlight" basename and comm in command. Might be neat? let name = if comm.len() >= MAX_STAT_NAME_LEN { - name_from_cmdline(&cmdline) + binary_name_from_cmdline(&cmdline) } else { comm }; @@ -296,7 +290,12 @@ fn read_proc( )) } -fn name_from_cmdline(cmdline: &str) -> String { +/// We follow something similar to how htop does it to identify a valid name based on the cmdline. +/// - https://github.com/htop-dev/htop/blob/bcb18ef82269c68d54a160290e5f8b2e939674ec/Process.c#L268 (kinda) +/// - https://github.com/htop-dev/htop/blob/bcb18ef82269c68d54a160290e5f8b2e939674ec/Process.c#L573 +/// +/// Also note that cmdline is (for us) separated by \0. +fn binary_name_from_cmdline(cmdline: &str) -> String { let mut start = 0; let mut end = cmdline.len(); @@ -309,7 +308,12 @@ fn name_from_cmdline(cmdline: &str) -> String { } } - cmdline[start..end].to_string() + // Bit of a hack to handle cases like "firefox -blah" + let partial = &cmdline[start..end]; + partial + .split_once(" -") + .map(|(name, _)| name.to_string()) + .unwrap_or_else(|| partial.to_string()) } pub(crate) struct PrevProc<'a> { @@ -537,16 +541,20 @@ mod tests { #[test] fn test_name_from_cmdline() { - assert_eq!(name_from_cmdline("/usr/bin/btm"), "btm"); - assert_eq!(name_from_cmdline("/usr/bin/btm\0--asdf\0--asdf/gkj"), "btm"); - assert_eq!(name_from_cmdline("/usr/bin/btm:"), "btm"); - assert_eq!(name_from_cmdline("/usr/bin/b tm"), "b tm"); - assert_eq!(name_from_cmdline("/usr/bin/b tm:"), "b tm"); - assert_eq!(name_from_cmdline("/usr/bin/b tm\0--test"), "b tm"); - assert_eq!(name_from_cmdline("/usr/bin/b tm:\0--test"), "b tm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/btm"), "btm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/btm\0--asdf\0--asdf/gkj"), "btm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/btm:"), "btm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/b tm"), "b tm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/b tm:"), "b tm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/b tm\0--test"), "b tm"); + assert_eq!(binary_name_from_cmdline("/usr/bin/b tm:\0--test"), "b tm"); assert_eq!( - name_from_cmdline("/usr/bin/b t m:\0--\"test thing\""), + binary_name_from_cmdline("/usr/bin/b t m:\0--\"test thing\""), "b t m" ); + assert_eq!( + binary_name_from_cmdline("firefox -contentproc -isForBrowser -prefsHandle 0"), + "firefox" + ); } } From 462cf76061b14885f27778dfe969ef7f3e6225db Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 05:23:50 -0400 Subject: [PATCH 10/14] fmt --- src/collection/processes/linux/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index dd5e6bcaf..c3fed440a 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -542,7 +542,10 @@ mod tests { #[test] fn test_name_from_cmdline() { assert_eq!(binary_name_from_cmdline("/usr/bin/btm"), "btm"); - assert_eq!(binary_name_from_cmdline("/usr/bin/btm\0--asdf\0--asdf/gkj"), "btm"); + assert_eq!( + binary_name_from_cmdline("/usr/bin/btm\0--asdf\0--asdf/gkj"), + "btm" + ); assert_eq!(binary_name_from_cmdline("/usr/bin/btm:"), "btm"); assert_eq!(binary_name_from_cmdline("/usr/bin/b tm"), "b tm"); assert_eq!(binary_name_from_cmdline("/usr/bin/b tm:"), "b tm"); From 8343fb778e1d0e8c44150ad898ba146d1bfc53b2 Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 05:25:59 -0400 Subject: [PATCH 11/14] probably not needed to check for symlink --- src/collection/linux/utils.rs | 12 ++++++------ src/collection/processes/linux/process.rs | 9 ++++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/collection/linux/utils.rs b/src/collection/linux/utils.rs index 46629f95a..b8d017ba9 100644 --- a/src/collection/linux/utils.rs +++ b/src/collection/linux/utils.rs @@ -36,12 +36,12 @@ pub fn is_device_awake(device: &Path) -> bool { /// /// If successful, then a [`Cow`] will be returned referencing the contents of `buffer`. pub(crate) fn read_link<'a>(path: &Path, buffer: &'a mut Vec) -> std::io::Result> { - if !path.is_symlink() { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "path is not a symlink", - )); - } + // if !path.is_symlink() { + // return Err(std::io::Error::new( + // std::io::ErrorKind::InvalidInput, + // "path is not a symlink", + // )); + // } let c_path = std::ffi::CString::new(path.as_os_str().as_bytes())?; diff --git a/src/collection/processes/linux/process.rs b/src/collection/processes/linux/process.rs index 1ca6e76a7..ad3cf7aee 100644 --- a/src/collection/processes/linux/process.rs +++ b/src/collection/processes/linux/process.rs @@ -256,9 +256,13 @@ impl Process { // SAFETY: We can do this safely, we plan to only put a valid string in here. let buffer = unsafe { buffer.as_mut_vec() }; - read_link(pid_path.as_path(), buffer) + let out = read_link(pid_path.as_path(), buffer) .ok() - .and_then(|s| s.parse::().ok()) + .and_then(|s| s.parse::().ok()); + + buffer.clear(); + + out }) .ok_or_else(|| anyhow!("PID for {pid_path:?} was not found"))?; @@ -271,7 +275,6 @@ impl Process { }; let mut root = pid_path; - buffer.clear(); // NB: Whenever you add a new stat, make sure to pop the root and clear the // buffer! From 2900ed03c0f2d5834513dabd4a673169d220901c Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 05:28:03 -0400 Subject: [PATCH 12/14] why was this collecting --- src/collection/amd.rs | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/collection/amd.rs b/src/collection/amd.rs index 0c4729d8e..171681987 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -237,26 +237,24 @@ fn get_amd_fdinfo(device_path: &Path) -> Option> { return None; }; - let pids: Vec = proc_dir - .filter_map(|dir_entry| { - // check if pid is valid - let dir_entry = dir_entry.ok()?; - let metadata = dir_entry.metadata().ok()?; - - if !metadata.is_dir() { - return None; - } + let pids = proc_dir.filter_map(|dir_entry| { + // check if pid is valid + let dir_entry = dir_entry.ok()?; + let metadata = dir_entry.metadata().ok()?; - let pid = dir_entry.file_name().to_str()?.parse::().ok()?; + if !metadata.is_dir() { + return None; + } - // skip init process - if pid == 1 { - return None; - } + let pid = dir_entry.file_name().to_str()?.parse::().ok()?; - Some(pid) - }) - .collect(); + // skip init process/systemd + if pid == 1 { + return None; + } + + Some(pid) + }); for pid in pids { // collect file descriptors that point to our device renderers From 58565213b4b12a42ea20b36bbd328d3285ba0cd8 Mon Sep 17 00:00:00 2001 From: Clement Tsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 1 Sep 2025 17:18:20 -0400 Subject: [PATCH 13/14] okay so temp commit before I rewrite this whole thing --- src/collection/amd.rs | 10 ++++++++++ src/collection/processes/linux/process.rs | 3 +++ 2 files changed, 13 insertions(+) diff --git a/src/collection/amd.rs b/src/collection/amd.rs index 171681987..dd7799ca7 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -227,9 +227,11 @@ fn get_amdgpu_drm(device_path: &Path) -> Option> { } } +// Based on https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L13-L27. fn get_amd_fdinfo(device_path: &Path) -> Option> { let mut fdinfo = HashMap::new(); let mut buffer = Vec::new(); + const SYSTEMD_TO_SKIP: &[&str] = &["/lib/systemd", "/usr/lib/systemd"]; let drm_paths = get_amdgpu_drm(device_path)?; @@ -253,6 +255,13 @@ fn get_amd_fdinfo(device_path: &Path) -> Option> { return None; } + // TODO: We can instead refer to our already-obtained processes? I think we could maybe just do + // this with processes at the same time... + let cmdline = fs::read_to_string(format!("/proc/{pid}/cmdline")).ok()?; + if SYSTEMD_TO_SKIP.iter().any(|path| cmdline.starts_with(path)) { + return None; + } + Some(pid) }); @@ -335,6 +344,7 @@ pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Opt let mut proc_vec = Vec::with_capacity(num_gpu); let mut total_mem = 0; + // TODO: We can optimize this to do this all in one pass, rather than for loop + for loop. This reduces syscalls. for device_path in device_path_list { let device_name = get_amd_name(&device_path) .unwrap_or(amd_gpu_marketing::AMDGPU_DEFAULT_NAME.to_string()); diff --git a/src/collection/processes/linux/process.rs b/src/collection/processes/linux/process.rs index ad3cf7aee..a55736fab 100644 --- a/src/collection/processes/linux/process.rs +++ b/src/collection/processes/linux/process.rs @@ -76,6 +76,9 @@ impl Stat { // TODO: Is this needed? let line = buffer.trim(); + // TODO: comm is max 16, so we could in theory pre-allocate this. Also get it from /proc/pid/comm instead? + // They slightly differ though, see https://unix.stackexchange.com/questions/769962/thread-name-is-proc-pid-comm-always-identical-to-the-name-line-of-proc-pid-s + let (comm, rest) = { let start_paren = line .find('(') From 73edcd9b29e3fdccb0fee97b11fddd6d54355aac Mon Sep 17 00:00:00 2001 From: ClementTsang <34804052+ClementTsang@users.noreply.github.com> Date: Sun, 14 Sep 2025 19:00:43 -0400 Subject: [PATCH 14/14] hmmmm --- src/collection/processes/linux/gpu.rs | 44 +++++++++++++++++++++++++++ src/collection/processes/linux/mod.rs | 1 + 2 files changed, 45 insertions(+) create mode 100644 src/collection/processes/linux/gpu.rs diff --git a/src/collection/processes/linux/gpu.rs b/src/collection/processes/linux/gpu.rs new file mode 100644 index 000000000..d2f8ec0c9 --- /dev/null +++ b/src/collection/processes/linux/gpu.rs @@ -0,0 +1,44 @@ +//! Extract GPU process information on Linux. + +use std::{os::fd::BorrowedFd, path::Path}; + +use hashbrown::HashSet; +use rustix::fs::{Mode, OFlags}; + +use crate::collection::processes::Pid; + +fn is_drm_fd(fd: &BorrowedFd<'_>) -> bool { + true +} + +/// Get fdinfo for a process given the PID. +/// +/// Based on the method from nvtop [here](https://github.com/Syllo/nvtop/blob/339ee0b10a64ec51f43d27357b0068a40f16e9e4/src/extract_processinfo_fdinfo.c#L101). +pub(crate) fn get_fdinfo(pid: Pid, seen_fds: &mut HashSet) { + let fdinfo_path = format!("/proc/{pid}/fdinfo"); + let fdinfo_path = Path::new(&fdinfo_path); + + let Ok(fd_entries) = std::fs::read_dir(fdinfo_path) else { + return; + }; + + for fd_entry in fd_entries.flatten() { + let path = fd_entry.path(); + + if !path.is_file() { + continue; + } + + if !(path.to_string_lossy().chars().all(|c| c.is_ascii_digit())) { + continue; + } + + let Ok(fd) = rustix::fs::openat( + pid_path.as_path(), + OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) else { + continue; + }; + } +} diff --git a/src/collection/processes/linux/mod.rs b/src/collection/processes/linux/mod.rs index c3fed440a..d59302314 100644 --- a/src/collection/processes/linux/mod.rs +++ b/src/collection/processes/linux/mod.rs @@ -1,5 +1,6 @@ //! Process data collection for Linux. +mod gpu; mod process; use std::{