Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions src/tsc_now.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

//! This module will be compiled when it's either linux_x86 or linux_x86_64.

#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
use core::sync::atomic::compiler_fence;
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
use core::sync::atomic::Ordering;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: move this import into the block below that uses it. Also, I recall that we prefer `std::` over `core::`, since this crate requires std.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed. The original code was generated by an LLM using fully qualified paths, so I told the LLM to import the function or method instead.

  • Use `use` imports instead of fully qualified paths. For example, import `std::sync::atomic::compiler_fence` and call `compiler_fence(...)` rather than `std::sync::atomic::compiler_fence(...)`.

Now I added another convention:

  • Scope `use` imports inside `cfg` blocks rather than at module top level when they are only needed there.

use std::cell::UnsafeCell;
use std::fs::read_to_string;
use std::io::ErrorKind;
Expand Down Expand Up @@ -132,6 +136,7 @@ fn has_invariant_tsc() -> bool {
use core::arch::x86_64::__cpuid;

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[allow(unused_unsafe)]
Comment thread
tisonkun marked this conversation as resolved.
Outdated
unsafe {
let cpuid_invariant_tsc_bts = 1 << 8;
__cpuid(0x80000000).eax >= 0x80000007
Expand Down Expand Up @@ -163,15 +168,22 @@ fn _cycles_per_sec() -> (u64, Instant, u64) {
let mut last_tsc;
let mut old_cycles = 0.0;

loop {
'outer: loop {
let (t1, tsc1) = monotonic_with_tsc();
loop {
let (t2, tsc2) = monotonic_with_tsc();
last_monotonic = t2;
last_tsc = tsc2;
let elapsed_nanos = (t2 - t1).as_nanos();
if elapsed_nanos > 10_000_000 {
cycles_per_sec = (tsc2 - tsc1) as f64 * 1_000_000_000.0 / elapsed_nanos as f64;
// Even with fence added in monotonic_with_tsc(), tsc2 < tsc1 is still possible
// if the thread migrates to a different CPU core between samples
// (cores may have slightly different TSC offsets). checked_sub
// prevents overflow; we retry from the outer loop with fresh tsc1.
let Some(delta) = tsc2.checked_sub(tsc1) else {
continue 'outer;
};
cycles_per_sec = delta as f64 * 1_000_000_000.0 / elapsed_nanos as f64;
break;
}
}
Expand All @@ -189,7 +201,25 @@ fn _cycles_per_sec() -> (u64, Instant, u64) {
/// get interrupted in half way may happen, they aren't guaranteed
/// to represent the same instant.
fn monotonic_with_tsc() -> (Instant, u64) {
(Instant::now(), tsc())
// RDTSC is not serializing; LFENCE ensures Instant::now() completes first.
#[cfg(any(target_arch = "x86_64", target_feature = "sse2"))]
{
#[cfg(target_arch = "x86")]
use core::arch::x86::_mm_lfence;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::_mm_lfence;
let t = Instant::now();
unsafe {
_mm_lfence();
}
(t, tsc())
}
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
{
let t = Instant::now();
compiler_fence(Ordering::SeqCst);
(t, tsc())
}
}

#[inline]
Expand Down
Loading