From b2666fa0e613bde4aa6186ac9645a1d0c0323abf Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Tue, 30 Sep 2025 20:34:45 +0300 Subject: [PATCH] BLD: fallback to `__builtin_prefetch` on `clang-cl` with `-mno-mmx` `clang-cl` requires `MMX` for `_mm_prefetch`. With `-mno-mmx`, `__MMX__` is undefined and the build fails in cache_control.h. Use `__builtin_prefetch` in this configuration; keep `_mm_prefetch` everywhere else. Fixes NumPy's CI failure on Windows without MMX. --- hwy/cache_control.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hwy/cache_control.h b/hwy/cache_control.h index b03341ba94..90743cd3f2 100644 --- a/hwy/cache_control.h +++ b/hwy/cache_control.h @@ -98,9 +98,10 @@ template HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) { (void)p; #ifndef HWY_DISABLE_CACHE_CONTROL -#if HWY_ARCH_X86 +// Use _mm_prefetch on x86/x64, except when clang-cl is compiled with -mno-mmx. +#if HWY_ARCH_X86 && !(HWY_COMPILER_CLANGCL && !defined(__MMX__)) _mm_prefetch(reinterpret_cast(p), _MM_HINT_T0); -#elif HWY_COMPILER_GCC // includes clang +#elif HWY_COMPILER_GCC || HWY_COMPILER_CLANGCL // includes clang // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not // desirable, so use the default 3 (keep in caches). __builtin_prefetch(p, /*write=*/0, /*hint=*/3);