diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 5229955a..4b79385f 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -47,7 +47,6 @@ jobs: # Install rust curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y source "${HOME}/.cargo/env" - rustup default nightly wget https://apt.llvm.org/llvm.sh bash ./llvm.sh 21 apt install -y --no-install-recommends libmlir-21-dev mlir-21-tools libpolly-21-dev @@ -66,7 +65,7 @@ jobs: - name: Clippy run: | source "${HOME}/.cargo/env" - cargo clippy + cargo clippy -- --deny clippy::all --allow clippy::missing-safety-doc --allow clippy::type_complexity - name: Test (compile only) run: | diff --git a/cuda-async/src/device_future.rs b/cuda-async/src/device_future.rs index 9f78b34e..2b20acfc 100644 --- a/cuda-async/src/device_future.rs +++ b/cuda-async/src/device_future.rs @@ -33,7 +33,7 @@ pub enum DeviceFutureState { } /// Shared state between a CUDA stream callback and the async waker. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct StreamCallbackState { pub(crate) waker: AtomicWaker, pub(crate) complete: AtomicBool, @@ -42,10 +42,7 @@ pub struct StreamCallbackState { impl StreamCallbackState { /// Creates a new callback state with the completion flag unset. pub fn new() -> Self { - Self { - waker: AtomicWaker::new(), - complete: AtomicBool::new(false), - } + Self::default() } /// Marks the operation as complete and wakes the associated task. pub fn signal(&self) { diff --git a/cuda-async/src/device_operation.rs b/cuda-async/src/device_operation.rs index 6a226499..45fce080 100644 --- a/cuda-async/src/device_operation.rs +++ b/cuda-async/src/device_operation.rs @@ -391,7 +391,7 @@ pub trait DeviceOp: /// - `dup`, `copy_host_vec_to_device` /// /// See [`Scope`](crate::cuda_graph::Scope) for the full safety proof. -pub trait GraphNode: DeviceOp {} +pub trait GraphNode {} // Arc @@ -987,10 +987,12 @@ where if !self.computed.load(Ordering::Acquire) { // Safety: This block is guaranteed to execute at most once. // Put the input in a box so the pointer is dropped when this block exits. - let input = unsafe { (&mut *self.input.get()).take() }.ok_or(device_error( - context.get_device_id(), - "Select operation failed.", - ))?; + let input = self.input.get(); + let input = unsafe { input.as_mut() }; + let input = input + .unwrap() + .take() + .ok_or_else(|| device_error(context.get_device_id(), "Select operation failed."))?; let (left, right) = input.execute(context)?; // Update internal state. unsafe { @@ -1002,12 +1004,14 @@ where Ok(()) } unsafe fn left(&self) -> T1 { - let left = unsafe { (&mut *self.left.get()).take() }.unwrap(); - left + let cell = self.left.get(); + let cell = unsafe { cell.as_mut() }; + cell.unwrap().take().unwrap() } unsafe fn right(&self) -> T2 { - let right = unsafe { (&mut *self.right.get()).take() }.unwrap(); - right + let cell = self.right.get(); + let cell = unsafe { cell.as_mut() }; + cell.unwrap().take().unwrap() } } diff --git a/cuda-async/src/launch.rs b/cuda-async/src/launch.rs index a7abe072..ceb0db7c 100644 --- a/cuda-async/src/launch.rs +++ b/cuda-async/src/launch.rs @@ -33,9 +33,9 @@ impl Drop for AsyncKernelLaunch { let _ = self .args .iter() - .map(|arg| { + .map(|&arg| { // Reconstruct the boxes. Pointers will be dropped when they go out of scope. - unsafe { Box::from_raw(*arg) } + unsafe { Box::from_raw(arg as *mut usize) } }) .collect::>(); } diff --git a/cuda-core/src/cudarc_shim.rs b/cuda-core/src/cudarc_shim.rs index 7afaac98..18491992 100644 --- a/cuda-core/src/cudarc_shim.rs +++ b/cuda-core/src/cudarc_shim.rs @@ -40,7 +40,6 @@ pub(crate) mod primary_ctx { } /// Low-level device query operations. - #[allow(dead_code)] pub(crate) mod device { @@ -191,7 +190,6 @@ pub(crate) mod ctx { } /// Low-level CUDA stream operations. - #[allow(dead_code)] pub(crate) mod stream { use super::{DriverError, IntoResult}; diff --git a/cutile-ir/src/bytecode/encoding.rs b/cutile-ir/src/bytecode/encoding.rs index f575b9e1..32b72cdc 100644 --- a/cutile-ir/src/bytecode/encoding.rs +++ b/cutile-ir/src/bytecode/encoding.rs @@ -20,16 +20,13 @@ pub struct EncodingWriter { impl EncodingWriter { pub fn new() -> Self { - Self { - buf: Vec::new(), - required_alignment: 1, - } + Self::default() } pub fn with_capacity(cap: usize) -> Self { Self { buf: Vec::with_capacity(cap), - required_alignment: 1, + ..Self::default() } } @@ -223,6 +220,15 @@ impl EncodingWriter { } } +impl Default for EncodingWriter { + fn default() -> Self { + Self { + buf: Default::default(), + required_alignment: 1, + } + } +} + /// Patch a `u32` value at `offset` in the buffer (little-endian). pub fn patch_u32(buf: &mut [u8], offset: usize, value: u32) { buf[offset..offset + 4].copy_from_slice(&value.to_le_bytes()); @@ -270,7 +276,7 @@ fn convert_to_f8( // Handle special values. if f64_exp == 0x7FF { // Inf or NaN - if f64_man != 0 || (nan_only_all_ones && f64_man == 0) { + if f64_man != 0 || nan_only_all_ones { // NaN (or Inf mapped to NaN for formats without infinities) if nan_only_all_ones { return (sign << 7) | ((max_exp as u8) << man_bits) | man_mask; diff --git a/cutile-ir/src/ir/fmt.rs b/cutile-ir/src/ir/fmt.rs index b6dfd1f6..d17641f3 100644 --- a/cutile-ir/src/ir/fmt.rs +++ b/cutile-ir/src/ir/fmt.rs @@ -1072,7 +1072,7 @@ impl<'a> ModulePrinter<'a> { let pad = " ".repeat(self.indent); // Operands: [lb, ub, step, init_values...] - let lb = op.operands.get(0).map(|v| v.index()); + let lb = op.operands.first().map(|v| v.index()); let ub = op.operands.get(1).map(|v| v.index()); let step = op.operands.get(2).map(|v| v.index()); let init_values = &op.operands[3.min(op.operands.len())..]; @@ -2601,7 +2601,7 @@ fn format_dense_i32_array(attr: &Attribute) -> String { .collect(); format!("[{}]", elems.join(", ")) } - _ => format!("{}", format_attr(attr)), + _ => format_attr(attr).to_string(), } } diff --git a/cutile-ir/src/ir/types.rs b/cutile-ir/src/ir/types.rs index 8bcce05a..1b572053 100644 --- a/cutile-ir/src/ir/types.rs +++ b/cutile-ir/src/ir/types.rs @@ -266,11 +266,7 @@ fn strip_prefix_suffix<'a>(s: &'a str, prefix: &str, _suffix: &str) -> Option<&' } } // No matching close — try without nesting (just strip last char if it's '>'). - if after_prefix.ends_with('>') { - Some(&after_prefix[..after_prefix.len() - 1]) - } else { - None - } + after_prefix.strip_suffix('>') } fn parse_scalar(s: &str) -> Option { @@ -314,7 +310,7 @@ fn parse_tile(inner: &str) -> Option { before .trim_end_matches('x') .split('x') - .map(|d| parse_dim(d)) + .map(parse_dim) .collect() }; let ptr_inner_start = ptr_start + "ptr<".len(); @@ -368,7 +364,7 @@ fn parse_tensor_view(inner: &str) -> Option { let strides = if let Some(sp) = strides_part { let sp = sp.trim_start_matches('[').trim_end_matches(']'); - sp.split(',').map(|s| parse_dim(s)).collect() + sp.split(',').map(parse_dim).collect() } else { vec![DYNAMIC; shape.len()] }; diff --git a/cutile-macro/src/_module.rs b/cutile-macro/src/_module.rs index 6bc66756..20585200 100644 --- a/cutile-macro/src/_module.rs +++ b/cutile-macro/src/_module.rs @@ -411,7 +411,7 @@ pub fn trait_(mut item: ItemTrait) -> Result { ); let res = match attributes { Some(attributes) - if attributes.name_as_str().as_deref() == Some("cuda_tile :: variadic_trait") => + if attributes.name_as_str() == Some("cuda_tile :: variadic_trait".into()) => { desugar_variadic_trait_decl(&item)? } diff --git a/cutile-macro/src/rank_instantiation.rs b/cutile-macro/src/rank_instantiation.rs index f7d66b19..cb14990f 100644 --- a/cutile-macro/src/rank_instantiation.rs +++ b/cutile-macro/src/rank_instantiation.rs @@ -1109,9 +1109,7 @@ impl RankInstantiator { /// Rewrite a free-fn signature (generics, args, return) and its body. pub fn rewrite_function(mut self, item: &ItemFn) -> Result { let mut item = item.clone(); - if let Err(e) = rewrite_fn_sig(&mut item.sig, &self.bindings) { - return Err(e); - } + rewrite_fn_sig(&mut item.sig, &self.bindings)?; self.visit_block_mut(&mut item.block); self.into_result(item) } @@ -1125,9 +1123,7 @@ impl RankInstantiator { Ok(t) => *item.self_ty = t, Err(e) => return Err(e), } - if let Err(e) = rewrite_generics_for_rank(&mut item.generics, &self.bindings) { - return Err(e); - } + rewrite_generics_for_rank(&mut item.generics, &self.bindings)?; if let Some(trait_) = &mut item.trait_ { let path = &mut trait_.1; if path.segments.is_empty() { @@ -1138,9 +1134,7 @@ impl RankInstantiator { } let last_seg = path.segments.last_mut().unwrap(); if let PathArguments::AngleBracketed(path_args) = &mut last_seg.arguments { - if let Err(e) = rewrite_generic_args_for_rank(path_args, &self.bindings) { - return Err(e); - } + rewrite_generic_args_for_rank(path_args, &self.bindings)? } } @@ -1164,8 +1158,8 @@ impl RankInstantiator { } let mut result = fn_impl.clone(); self.rewrite_impl_method(&original_self_ty, &mut result); - if self.error.is_some() { - return Err(self.error.unwrap()); + if let Some(error) = self.error { + return Err(error); } impl_items.push(ImplItem::Fn(result)); } diff --git a/cutile-macro/src/shadow_dispatch.rs b/cutile-macro/src/shadow_dispatch.rs index c6bd1193..d4f1c499 100644 --- a/cutile-macro/src/shadow_dispatch.rs +++ b/cutile-macro/src/shadow_dispatch.rs @@ -477,10 +477,8 @@ impl RankPolyOpSpec { } // Rank-dependent non-shape arg types as trait generics (e.g. `Idx0` // for `idx: [i32; N]`). Caller's array literal pins them. - for slot in &self.rank_dep_arg_idents { - if let Some(id) = slot { - all_trait_params.push(quote! { #id }); - } + for id in self.rank_dep_arg_idents.iter().flatten() { + all_trait_params.push(quote! { #id }); } if let Some(ref out) = extra_out_trait_param { all_trait_params.push(out.clone()); @@ -565,7 +563,7 @@ impl RankPolyOpSpec { let mut return_concrete = rewrite_ty_for_rank(&self.return_type, combo, &self.cgas); for (orig, replacement) in self.dead_lifetimes.iter().zip(self.dead_lt_idents.iter()) { return_concrete = - replace_lifetimes_with(&return_concrete, &[orig.clone()], replacement); + replace_lifetimes_with(&return_concrete, std::slice::from_ref(orig), replacement); } let mut trait_instantiation_args: Vec = Vec::new(); @@ -773,10 +771,8 @@ impl RankPolyOpSpec { trait_args.push(quote! { #i }); } // Rank-dep arg generics, matching trait declaration ordering. - for slot in &self.rank_dep_arg_idents { - if let Some(id) = slot { - trait_args.push(quote! { #id }); - } + for id in self.rank_dep_arg_idents.iter().flatten() { + trait_args.push(quote! { #id }); } if use_free_out { trait_args.push(quote! { #out_ident }); @@ -827,10 +823,8 @@ impl RankPolyOpSpec { for i in &extra_shape_generic_idents { all_wrapper_generics.push(quote! { #i }); } - for slot in &self.rank_dep_arg_idents { - if let Some(id) = slot { - all_wrapper_generics.push(quote! { #id }); - } + for id in self.rank_dep_arg_idents.iter().flatten() { + all_wrapper_generics.push(quote! { #id }); } if use_free_out { all_wrapper_generics.push(quote! { #out_ident }); @@ -1744,7 +1738,7 @@ pub fn desugar_variadic_trait_decl(item: &ItemTrait) -> Result = new_sig .inputs @@ -2212,10 +2206,8 @@ fn type_uses_lifetime(ty: &Type) -> bool { for arg in ab.args.iter() { match arg { GenericArgument::Lifetime(_) => return true, - GenericArgument::Type(t) => { - if type_uses_lifetime(t) { - return true; - } + GenericArgument::Type(t) if type_uses_lifetime(t) => { + return true; } _ => {} } @@ -2236,11 +2228,10 @@ fn filter_cuda_tile_attrs(attrs: &[syn::Attribute]) -> Vec { attrs .iter() .filter(|a| { - let path = a.path(); - !path + a.path() .segments .first() - .is_some_and(|s| s.ident == "cuda_tile") + .is_none_or(|s| s.ident != "cuda_tile") }) .cloned() .collect() diff --git a/cutile/src/api.rs b/cutile/src/api.rs index d767477a..0c7b6469 100644 --- a/cutile/src/api.rs +++ b/cutile/src/api.rs @@ -279,7 +279,6 @@ pub fn memcpy(dst: &mut Tensor, src: &Tensor) -> Memcpy { /// the destination is borrowed immutably but written to through the device /// pointer during graph replay. /// - pub struct Memcpy { src_ptr: cuda_core::sys::CUdeviceptr, dst_ptr: cuda_core::sys::CUdeviceptr, @@ -637,6 +636,7 @@ pub fn convert( /// - `std`: Standard deviation /// - `shape`: Tensor shape /// - `seed`: Optional random seed for reproducibility +/// /// Generates a tensor with values from a normal distribution. /// /// Supports `f32` and `f64` natively via cuRAND. For `f16`, generates `f32` @@ -674,7 +674,7 @@ pub fn randn_f16( let res = value((Arc::new(src_tensor), dst)) .then(convert_apply) .unzip(); - res.1.unpartition().reshape(&shape.to_vec()) + res.1.unpartition().reshape(shape.as_ref()) }) }) } diff --git a/cutile/src/tensor.rs b/cutile/src/tensor.rs index d8a482ce..e7b19b69 100644 --- a/cutile/src/tensor.rs +++ b/cutile/src/tensor.rs @@ -578,7 +578,7 @@ impl Tensor { return tensor_error_result("Reinterpret shape must preserve total byte size."); } let alignment = align_of::() as u64; - if alignment > 1 && self.cu_deviceptr() % alignment != 0 { + if alignment > 1 && !self.cu_deviceptr().is_multiple_of(alignment) { return tensor_error_result( "Tensor storage alignment is incompatible with reinterpret target type.", ); @@ -826,7 +826,7 @@ impl Reshape for Tensor { } } -impl<'a, T: DType> Reshape for &'a Arc> { +impl Reshape for &Arc> { type Output = Arc>; fn reshape(self, shape: &[usize]) -> Result>, Error> { self.reshape_shared(shape) @@ -1052,7 +1052,7 @@ impl<'a, T: DType> PartitionMut<'a, T> for &'a mut Tensor { } } -impl<'a, T: DType> Partition<&'a mut Tensor> { +impl Partition<&mut Tensor> { pub fn dtype_str(&self) -> &'static str { T::DTYPE.as_str() } @@ -1318,7 +1318,7 @@ impl KernelOutputStored for Partition> { } } -impl<'a, T: DType> KernelOutputStored for Partition<&'a mut Tensor> { +impl KernelOutputStored for Partition<&mut Tensor> { fn push_kernel_args(&self, launcher: &mut AsyncKernelLaunch) { unsafe { launcher.push_device_ptr(self.object.cu_deviceptr()); @@ -1462,7 +1462,7 @@ impl KernelInputStored for Arc> { } } -impl<'a, T: DType + Sync> KernelInputStored for &'a Tensor { +impl KernelInputStored for &Tensor { fn push_kernel_args(&self, launcher: &mut AsyncKernelLaunch) { unsafe { launcher.push_device_ptr(self.cu_deviceptr()); diff --git a/cutile/src/tile_kernel.rs b/cutile/src/tile_kernel.rs index 72345fc4..ee9d77df 100644 --- a/cutile/src/tile_kernel.rs +++ b/cutile/src/tile_kernel.rs @@ -48,6 +48,7 @@ pub struct TileFunctionKey { } impl TileFunctionKey { + #[allow(clippy::too_many_arguments)] pub fn new( module_name: String, function_name: String, @@ -580,7 +581,7 @@ impl KernelArgument for &Partition> { } /// Same as above but for borrowed mutable tensor partitions. -impl<'a, T: DType> KernelArgument for &Partition<&'a mut Tensor> { +impl KernelArgument for &Partition<&mut Tensor> { fn push_arg(self, launcher: &mut AsyncKernelLaunch) { unsafe { launcher.push_device_ptr(self.object.cu_deviceptr());