diff --git a/candle-core/src/op.rs b/candle-core/src/op.rs
index b6d668dc..525383b2 100644
--- a/candle-core/src/op.rs
+++ b/candle-core/src/op.rs
@@ -109,11 +109,11 @@ pub trait CustomOp1: Send + Sync {
 
     /// The forward pass, as run on a cpu device. Note that the storage can use arbitrary strides,
     /// offsets etc so the associated layout should be used to access it.
-    fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)>;
+    fn cpu_fwd(&self, storage: &CpuStorage, layout: &Layout) -> Result<(CpuStorage, Shape)>;
 
     /// The forward pass, as run on a gpu device. Note that the storage can use arbitrary strides,
     /// offsets etc so the associated layout should be used to access it.
-    fn cuda_fwd(&self, _: &CudaStorage, _: &Layout) -> Result<(CudaStorage, Shape)> {
+    fn cuda_fwd(&self, _storage: &CudaStorage, _layout: &Layout) -> Result<(CudaStorage, Shape)> {
         Err(crate::Error::Cuda(
             format!("no cuda implementation for {}", self.name()).into(),
         ))
diff --git a/candle-examples/examples/custom-ops/main.rs b/candle-examples/examples/custom-ops/main.rs
index 1024653b..0de78b72 100644
--- a/candle-examples/examples/custom-ops/main.rs
+++ b/candle-examples/examples/custom-ops/main.rs
@@ -33,12 +33,12 @@ impl CustomOp1 for LayerNorm {
         "layer-norm"
     }
 
-    fn cpu_fwd(&self, s: &CpuStorage, l: &Layout) -> Result<(CpuStorage, Shape)> {
-        let (dim1, dim2) = l.shape().dims2()?;
-        let s = s.as_slice::<f32>()?;
-        let src = match l.contiguous_offsets() {
+    fn cpu_fwd(&self, storage: &CpuStorage, layout: &Layout) -> Result<(CpuStorage, Shape)> {
+        let (dim1, dim2) = layout.shape().dims2()?;
+        let slice = storage.as_slice::<f32>()?;
+        let src = match layout.contiguous_offsets() {
             None => Err(Error::Wrapped("input has to be contiguous".into()))?,
-            Some((o1, o2)) => &s[o1..o2],
+            Some((o1, o2)) => &slice[o1..o2],
         };
         let mut dst = Vec::with_capacity(dim1 * dim2);
         for idx1 in 0..dim1 {
@@ -48,30 +48,30 @@ impl CustomOp1 for LayerNorm {
             dst.extend(src.iter().map(|x| x * s_variance))
         }
         let storage = candle::WithDType::to_cpu_storage_owned(dst);
-        Ok((storage, l.shape().clone()))
+        Ok((storage, layout.shape().clone()))
     }
 
     #[cfg(feature = "cuda")]
     fn cuda_fwd(
         &self,
-        s: &candle::CudaStorage,
-        l: &Layout,
+        storage: &candle::CudaStorage,
+        layout: &Layout,
     ) -> Result<(candle::CudaStorage, Shape)> {
         use candle::cuda_backend::{cudarc, WrapErr};
         use cudarc::driver::{LaunchAsync, LaunchConfig};
-        let (d1, d2) = l.shape().dims2()?;
+        let (d1, d2) = layout.shape().dims2()?;
         let d1 = d1 as u32;
         let d2 = d2 as u32;
-        let dev = s.device().clone();
-        let s = s.as_cuda_slice::<f32>()?;
-        let s = match l.contiguous_offsets() {
+        let dev = storage.device().clone();
+        let slice = storage.as_cuda_slice::<f32>()?;
+        let slice = match layout.contiguous_offsets() {
             None => Err(Error::Wrapped("input has to be contiguous".into()))?,
-            Some((o1, o2)) => s.slice(o1..o2),
+            Some((o1, o2)) => slice.slice(o1..o2),
         };
-        let elem_count = l.shape().elem_count();
+        let elem_count = layout.shape().elem_count();
         let dst = unsafe { dev.alloc::<f32>(elem_count) }.w()?;
         let func = dev.get_or_load_func("rms_f32", cuda_kernels::LAYERNORM_KERNELS)?;
-        let params = (&dst, &s, self.eps, d1, d2);
+        let params = (&dst, &slice, self.eps, d1, d2);
         let cfg = LaunchConfig {
             grid_dim: (d1, 1, 1),
             block_dim: (d2, 1, 1),
@@ -80,7 +80,7 @@ impl CustomOp1 for LayerNorm {
         unsafe { func.launch(cfg, params) }.w()?;
 
         let dst = candle::CudaStorage::wrap_cuda_slice(dst, dev);
-        Ok((dst, l.shape().clone()))
+        Ok((dst, layout.shape().clone()))
     }
 }