Add to the cuda example a reproduction of the issue. (#579)

* Add to the cuda example a reproduction of the issue.
* Tweak.
* Add a test using non-square matrices.
* Fix the conv2d kernel.
* Display the error.
* And tweak the comment.
2025-06-16 10:38:54 +00:00 · 2023-08-24 12:07:31 +01:00
parent dd64465899
commit ca318a6ec7
3 changed files with 58 additions and 12 deletions
--- a/candle-core/examples/cuda_basics.rs
+++ b/candle-core/examples/cuda_basics.rs
@@ -9,8 +9,17 @@ use candle_core::{Device, Tensor};

 fn main() -> Result<()> {
    let device = Device::new_cuda(0)?;
-    let t = Tensor::rand(-1f32, 1f32, 96, &device)?;
-    println!("{t}");
+    let in_t = Tensor::rand(-1f32, 1f32, (1, 3, 12, 7), &device)?;
+    let k_t = Tensor::rand(-1f32, 1f32, (6, 3, 1, 1), &device)?;
+    let out_t = in_t.conv2d(&k_t, 0, 1, 1)?;
+    println!("{out_t}");
+    let in_t = in_t.to_device(&Device::Cpu)?;
+    let k_t = k_t.to_device(&Device::Cpu)?;
+    let out_t2 = in_t.conv2d(&k_t, 0, 1, 1)?;
+    let diff = (out_t.to_device(&Device::Cpu)? - out_t2)?
+        .sqr()?
+        .sum_all()?;
+    println!("{diff}");

    let t = Tensor::randn(0f32, 1f32, (2, 4, 96, 96), &device)?;
    let w = Tensor::randn(0f32, 1f32, (320, 4, 3, 3), &device)?;