Add some group parameter to convolutions. (#566)

* Add some group parameter to convolutions. * Avoid some unnecessary groups checks. * Move the tensor convolution bits. * Proper handling of groups. * Bump the crate version. * And add a changelog.
2025-06-17 02:58:50 +00:00 · 2023-08-23 12:58:55 +01:00
parent 4ee1cf038a
commit aba1e90797
30 changed files with 216 additions and 113 deletions
--- a/candle-core/examples/basics.rs
+++ b/candle-core/examples/basics.rs
@ -11,7 +11,7 @@ fn main() -> Result<()> {
    let inp = Tensor::randn(0f32, 1., (2, 320, 96, 96), &Device::Cpu)?;
    let w = Tensor::randn(0f32, 1., (320, 320, 3, 3), &Device::Cpu)?;
    let start = std::time::Instant::now();
-    let res = inp.conv2d(&w, 0, 1);
+    let res = inp.conv2d(&w, 0, 1, 1)?;
    println!("{:?}", start.elapsed());
    println!("{res:?}");
    Ok(())
--- a/candle-core/examples/cpu_benchmarks.rs
+++ b/candle-core/examples/cpu_benchmarks.rs
@ -40,7 +40,7 @@ impl Benchmark for Conv1d {
    }

    fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
-        d.0.conv1d(&d.1, 0, 1)
+        d.0.conv1d(&d.1, 0, 1, 1)
    }

    const ITERS: usize = 5;
@ -59,7 +59,7 @@ impl Benchmark for Conv2d {
    }

    fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
-        d.0.conv2d(&d.1, 0, 1)
+        d.0.conv2d(&d.1, 0, 1, 1)
    }

    const ITERS: usize = 1;
--- a/candle-core/examples/cuda_basics.rs
+++ b/candle-core/examples/cuda_basics.rs
@ -11,7 +11,7 @@ fn main() -> Result<()> {
    let device = Device::new_cuda(0)?;
    let t = Tensor::randn(0f32, 1f32, (2, 4, 96, 96), &device)?;
    let w = Tensor::randn(0f32, 1f32, (320, 4, 3, 3), &device)?;
-    let res = t.conv2d(&w, 1, 1)?;
+    let res = t.conv2d(&w, 1, 1, 1)?;
    println!("{res:?}");
    Ok(())
 }