Segment Anything - process images (#766)

* Start processing images.
* Add LayerNorm2d.
* Properly use LayerNorm2d.
* Tweak eps.
* Use LayerNorm on inputs with a rank different from 3.
* Window partitioning.
* Fix a couple todos.
* More todos.
* Hard-code the einsums.
* More padding support.
* Some sizes tweaks.
* Use the hub to get the weights.
* Use a batch matmul.
* Tweaks.
* More fixes.
* Get some predictions to be generated.
2025-06-18 19:47:12 +00:00 · 2023-09-07 19:22:45 +01:00
parent 7b50f3e106
commit 7396b8ed1a
10 changed files with 303 additions and 105 deletions
--- a/candle-nn/src/linear.rs
+++ b/candle-nn/src/linear.rs
@@ -41,8 +41,9 @@ impl Linear {

 impl super::Module for Linear {
    fn forward(&self, x: &Tensor) -> candle::Result<Tensor> {
-        let w = match x.dims() {
-            &[bsize, _, _] => self.weight.broadcast_left(bsize)?.t()?,
+        let w = match *x.dims() {
+            [b1, b2, _, _] => self.weight.broadcast_left((b1, b2))?.t()?,
+            [bsize, _, _] => self.weight.broadcast_left(bsize)?.t()?,
            _ => self.weight.t()?,
        };
        let x = x.matmul(&w)?;