# Complexifying our hello world

Everything should now run with:

```bash
cargo run --release
```

## Using a `Linear` layer.

Now that we have this, we might want to add a little complexity, for instance by adding a `bias` and creating the classical `Linear` layer. We can do so as follows:

```rust
# extern crate candle;
# use candle::{DType, Device, Result, Tensor};
struct Linear {
    weight: Tensor,
    bias: Tensor,
}

impl Linear {
    fn forward(&self, x: &Tensor) -> Result<Tensor> {
        // x: (batch, in_features) multiplied by weight: (in_features, out_features).
        let x = x.matmul(&self.weight)?;
        // bias: (out_features,), broadcast over the batch dimension.
        x.broadcast_add(&self.bias)
    }
}

struct Model {
    first: Linear,
    second: Linear,
}

impl Model {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        let x = self.first.forward(image)?;
        let x = x.relu()?;
        self.second.forward(&x)
    }
}
```

This changes the loading code into a new function:

```rust
# extern crate candle;
# use candle::{DType, Device, Result, Tensor};
# struct Linear{
#     weight: Tensor,
#     bias: Tensor,
# }
# impl Linear{
#     fn forward(&self, x: &Tensor) -> Result<Tensor> {
#         let x = x.matmul(&self.weight)?;
#         x.broadcast_add(&self.bias)
#     }
# }
#
# struct Model {
#     first: Linear,
#     second: Linear,
# }
#
# impl Model {
#     fn forward(&self, image: &Tensor) -> Result<Tensor> {
#         let x = self.first.forward(image)?;
#         let x = x.relu()?;
#         self.second.forward(&x)
#     }
# }
fn main() -> Result<()> {
    // Use Device::new_cuda(0)?; to use the GPU.
    let device = Device::Cpu;

    // First layer: 784 inputs (a flattened 28x28 image) -> 100 hidden units.
    let weight = Tensor::zeros((784, 100), DType::F32, &device)?;
    let bias = Tensor::zeros((100,), DType::F32, &device)?;
    let first = Linear { weight, bias };
    // Second layer: 100 hidden units -> 10 digit classes.
    let weight = Tensor::zeros((100, 10), DType::F32, &device)?;
    let bias = Tensor::zeros((10,), DType::F32, &device)?;
    let second = Linear { weight, bias };
    let model = Model { first, second };

    let dummy_image = Tensor::zeros((1, 784), DType::F32, &device)?;

    let digit = model.forward(&dummy_image)?;
    println!("Digit {digit:?}");
    Ok(())
}
```

Now this works, and it is a great way to create your own layers.
But most of the classical layers are already implemented in [candle-nn](https://github.com/LaurentMazare/candle/tree/main/candle-nn).

## Using `candle_nn`.

For instance, [Linear](https://github.com/LaurentMazare/candle/blob/main/candle-nn/src/linear.rs) is already there.
This `Linear` is coded with the PyTorch layout in mind, to make it easier to reuse existing models: it stores its weights as `(out_features, in_features)` and multiplies by their transpose in the forward pass, rather than using the weights directly.
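
To make the layout difference concrete, here is a quick illustrative sketch (not part of the guide's example; it assumes candle's `Tensor::t` transpose helper):

```rust
# extern crate candle;
use candle::{DType, Device, Result, Tensor};

fn main() -> Result<()> {
    let device = Device::Cpu;
    let x = Tensor::zeros((1, 784), DType::F32, &device)?;

    // Our hand-rolled layer stores weights as (in_features, out_features)...
    let w_ours = Tensor::zeros((784, 100), DType::F32, &device)?;
    let y_ours = x.matmul(&w_ours)?; // (1, 784) x (784, 100) -> (1, 100)

    // ...while the PyTorch layout stores (out_features, in_features),
    // so the forward pass multiplies by the transpose.
    let w_pytorch = Tensor::zeros((100, 784), DType::F32, &device)?;
    let y_pytorch = x.matmul(&w_pytorch.t()?)?; // same (1, 100) result shape

    assert_eq!(y_ours.dims(), y_pytorch.dims());
    Ok(())
}
```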

So instead we can simplify our example:

```bash
cargo add --git https://github.com/LaurentMazare/candle.git candle-nn
```

And rewrite our example using it:

```rust
# extern crate candle;
# extern crate candle_nn;
use candle::{DType, Device, Result, Tensor};
use candle_nn::Linear;

struct Model {
    first: Linear,
    second: Linear,
}

impl Model {
    fn forward(&self, image: &Tensor) -> Result<Tensor> {
        let x = self.first.forward(image)?;
        let x = x.relu()?;
        self.second.forward(&x)
    }
}

fn main() -> Result<()> {
    // Use Device::new_cuda(0)?; to use the GPU.
    let device = Device::Cpu;

    // Note the PyTorch layout: this weight is (100, 784) instead of (784, 100)!
    let weight = Tensor::zeros((100, 784), DType::F32, &device)?;
    let bias = Tensor::zeros((100,), DType::F32, &device)?;
    let first = Linear::new(weight, Some(bias));
    let weight = Tensor::zeros((10, 100), DType::F32, &device)?;
    let bias = Tensor::zeros((10,), DType::F32, &device)?;
    let second = Linear::new(weight, Some(bias));
    let model = Model { first, second };

    let dummy_image = Tensor::zeros((1, 784), DType::F32, &device)?;

    let digit = model.forward(&dummy_image)?;
    println!("Digit {digit:?}");
    Ok(())
}
```

Feel free to modify this example to use `Conv2d` to create a classical convnet instead.
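
If you want a head start, here is a rough, untested sketch of what a single convolution could look like with `candle_nn`'s `Conv2d` (the exact API, in particular `Conv2dConfig` and the `Module` trait import, may differ between versions):

```rust
# extern crate candle;
# extern crate candle_nn;
use candle::{DType, Device, Module, Result, Tensor};
use candle_nn::{Conv2d, Conv2dConfig};

fn main() -> Result<()> {
    let device = Device::Cpu;

    // A 3x3 convolution mapping 1 input channel to 8 output channels.
    // Conv2d weights use the (out_channels, in_channels, h, w) layout.
    let weight = Tensor::zeros((8, 1, 3, 3), DType::F32, &device)?;
    let bias = Tensor::zeros((8,), DType::F32, &device)?;
    let conv = Conv2d::new(weight, Some(bias), Conv2dConfig::default());

    // The image keeps its 2D structure as (batch, channels, height, width)
    // instead of being flattened to (1, 784).
    let dummy_image = Tensor::zeros((1, 1, 28, 28), DType::F32, &device)?;
    let features = conv.forward(&dummy_image)?;
    println!("{:?}", features.shape());
    Ok(())
}
```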

Now that we have the running dummy code we can get to more advanced topics:

- [For PyTorch users](./guide/cheatsheet.md)
- [Running existing models](./inference/README.md)

The same commit also extends candle's `Shape` conversions: a one-element tuple such as `(100,)` now converts into a `Shape`, alongside the existing two-element version. This is what the `Tensor::zeros((100,), ...)` calls above rely on:

```rust
impl From<(usize,)> for Shape {
    fn from(d1: (usize,)) -> Self {
        Self(vec![d1.0])
    }
}

impl From<(usize, usize)> for Shape {
    fn from(d12: (usize, usize)) -> Self {
        Self(vec![d12.0, d12.1])
    }
}
```
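
With those conversions in place, a `Shape` can be built straight from a tuple; a tiny sketch (assuming `Shape` is exported at the crate root):

```rust
# extern crate candle;
use candle::Shape;

fn main() {
    // Both conversions go through the From impls above.
    let s1: Shape = (100,).into();
    let s2: Shape = (1, 784).into();
    println!("{s1:?} {s2:?}");
}
```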