diff --git a/README.md b/README.md
index b432f4d2..08c68e9e 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,9 @@ Cheatsheet:
 
 |            | Using PyTorch                            | Using Candle                                                     |
 |------------|------------------------------------------|------------------------------------------------------------------|
-| Creation   | `torch.Tensor([[1, 2], [3, 4]])`         | `Tensor::new(&[[1f32, 2.], [3., 4.]], &Device::Cpu)?`            |
+| Creation   | `torch.Tensor([[1, 2], [3, 4]])`         | `Tensor::new(`                                                   |
+|            |                                          | `    &[[1f32, 2.], [3., 4.]],`                                   |
+|            |                                          | `    &Device::Cpu)?`                                             |
 | Indexing   | `tensor[:, :4]`                          | `tensor.i((.., ..4))?`                                           |
 | Operations | `tensor.view((2, 2))`                    | `tensor.reshape((2, 2))?`                                        |
 | Operations | `a.matmul(b)`                            | `a.matmul(&b)?`                                                  |
diff --git a/candle-nn/src/conv.rs b/candle-nn/src/conv.rs
index e670a0d2..d938cae4 100644
--- a/candle-nn/src/conv.rs
+++ b/candle-nn/src/conv.rs
@@ -1,3 +1,4 @@
+//! Convolution Layers.
 use candle::{Result, Tensor};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
diff --git a/candle-nn/src/embedding.rs b/candle-nn/src/embedding.rs
index deeba01e..a0a853b0 100644
--- a/candle-nn/src/embedding.rs
+++ b/candle-nn/src/embedding.rs
@@ -1,3 +1,4 @@
+//! Embedding Layer.
 use candle::{Result, Tensor};
 
 #[derive(Debug)]
diff --git a/candle-nn/src/layer_norm.rs b/candle-nn/src/layer_norm.rs
index 0b208c49..188a02bf 100644
--- a/candle-nn/src/layer_norm.rs
+++ b/candle-nn/src/layer_norm.rs
@@ -1,3 +1,33 @@
+//! Layer Normalization.
+//!
+//! This layer applies Layer Normalization over a mini-batch of inputs as described in [`Layer
+//! Normalization`]. The input is expected to have three dimensions: a batch dimension, a length,
+//! and a hidden size; the normalization is applied over the last dimension.
+//!
+//! # Example
+//!
+//! ```rust
+//! use candle::{Tensor, Device::Cpu};
+//! use candle_nn::LayerNorm;
+//! # fn main() -> candle::Result<()> {
+//!
+//! let w = Tensor::new(1f32, &Cpu)?;
+//! let b = Tensor::new(0f32, &Cpu)?;
+//! let layer = LayerNorm::new(w, b, 1e-5);
+//!
+//! let xs = Tensor::new(
+//!     &[[[1f32, 2., 3.], [4., 5., 6.], [9., 8., 7.]]],
+//!     &Cpu)?;
+//! let ys = layer.forward(&xs)?;
+//! assert_eq!(
+//!     ys.to_vec3::<f32>()?,
+//!     &[[[-1.2247356, 0.0, 1.2247356],
+//!        [-1.2247356, 0.0, 1.2247356],
+//!        [ 1.2247356, 0.0, -1.2247356]]]);
+//! # Ok(()) }
+//! ```
+//!
+//! [`Layer Normalization`]: https://arxiv.org/abs/1607.06450
 use candle::{DType, Result, Tensor};
 
 // This layer norm version handles both weight and bias so removes the mean.
diff --git a/candle-nn/src/linear.rs b/candle-nn/src/linear.rs
index 2e65ca2d..943011c9 100644
--- a/candle-nn/src/linear.rs
+++ b/candle-nn/src/linear.rs
@@ -1,3 +1,22 @@
+//! Linear layer
+//!
+//! This layer applies a linear transformation to the incoming data, `y = x@w.t() + b`.
+//! The bias is optional. The `forward` method can be used to apply the layer; it supports input
+//! with a batch dimension (so of shape `(b_sz, in_c)`) or without (of shape `(in_c,)`), and the
+//! output has shape `(b_sz, out_c)` or `(out_c,)` respectively.
+//!
+//! ```rust
+//! use candle::{Tensor, Device::Cpu};
+//! use candle_nn::Linear;
+//! # fn main() -> candle::Result<()> {
+//!
+//! let w = Tensor::new(&[[1f32, 2.], [3., 4.], [5., 6.]], &Cpu)?;
+//! let layer = Linear::new(w, None); // Use no bias.
+//! let xs = Tensor::new(&[[10f32, 100.]], &Cpu)?;
+//! let ys = layer.forward(&xs)?;
+//! assert_eq!(ys.to_vec2::<f32>()?, &[[210.0, 430.0, 650.0]]);
+//! # Ok(()) }
+//! ```
 use candle::Tensor;
 
 #[derive(Debug)]
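Not part of the patch itself, but as a quick sanity check of the Candle calls shown in the README cheatsheet above, here is a minimal, self-contained sketch. It assumes the `candle` crate (candle-core) and its `IndexOp` trait are available under these names; it only exercises the creation, indexing, `matmul`, and `reshape` rows from the table.

```rust
use candle::{Device, IndexOp, Result, Tensor};

fn main() -> Result<()> {
    let device = Device::Cpu;

    // Creation: a 2x2 f32 tensor, mirroring the cheatsheet's `Tensor::new` row.
    let a = Tensor::new(&[[1f32, 2.], [3., 4.]], &device)?;

    // Indexing: all rows, first column only, via the `IndexOp` trait (`tensor.i(...)`).
    let first_col = a.i((.., ..1))?;
    assert_eq!(first_col.to_vec2::<f32>()?, &[[1.0], [3.0]]);

    // Operations: matrix multiplication takes a reference, as in `a.matmul(&b)?`.
    let b = Tensor::new(&[[5f32, 6.], [7., 8.]], &device)?;
    let c = a.matmul(&b)?;
    assert_eq!(c.to_vec2::<f32>()?, &[[19.0, 22.0], [43.0, 50.0]]);

    // Operations: `reshape` plays the role of PyTorch's `view`.
    let row = c.reshape((1, 4))?;
    assert_eq!(row.to_vec2::<f32>()?, &[[19.0, 22.0, 43.0, 50.0]]);

    Ok(())
}
```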