mirror of https://github.com/huggingface/candle.git (synced 2025-06-20 20:09:50 +00:00)
Add a stable diffusion example (#328)
* Start adding a stable-diffusion example.
* Proper computation of the causal mask (see the sketch below).
* Add the chunk operation.
* Work in progress: port the attention module.
* Add some dummy modules for conv2d and group-norm, get the attention module to compile.
* Re-enable the 2d convolution.
* Add the embeddings module.
* Add the resnet module.
* Add the unet blocks.
* Add the unet.
* And add the variational auto-encoder.
* Use the pad function from utils.
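The message above mentions a proper computation of the causal mask used by the attention modules. As a rough, standalone illustration (not code from this commit), a causal mask is a lower-triangular matrix: query position i may only attend to positions j <= i, and masked positions get negative infinity so they vanish after the softmax.

```rust
/// Build a causal (lower-triangular) attention mask of shape (len, len).
/// Positions a query is not allowed to attend to get f32::NEG_INFINITY,
/// so they contribute nothing after the softmax. Illustrative sketch only.
fn causal_mask(len: usize) -> Vec<Vec<f32>> {
    (0..len)
        .map(|i| {
            (0..len)
                .map(|j| if j <= i { 0.0 } else { f32::NEG_INFINITY })
                .collect()
        })
        .collect()
}

fn main() {
    // For a sequence of length 4, row i allows attention to positions 0..=i.
    for row in causal_mask(4) {
        println!("{row:?}");
    }
}
```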
```diff
@@ -48,3 +48,84 @@ impl Conv1d {
         }
     }
 }
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Conv2dConfig {
+    pub padding: usize,
+    pub stride: usize,
+}
+
+impl Default for Conv2dConfig {
+    fn default() -> Self {
+        Self {
+            padding: 0,
+            stride: 1,
+        }
+    }
+}
+
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct Conv2d {
+    weight: Tensor,
+    bias: Option<Tensor>,
+    config: Conv2dConfig,
+}
+
+impl Conv2d {
+    pub fn new(weight: Tensor, bias: Option<Tensor>, config: Conv2dConfig) -> Self {
+        Self {
+            weight,
+            bias,
+            config,
+        }
+    }
+
+    pub fn config(&self) -> &Conv2dConfig {
+        &self.config
+    }
+
+    pub fn forward(&self, _x: &Tensor) -> Result<Tensor> {
+        todo!()
+    }
+}
+
+pub fn conv1d(
+    in_channels: usize,
+    out_channels: usize,
+    kernel_size: usize,
+    cfg: Conv1dConfig,
+    vs: crate::VarBuilder,
+) -> Result<Conv1d> {
+    let init_ws = crate::init::DEFAULT_KAIMING_NORMAL;
+    let ws = vs.get_or_init((out_channels, in_channels, kernel_size), "weight", init_ws)?;
+    let bound = 1. / (in_channels as f64).sqrt();
+    let init_bs = crate::Init::Uniform {
+        lo: -bound,
+        up: bound,
+    };
+    let bs = vs.get_or_init(out_channels, "bias", init_bs)?;
+    Ok(Conv1d::new(ws, Some(bs), cfg))
+}
+
+pub fn conv2d(
+    in_channels: usize,
+    out_channels: usize,
+    kernel_size: usize,
+    cfg: Conv2dConfig,
+    vs: crate::VarBuilder,
+) -> Result<Conv2d> {
+    let init_ws = crate::init::DEFAULT_KAIMING_NORMAL;
+    let ws = vs.get_or_init(
+        (out_channels, in_channels, kernel_size, kernel_size),
+        "weight",
+        init_ws,
+    )?;
+    let bound = 1. / (in_channels as f64).sqrt();
+    let init_bs = crate::Init::Uniform {
+        lo: -bound,
+        up: bound,
+    };
+    let bs = vs.get_or_init(out_channels, "bias", init_bs)?;
+    Ok(Conv2d::new(ws, Some(bs), cfg))
+}
```
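`Conv2d::forward` above is still a `todo!()`. As a reminder of what the op will eventually have to compute, here is a minimal, dependency-free sketch of a single-channel 2D convolution driven by the same `padding` and `stride` parameters as `Conv2dConfig`; the function name and nested-`Vec` layout are purely illustrative, and this is not the candle kernel.

```rust
/// Naive single-channel 2D convolution: an illustration of the math behind
/// `Conv2d::forward`, not the candle implementation.
/// `input` is (h, w), `kernel` is (k, k); zero padding of `padding` pixels
/// on every side, and the window moves by `stride`.
fn conv2d_naive(
    input: &[Vec<f32>],
    kernel: &[Vec<f32>],
    padding: usize,
    stride: usize,
) -> Vec<Vec<f32>> {
    let (h, w) = (input.len(), input[0].len());
    let k = kernel.len();
    let out_h = (h + 2 * padding - k) / stride + 1;
    let out_w = (w + 2 * padding - k) / stride + 1;
    let mut out = vec![vec![0f32; out_w]; out_h];
    for oy in 0..out_h {
        for ox in 0..out_w {
            let mut acc = 0f32;
            for ky in 0..k {
                for kx in 0..k {
                    // Coordinates in the padded input; the zero border contributes nothing.
                    let iy = oy * stride + ky;
                    let ix = ox * stride + kx;
                    if iy >= padding && ix >= padding {
                        let (iy, ix) = (iy - padding, ix - padding);
                        if iy < h && ix < w {
                            acc += input[iy][ix] * kernel[ky][kx];
                        }
                    }
                }
            }
            out[oy][ox] = acc;
        }
    }
    out
}

fn main() {
    // 4x4 input, 3x3 averaging kernel, padding 1, stride 1 -> 4x4 output.
    let input = vec![vec![1.0f32; 4]; 4];
    let kernel = vec![vec![1.0 / 9.0; 3]; 3];
    for row in conv2d_naive(&input, &kernel, 1, 1) {
        println!("{row:?}");
    }
}
```

Note also how the `conv1d`/`conv2d` builders in the hunk above initialise parameters: Kaiming-normal weights via `DEFAULT_KAIMING_NORMAL`, and biases drawn uniformly from [-1/sqrt(in_channels), 1/sqrt(in_channels)].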
candle-nn/src/group_norm.rs (new file, 48 lines)

```diff
@@ -0,0 +1,48 @@
+//! Group Normalization.
+//!
+//! This layer applies Group Normalization over a mini-batch of inputs.
+use candle::{Result, Tensor};
+
+// This group norm version handles both weight and bias so removes the mean.
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct GroupNorm {
+    weight: Tensor,
+    bias: Tensor,
+    eps: f64,
+    num_channels: usize,
+    num_groups: usize,
+}
+
+impl GroupNorm {
+    pub fn new(
+        weight: Tensor,
+        bias: Tensor,
+        num_channels: usize,
+        num_groups: usize,
+        eps: f64,
+    ) -> Self {
+        Self {
+            weight,
+            bias,
+            eps,
+            num_channels,
+            num_groups,
+        }
+    }
+
+    pub fn forward(&self, _: &Tensor) -> Result<Tensor> {
+        todo!()
+    }
+}
+
+pub fn group_norm(
+    num_channels: usize,
+    num_groups: usize,
+    eps: f64,
+    vb: crate::VarBuilder,
+) -> Result<GroupNorm> {
+    let weight = vb.get_or_init(num_channels, "weight", crate::Init::Const(1.))?;
+    let bias = vb.get_or_init(num_channels, "bias", crate::Init::Const(0.))?;
+    Ok(GroupNorm::new(weight, bias, num_channels, num_groups, eps))
+}
```
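`GroupNorm::forward` is likewise left as a `todo!()` for now. Conceptually, group normalization splits the `num_channels` channels of a sample into `num_groups` groups, normalizes each group to zero mean and unit variance (with `eps` guarding the division), and then applies the per-channel `weight` and `bias`. Below is a rough sketch over a single sample stored as `x[channel][element]`; the layout and function name are illustrative, not the candle implementation.

```rust
/// Illustrative group normalization for one sample laid out as x[channel][element].
/// Shows the per-group mean/variance normalization followed by the per-channel
/// affine transform (`weight`, `bias`). Not the candle implementation.
fn group_norm_naive(
    x: &[Vec<f32>],
    weight: &[f32],
    bias: &[f32],
    num_groups: usize,
    eps: f32,
) -> Vec<Vec<f32>> {
    let num_channels = x.len();
    assert_eq!(num_channels % num_groups, 0);
    let channels_per_group = num_channels / num_groups;
    let mut out = x.to_vec();
    for g in 0..num_groups {
        let chans = g * channels_per_group..(g + 1) * channels_per_group;
        // Mean and variance over every element of every channel in the group.
        let values: Vec<f32> = chans.clone().flat_map(|c| x[c].iter().copied()).collect();
        let n = values.len() as f32;
        let mean = values.iter().sum::<f32>() / n;
        let var = values.iter().map(|v| (v - mean) * (v - mean)).sum::<f32>() / n;
        let inv_std = 1.0 / (var + eps).sqrt();
        for c in chans {
            for (o, v) in out[c].iter_mut().zip(&x[c]) {
                *o = (v - mean) * inv_std * weight[c] + bias[c];
            }
        }
    }
    out
}

fn main() {
    // 4 channels split into 2 groups, 3 elements per channel; identity affine.
    let x = vec![
        vec![1.0, 2.0, 3.0],
        vec![4.0, 5.0, 6.0],
        vec![0.0, 0.0, 0.0],
        vec![1.0, 1.0, 1.0],
    ];
    let weight = vec![1.0; 4];
    let bias = vec![0.0; 4];
    for row in group_norm_naive(&x, &weight, &bias, 2, 1e-5) {
        println!("{row:?}");
    }
}
```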
```diff
@@ -3,6 +3,7 @@
 pub mod activation;
 pub mod conv;
 pub mod embedding;
+pub mod group_norm;
 pub mod init;
 pub mod layer_norm;
 pub mod linear;
@@ -12,8 +13,9 @@ pub mod optim;
 pub mod var_builder;
 
 pub use activation::Activation;
-pub use conv::{Conv1d, Conv1dConfig};
+pub use conv::{conv1d, conv2d, Conv1d, Conv1dConfig, Conv2d, Conv2dConfig};
 pub use embedding::{embedding, Embedding};
+pub use group_norm::{group_norm, GroupNorm};
 pub use init::Init;
 pub use layer_norm::{layer_norm, LayerNorm};
 pub use linear::{linear, linear_no_bias, Linear};
```
```diff
@@ -32,3 +32,7 @@ pub fn log_softmax<D: candle::shape::Dim>(xs: &Tensor, d: D) -> Result<Tensor> {
     let log_sm = diff.broadcast_sub(&sum_exp.log()?)?;
     Ok(log_sm)
 }
+
+pub fn silu(xs: &Tensor) -> Result<Tensor> {
+    xs / (xs.neg()?.exp()? + 1.0)?
+}
```
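The new `silu` op computes x * sigmoid(x) = x / (1 + e^(-x)), which is exactly what the tensor expression above spells out. A scalar version for comparison (illustrative only):

```rust
/// Scalar SiLU (a.k.a. swish): x * sigmoid(x) = x / (1 + e^(-x)).
/// The tensor version in the diff expresses the same formula with candle ops.
fn silu_scalar(x: f32) -> f32 {
    x / (1.0 + (-x).exp())
}

fn main() {
    for x in [-2.0f32, 0.0, 2.0] {
        println!("silu({x}) = {}", silu_scalar(x));
    }
}
```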