diff --git a/candle-examples/examples/stable-diffusion/attention.rs b/candle-examples/examples/stable-diffusion/attention.rs index 83e7ef34..943a6ae1 100644 --- a/candle-examples/examples/stable-diffusion/attention.rs +++ b/candle-examples/examples/stable-diffusion/attention.rs @@ -432,7 +432,7 @@ impl AttentionBlock { (query_states * scale)?.matmul(&(key_states.t()? * scale)?)?; let attention_probs = nn::ops::softmax(&attention_scores, D::Minus1)?; - let xs = attention_probs.matmul(&value_states)?; + let xs = attention_probs.matmul(&value_states.contiguous()?)?; let xs = xs.transpose(1, 2)?.contiguous()?; let xs = xs.flatten_from(D::Minus2)?; let xs = self diff --git a/candle-examples/examples/stable-diffusion/main.rs b/candle-examples/examples/stable-diffusion/main.rs index 8ce0c234..ac31e855 100644 --- a/candle-examples/examples/stable-diffusion/main.rs +++ b/candle-examples/examples/stable-diffusion/main.rs @@ -14,7 +14,7 @@ mod utils; mod vae; use anyhow::{Error as E, Result}; -use candle::{DType, Device, Tensor}; +use candle::{DType, Device, IndexOp, Tensor}; use clap::Parser; use tokenizers::Tokenizer; @@ -245,7 +245,7 @@ fn run(args: Args) -> Result<()> { if args.intermediary_images { let image = vae.decode(&(&latents / 0.18215)?)?; let image = ((image / 2.)? + 0.5)?.to_device(&Device::Cpu)?; - let image = (image * 255.)?.to_dtype(DType::U8)?; + let image = (image * 255.)?.to_dtype(DType::U8)?.i(0)?; let image_filename = output_filename(&final_image, idx + 1, num_samples, Some(timestep_index + 1)); crate::utils::save_image(&image, image_filename)? @@ -260,7 +260,7 @@ fn run(args: Args) -> Result<()> { let image = vae.decode(&(&latents / 0.18215)?)?; // TODO: Add the clamping between 0 and 1. let image = ((image / 2.)? + 0.5)?.to_device(&Device::Cpu)?; - let image = (image * 255.)?.to_dtype(DType::U8)?; + let image = (image * 255.)?.to_dtype(DType::U8)?.i(0)?; let image_filename = output_filename(&final_image, idx + 1, num_samples, None); crate::utils::save_image(&image, image_filename)? }