diff --git a/README.md b/README.md
index 99c1a1b5..dbb5d583 100644
--- a/README.md
+++ b/README.md
@@ -37,10 +37,11 @@ cargo run --example llama --release
 cargo run --example falcon --release
 cargo run --example bert --release
 cargo run --example bigcode --release
-cargo run --example stable-diffusion --release --features image -- --prompt "a rusty robot holding a fire torch"
+cargo run --example stable-diffusion --release -- --prompt "a rusty robot holding a fire torch"
 ```
 
-In order to use **CUDA** add `--features cuda` to the example command line.
+In order to use **CUDA** add `--features cuda` to the example command line. If
+you have cuDNN installed, use `--features cudnn` for even more speedups.
 
 There are also some wasm examples for whisper and
 [llama2.c](https://github.com/karpathy/llama2.c). You can either build them with
diff --git a/candle-examples/Cargo.toml b/candle-examples/Cargo.toml
index e07001e4..cade6695 100644
--- a/candle-examples/Cargo.toml
+++ b/candle-examples/Cargo.toml
@@ -23,7 +23,7 @@ num-traits = { workspace = true }
 intel-mkl-src = { workspace = true, optional = true }
 cudarc = { workspace = true, optional = true }
 half = { workspace = true, optional = true }
-image = { workspace = true, optional = true }
+image = { workspace = true }
 
 [dev-dependencies]
 anyhow = { workspace = true }
@@ -55,7 +55,3 @@ nccl = ["cuda", "cudarc/nccl", "dep:half"]
 [[example]]
 name = "llama_multiprocess"
 required-features = ["cuda", "nccl", "flash-attn"]
-
-[[example]]
-name = "stable-diffusion"
-required-features = ["image"]
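
With this patch applied, `image` becomes a default dependency of `candle-examples`, so the stable-diffusion example runs without `--features image`. A usage sketch, assuming the `cuda` and `cudnn` Cargo features exist in the workspace as the updated README text implies:

```bash
# CPU run: image support is now always compiled in, no extra feature flag needed
cargo run --example stable-diffusion --release -- --prompt "a rusty robot holding a fire torch"

# GPU run: per the README, swap in --features cudnn if cuDNN is installed
cargo run --example stable-diffusion --release --features cuda -- --prompt "a rusty robot holding a fire torch"
```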