Quantized version of flux. (#2500)

* Quantized version of flux.
* More generic sampling.
* Hook the quantized model.
* Use the newly minted gguf file.
* Fix for the quantized model.
* Default to avoid the faster cuda kernels.
2025-06-19 03:54:56 +00:00 · 2024-09-26 10:23:43 +02:00
parent d01207dbf3
commit 10d47183c0
6 changed files with 555 additions and 26 deletions
--- a/candle-transformers/src/models/flux/sampling.rs
+++ b/candle-transformers/src/models/flux/sampling.rs
@@ -92,8 +92,8 @@ pub fn unpack(xs: &Tensor, height: usize, width: usize) -> Result<Tensor> {
 }

 #[allow(clippy::too_many_arguments)]
-pub fn denoise(
-    model: &super::model::Flux,
+pub fn denoise<M: super::WithForward>(
+    model: &M,
    img: &Tensor,
    img_ids: &Tensor,
    txt: &Tensor,