Quantized support for f16 and f32 (#457)

* Add f32 as a quantized type.

* Add f16 as a quantized type too.
Laurent Mazare
2023-08-15 21:09:37 +01:00
committed by GitHub
parent e68b2accb4
commit b8263aa15c
3 changed files with 75 additions and 4 deletions
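As context for the commit message above: below is a minimal sketch of what treating f32 and f16 as quantized types can mean in practice. The QuantizedType trait, its method names, and the half crate dependency are illustrative assumptions, not candle's actual quantization API; the point is that for plain float types the block size is 1 and the quantize/dequantize steps are an identity copy (f32) or a plain precision cast (f16).

// Hypothetical sketch, not candle's API: f32 and f16 can act as
// "quantized" types whose conversion to/from f32 is the identity (f32)
// or a precision cast (f16), with a block size of 1.
use half::f16; // assumed dependency providing the f16 type

trait QuantizedType: Sized {
    // Number of elements per quantization block; 1 for plain float types.
    const BLCK_SIZE: usize;
    fn quantize(xs: &[f32], ys: &mut [Self]);
    fn dequantize(xs: &[Self], ys: &mut [f32]);
}

impl QuantizedType for f32 {
    const BLCK_SIZE: usize = 1;
    fn quantize(xs: &[f32], ys: &mut [Self]) {
        ys.copy_from_slice(xs); // identity: f32 is already "dequantized"
    }
    fn dequantize(xs: &[Self], ys: &mut [f32]) {
        ys.copy_from_slice(xs);
    }
}

impl QuantizedType for f16 {
    const BLCK_SIZE: usize = 1;
    fn quantize(xs: &[f32], ys: &mut [Self]) {
        for (y, x) in ys.iter_mut().zip(xs) {
            *y = f16::from_f32(*x); // precision cast, no block scaling
        }
    }
    fn dequantize(xs: &[Self], ys: &mut [f32]) {
        for (y, x) in ys.iter_mut().zip(xs) {
            *y = x.to_f32();
        }
    }
}

fn main() {
    let xs = [1.0f32, 2.5, -3.25];
    let mut h = [f16::ZERO; 3];
    <f16 as QuantizedType>::quantize(&xs, &mut h);
    let mut back = [0.0f32; 3];
    <f16 as QuantizedType>::dequantize(&h, &mut back);
    println!("{:?} -> {:?} -> {:?}", xs, h, back);
}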


@@ -8,7 +8,7 @@ use candle::{DType, Device};
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 struct Args {
-    /// GGML file to load.
+    /// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
     #[arg(long)]
     model: String,
 }
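For reference, a self-contained sketch of the example's argument handling as shown in the hunk above. The Args struct is taken from the diff; the body of main is elided in the real example, and the loading step below is a placeholder comment, not candle's actual call.

use clap::Parser;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
    #[arg(long)]
    model: String,
}

fn main() {
    let args = Args::parse();
    // The real example goes on to read the quantized tensors from this
    // file; that part is omitted here.
    println!("loading ggml model from {}", args.model);
}

This would be invoked along the lines of "cargo run --release -- --model model.bin", where the file name is illustrative.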