Quantized support for f16 and f32 (#457)

* Add f32 as a quantized type.

* Add f16 as a quantized type too.
Laurent Mazare
2023-08-15 21:09:37 +01:00
committed by GitHub
parent e68b2accb4
commit b8263aa15c
3 changed files with 75 additions and 4 deletions
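As context for the commit message above: below is a minimal sketch of what treating f32 and f16 as quantized types can mean in practice. The QuantizedType trait, its method names, and the half crate dependency are illustrative assumptions, not candle's actual quantization API; the point is that for plain float types the block size is 1 and the quantize/dequantize steps are an identity copy (f32) or a plain precision cast (f16).

// Hypothetical sketch, not candle's API: f32 and f16 can act as
// "quantized" types whose conversion to/from f32 is the identity (f32)
// or a precision cast (f16), with a block size of 1.
use half::f16; // assumed dependency providing the f16 type

trait QuantizedType: Sized {
    // Number of elements per quantization block; 1 for plain float types.
    const BLCK_SIZE: usize;
    fn quantize(xs: &[f32], ys: &mut [Self]);
    fn dequantize(xs: &[Self], ys: &mut [f32]);
}

impl QuantizedType for f32 {
    const BLCK_SIZE: usize = 1;
    fn quantize(xs: &[f32], ys: &mut [Self]) {
        ys.copy_from_slice(xs); // identity: f32 is already "dequantized"
    }
    fn dequantize(xs: &[Self], ys: &mut [f32]) {
        ys.copy_from_slice(xs);
    }
}

impl QuantizedType for f16 {
    const BLCK_SIZE: usize = 1;
    fn quantize(xs: &[f32], ys: &mut [Self]) {
        for (y, x) in ys.iter_mut().zip(xs) {
            *y = f16::from_f32(*x); // precision cast, no block scaling
        }
    }
    fn dequantize(xs: &[Self], ys: &mut [f32]) {
        for (y, x) in ys.iter_mut().zip(xs) {
            *y = x.to_f32();
        }
    }
}

fn main() {
    let xs = [1.0f32, 2.5, -3.25];
    let mut h = [f16::ZERO; 3];
    <f16 as QuantizedType>::quantize(&xs, &mut h);
    let mut back = [0.0f32; 3];
    <f16 as QuantizedType>::dequantize(&h, &mut back);
    println!("{:?} -> {:?} -> {:?}", xs, h, back);
}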


@@ -8,7 +8,7 @@ use candle::{DType, Device};
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 struct Args {
-    /// GGML file to load.
+    /// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
     #[arg(long)]
     model: String,
 }
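For reference, a self-contained sketch of the example's argument handling as shown in the hunk above. The Args struct is taken from the diff; the body of main is elided in the real example, and the loading step below is a placeholder comment, not candle's actual call.

use clap::Parser;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
    #[arg(long)]
    model: String,
}

fn main() {
    let args = Args::parse();
    // The real example goes on to read the quantized tensors from this
    // file; that part is omitted here.
    println!("loading ggml model from {}", args.model);
}

This would be invoked along the lines of "cargo run --release -- --model model.bin", where the file name is illustrative.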