mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 02:38:10 +00:00
Quantized support for f16 and f32 (#457)
* Add f32 as a quantized type.
* Add f16 as a quantized type too.
This commit is contained in:
@@ -8,7 +8,7 @@ use candle::{DType, Device};
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Args {
|
||||
/// GGML file to load.
|
||||
/// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
|
||||
#[arg(long)]
|
||||
model: String,
|
||||
}
|
||||
|
Reference in New Issue
Block a user