mirror of
https://github.com/huggingface/candle.git
synced 2025-06-21 20:22:49 +00:00
Add dequantization for GGML's q4_0, q4_1, q5_0, q5_1 and q8_0 formats (#407)
* Added dequantization for `q4_0`, `q4_1`, `q5_0`, `q5_1` and `q8_0`
* Exposed `tensor_from_ggml` for external usage
* Bug fixes and an example
This commit is contained in:
29
candle-examples/examples/ggml/main.rs
Normal file
29
candle-examples/examples/ggml/main.rs
Normal file
@ -0,0 +1,29 @@
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use std::fs::File;
|
||||
|
||||
use candle::ggml::Content;
|
||||
use candle::{DType, Device};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Args {
|
||||
/// GGML file to load.
|
||||
#[arg(long)]
|
||||
model: String,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
let mut file = File::open(args.model)?;
|
||||
let start = std::time::Instant::now();
|
||||
let model = Content::read(&mut file, DType::F16, &Device::Cpu)?;
|
||||
|
||||
println!(
|
||||
"Loaded {:?} tensors in {:?}",
|
||||
model.tensors.len(),
|
||||
start.elapsed()
|
||||
);
|
||||
Ok(())
|
||||
}
|
Reference in New Issue
Block a user