Add dequantization for ggml's q4_0, q4_1, q5_0, q5_1 and q8_0 (#407)

* Added dequantization for `q4_0`, `q4_1`, `q5_0`, `q5_1` and `q8_0` (a minimal `q4_0` block sketch follows this list)

* expose `tensor_from_ggml` for external usage

* bugfixes & example
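
For reference, `q4_0` stores weights in blocks of 32: a 16-bit float scale `d` followed by 16 bytes of packed 4-bit values, each offset by 8 before scaling. The sketch below illustrates that block layout only; the `dequantize_q4_0` helper and its f32 scale parameter are illustrative and not part of this commit's API.

```rust
/// Illustrative sketch of the ggml q4_0 block layout: 32 weights share one
/// scale `d`; each byte of `qs` packs two 4-bit values, stored offset by 8.
/// (`d` is an f16 on disk; it is passed here as f32 to keep the sketch small.)
fn dequantize_q4_0(d: f32, qs: &[u8; 16]) -> [f32; 32] {
    let mut out = [0f32; 32];
    for (j, &byte) in qs.iter().enumerate() {
        let lo = (byte & 0x0F) as i32 - 8; // low nibble -> weights 0..15
        let hi = (byte >> 4) as i32 - 8;   // high nibble -> weights 16..31
        out[j] = lo as f32 * d;
        out[j + 16] = hi as f32 * d;
    }
    out
}
```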
Lukas Kreussel
2023-08-14 00:22:57 +02:00
committed by GitHub
parent 8bd2b22b33
commit 9e7e6e0288
2 changed files with 288 additions and 63 deletions

@@ -0,0 +1,29 @@
use anyhow::Result;
use clap::Parser;
use std::fs::File;

use candle::ggml::Content;
use candle::{DType, Device};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// GGML file to load.
    #[arg(long)]
    model: String,
}

fn main() -> Result<()> {
    let args = Args::parse();
    let mut file = File::open(args.model)?;
    let start = std::time::Instant::now();
    // Read the ggml file, dequantizing its tensors to the requested dtype on the CPU.
    let model = Content::read(&mut file, DType::F16, &Device::Cpu)?;
    println!(
        "Loaded {:?} tensors in {:?}",
        model.tensors.len(),
        start.elapsed()
    );
    Ok(())
}
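
To inspect what was dequantized, a loop like the one below could be dropped into `main` right before `Ok(())`. This is a sketch that assumes `model.tensors` is an iterable map of tensor names to `candle` tensors, as the `len()` call above suggests.

```rust
    // Sketch: print each loaded tensor's name, shape and dtype.
    for (name, tensor) in model.tensors.iter() {
        println!("{name}: shape {:?}, dtype {:?}", tensor.shape(), tensor.dtype());
    }
```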