mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 02:58:50 +00:00

* Added dequantization for `q4_0`, `q4_1`, `q5_0`, `q5_1` and `q8_0` * expose `tensor_from_ggml` for external usage * bugfixes & example
30 lines · 614 B · Rust
use anyhow::{Context, Result};
use clap::Parser;
use std::fs::File;

use candle::ggml::Content;
use candle::{DType, Device};
|
|
|
|
/// Command-line arguments for this GGML-loading example.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// GGML file to load.
    #[arg(long)]
    model: String,
}
|
|
|
|
fn main() -> Result<()> {
|
|
let args = Args::parse();
|
|
|
|
let mut file = File::open(args.model)?;
|
|
let start = std::time::Instant::now();
|
|
let model = Content::read(&mut file, DType::F16, &Device::Cpu)?;
|
|
|
|
println!(
|
|
"Loaded {:?} tensors in {:?}",
|
|
model.tensors.len(),
|
|
start.elapsed()
|
|
);
|
|
Ok(())
|
|
}
|