Mirror of https://github.com/huggingface/candle.git, synced 2025-06-19 19:58:35 +00:00.

* Add quantized tensors.
* Implement the debug trait for QTensor.
* Add the QMatMul custom op.

29 lines · 646 B · Rust
use anyhow::Result;
|
|
use clap::Parser;
|
|
use std::fs::File;
|
|
|
|
use candle::quantized::ggml_file::Content;
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(author, version, about, long_about = None)]
|
|
struct Args {
|
|
/// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
|
|
#[arg(long)]
|
|
model: String,
|
|
}
|
|
|
|
fn main() -> Result<()> {
|
|
let args = Args::parse();
|
|
|
|
let mut file = File::open(args.model)?;
|
|
let start = std::time::Instant::now();
|
|
let model = Content::read(&mut file)?;
|
|
|
|
println!(
|
|
"Loaded {:?} tensors in {:?}",
|
|
model.tensors.len(),
|
|
start.elapsed()
|
|
);
|
|
Ok(())
|
|
}
|