Mirror of https://github.com/huggingface/candle.git, synced 2025-06-19 19:58:35 +00:00.

* Add quantized tensors.
* Implement the debug trait for QTensor.
* Add the QMatMul custom op.

29 lines · 646 B · Rust
use anyhow::Result;
|
|
use clap::Parser;
|
|
use std::fs::File;
|
|
|
|
use candle::quantized::ggml_file::Content;
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(author, version, about, long_about = None)]
|
|
struct Args {
|
|
/// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
|
|
#[arg(long)]
|
|
model: String,
|
|
}
|
|
|
|
fn main() -> Result<()> {
|
|
let args = Args::parse();
|
|
|
|
let mut file = File::open(args.model)?;
|
|
let start = std::time::Instant::now();
|
|
let model = Content::read(&mut file)?;
|
|
|
|
println!(
|
|
"Loaded {:?} tensors in {:?}",
|
|
model.tensors.len(),
|
|
start.elapsed()
|
|
);
|
|
Ok(())
|
|
}
|