Add dequantization for ggml's q4_0, q4_1, q5_0, q5_1 and q8_0 (#407)

* Added dequantization for `q4_0`, `q4_1`, `q5_0`, `q5_1` and `q8_0` (a minimal `q4_0` block sketch follows this list)

* expose `tensor_from_ggml` for external usage

* bugfixes & example
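
For reference, `q4_0` stores weights in blocks of 32: a 16-bit float scale `d` followed by 16 bytes of packed 4-bit values, each offset by 8 before scaling. The sketch below illustrates that block layout only; the `dequantize_q4_0` helper and its f32 scale parameter are illustrative and not part of this commit's API.

```rust
/// Illustrative sketch of the ggml q4_0 block layout: 32 weights share one
/// scale `d`; each byte of `qs` packs two 4-bit values, stored offset by 8.
/// (`d` is an f16 on disk; it is passed here as f32 to keep the sketch small.)
fn dequantize_q4_0(d: f32, qs: &[u8; 16]) -> [f32; 32] {
    let mut out = [0f32; 32];
    for (j, &byte) in qs.iter().enumerate() {
        let lo = (byte & 0x0F) as i32 - 8; // low nibble -> weights 0..15
        let hi = (byte >> 4) as i32 - 8;   // high nibble -> weights 16..31
        out[j] = lo as f32 * d;
        out[j + 16] = hi as f32 * d;
    }
    out
}
```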
Lukas Kreussel
2023-08-14 00:22:57 +02:00
committed by GitHub
parent 8bd2b22b33
commit 9e7e6e0288
2 changed files with 288 additions and 63 deletions

@@ -0,0 +1,29 @@
use anyhow::Result;
use clap::Parser;
use std::fs::File;

use candle::ggml::Content;
use candle::{DType, Device};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// GGML file to load.
    #[arg(long)]
    model: String,
}

fn main() -> Result<()> {
    let args = Args::parse();
    let mut file = File::open(args.model)?;
    let start = std::time::Instant::now();
    // Read the ggml file, dequantizing its tensors to the requested dtype on the CPU.
    let model = Content::read(&mut file, DType::F16, &Device::Cpu)?;
    println!(
        "Loaded {:?} tensors in {:?}",
        model.tensors.len(),
        start.elapsed()
    );
    Ok(())
}
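
To inspect what was dequantized, a loop like the one below could be dropped into `main` right before `Ok(())`. This is a sketch that assumes `model.tensors` is an iterable map of tensor names to `candle` tensors, as the `len()` call above suggests.

```rust
    // Sketch: print each loaded tensor's name, shape and dtype.
    for (name, tensor) in model.tensors.iter() {
        println!("{name}: shape {:?}, dtype {:?}", tensor.shape(), tensor.dtype());
    }
```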