T5 quantized example (#922)

* Load gguf files for the quantized t5.
* Add the quantized t5 example.
* Allow for loading local files.
* Add some support for quantizing safetensors files.
* Transpose before quantizing.
* Quantized t5.
* Retrieve the weights from the hub.
2025-06-17 19:18:50 +00:00 · 2023-09-21 12:33:15 +01:00
parent 2619c4307f
commit 3b557765e8
4 changed files with 272 additions and 1 deletions
--- a/candle-transformers/src/models/quantized_t5.rs
+++ b/candle-transformers/src/models/quantized_t5.rs
@@ -15,6 +15,21 @@ pub struct VarBuilder {
 }

 impl VarBuilder {
+    pub fn from_gguf<P: AsRef<std::path::Path>>(p: P) -> Result<Self> { // Build a VarBuilder holding every tensor of the gguf file at `p`.
+        let mut file = std::fs::File::open(p)?; // open/read failures are propagated to the caller via `?`
+        let content = candle::quantized::gguf_file::Content::read(&mut file)?;
+        let mut data = std::collections::HashMap::new(); // tensor name -> Arc-wrapped tensor
+        for tensor_name in content.tensor_infos.keys() {
+            let tensor = content.tensor(&mut file, tensor_name)?; // each tensor is fetched by name through the same file handle
+            data.insert(tensor_name.to_string(), Arc::new(tensor));
+        }
+        Ok(Self {
+            data: Arc::new(data),
+            path: Vec::new(), // starts empty; `pp` appends segments to a clone of this path
+            device: Device::Cpu, // hard-coded: this constructor takes no device argument
+        })
+    }
+
    fn pp<S: ToString>(&self, s: S) -> Self {
        let mut path = self.path.clone();
        path.push(s.to_string());
@@ -87,7 +102,7 @@ struct QMatMul {

 impl QMatMul {
    fn new(out_dim: usize, in_dim: usize, vb: VarBuilder) -> Result<Self> {
-        let ws = vb.get((out_dim, in_dim), "weight")?;
+        let ws = vb.get((in_dim, out_dim), "weight")?;
        let inner = candle::quantized::QMatMul::from_arc(ws);
        let span = tracing::span!(tracing::Level::TRACE, "qmatmul");
        Ok(Self { inner, span })