GGUF support in the quantized model. (#559)

* GGUF support in the quantized model. * Get the GGUF support to work on llama.
2025-06-19 03:54:56 +00:00 · 2023-08-23 09:20:57 +01:00
parent 0764741cc4
commit 508d34daf2
2 changed files with 231 additions and 47 deletions
--- a/candle-core/src/quantized/gguf_file.rs
+++ b/candle-core/src/quantized/gguf_file.rs
@ -2,7 +2,7 @@
 //!
 //! Spec: https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md

-use super::GgmlDType;
+use super::{GgmlDType, QTensor};
 use crate::Result;
 use byteorder::{LittleEndian, ReadBytesExt};
 use std::collections::HashMap;
@ -55,7 +55,7 @@ impl TensorInfo {
        &self,
        reader: &mut R,
        tensor_data_offset: u64,
-    ) -> Result<super::QTensor> {
+    ) -> Result<QTensor> {
        let tensor_elems = self.shape.elem_count();
        let size_in_bytes =
            tensor_elems * self.ggml_dtype.type_size() / self.ggml_dtype.blck_size();
@ -78,6 +78,10 @@ fn read_string<R: std::io::Read>(reader: &mut R) -> Result<String> {
    let len = reader.read_u32::<LittleEndian>()?;
    let mut v = vec![0u8; len as usize];
    reader.read_exact(&mut v)?;
+    // GGUF strings are not supposed to be null-terminated, but in practice some files include
+    // trailing null bytes, so strip them.
+    while let Some(0) = v.last() {
+        v.pop();
+    }
    // GGUF strings are utf8 encoded but there are cases that don't seem to be valid.
    Ok(String::from_utf8_lossy(&v).into_owned())
 }
@ -125,6 +129,76 @@ pub enum Value {
 }

 impl Value {
+    /// Extracts the inner `u8` when this value is the `U8` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_u8(&self) -> Result<u8> {
+        if let Self::U8(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a u8 {v:?}")
+    }
+
+    /// Extracts the inner `i8` when this value is the `I8` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_i8(&self) -> Result<i8> {
+        if let Self::I8(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a i8 {v:?}")
+    }
+
+    /// Extracts the inner `u16` when this value is the `U16` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_u16(&self) -> Result<u16> {
+        if let Self::U16(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a u16 {v:?}")
+    }
+
+    /// Extracts the inner `i16` when this value is the `I16` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_i16(&self) -> Result<i16> {
+        if let Self::I16(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a i16 {v:?}")
+    }
+
+    /// Extracts the inner `u32` when this value is the `U32` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_u32(&self) -> Result<u32> {
+        if let Self::U32(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a u32 {v:?}")
+    }
+
+    /// Extracts the inner `i32` when this value is the `I32` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_i32(&self) -> Result<i32> {
+        if let Self::I32(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a i32 {v:?}")
+    }
+
+    /// Extracts the inner `f32` when this value is the `F32` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_f32(&self) -> Result<f32> {
+        if let Self::F32(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a f32 {v:?}")
+    }
+
+    /// Extracts the inner `bool` when this value is the `Bool` variant.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_bool(&self) -> Result<bool> {
+        if let Self::Bool(inner) = self {
+            return Ok(*inner);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a bool {v:?}")
+    }
+
+    /// Borrows the inner vector of values when this value is the `Array` variant.
+    ///
+    /// The elements are not copied; the returned reference borrows from `self`.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_vec(&self) -> Result<&Vec<Value>> {
+        if let Self::Array(items) = self {
+            return Ok(items);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a vec {v:?}")
+    }
+
+    /// Borrows the inner string when this value is the `String` variant.
+    ///
+    /// The string is not copied; the returned reference borrows from `self`.
+    ///
+    /// # Errors
+    ///
+    /// Bails with a message containing the debug form of `self` for any other variant.
+    pub fn to_string(&self) -> Result<&String> {
+        if let Self::String(text) = self {
+            return Ok(text);
+        }
+        // `v` is referenced by name from the format string below.
+        let v = self;
+        crate::bail!("not a string {v:?}")
+    }
+
    fn read<R: std::io::Read>(reader: &mut R, value_type: ValueType) -> Result<Self> {
        let v = match value_type {
            ValueType::U8 => Self::U8(reader.read_u8()?),
@ -225,4 +299,16 @@ impl Content {
            tensor_data_offset,
        })
    }
+
+    /// Reads the tensor registered under `name` from `reader`.
+    ///
+    /// The entry is looked up in `self.tensor_infos`, and the actual read is delegated to that
+    /// entry's `read` method together with `self.tensor_data_offset`.
+    ///
+    /// # Errors
+    ///
+    /// Bails if `self.tensor_infos` has no entry for `name`; any error from the delegated read
+    /// is returned unchanged.
+    pub fn tensor<R: std::io::Seek + std::io::Read>(
+        &self,
+        reader: &mut R,
+        name: &str,
+    ) -> Result<QTensor> {
+        match self.tensor_infos.get(name) {
+            Some(tensor_info) => tensor_info.read(reader, self.tensor_data_offset),
+            None => crate::bail!("cannot find tensor info for {name}"),
+        }
+    }
 }