mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 19:18:50 +00:00
Preliminary GGUF support. (#557)
* Preliminary GGUF support. * Tensor reading.
This commit is contained in:
220
candle-core/src/quantized/gguf_file.rs
Normal file
220
candle-core/src/quantized/gguf_file.rs
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
//! Support for the GGUF file format.
|
||||||
|
//!
|
||||||
|
//! Spec: https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md
|
||||||
|
|
||||||
|
use super::GgmlDType;
|
||||||
|
use crate::Result;
|
||||||
|
use byteorder::{LittleEndian, ReadBytesExt};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Default alignment, in bytes, of the tensor data section.
///
/// `Content::read` rounds the stream position reached at the end of the header up to a
/// multiple of the alignment to find where the tensor data starts.
pub const DEFAULT_ALIGNMENT: usize = 32;
|
||||||
|
|
||||||
|
/// File-format family identified by the leading 32-bit magic number.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Magic {
    /// The magic number of a GGUF file.
    Gguf,
}
|
||||||
|
|
||||||
|
impl TryFrom<u32> for Magic {
|
||||||
|
type Error = crate::Error;
|
||||||
|
fn try_from(value: u32) -> Result<Self> {
|
||||||
|
let magic = match value {
|
||||||
|
0x46554747 | 0x47475546 => Self::Gguf,
|
||||||
|
_ => crate::bail!("unknown magic {value:08x}"),
|
||||||
|
};
|
||||||
|
Ok(magic)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Combination of a file magic and a format version that this reader understands.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VersionedMagic {
    /// GGUF magic followed by version number `1`.
    GgufV1,
}
|
||||||
|
|
||||||
|
impl VersionedMagic {
|
||||||
|
fn read<R: std::io::Read>(reader: &mut R) -> Result<Self> {
|
||||||
|
let magic = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let magic = Magic::try_from(magic)?;
|
||||||
|
let version = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let versioned_magic = match (magic, version) {
|
||||||
|
(Magic::Gguf, 1) => Self::GgufV1,
|
||||||
|
_ => crate::bail!("ggml: unsupported magic/version {magic:?}/{version}"),
|
||||||
|
};
|
||||||
|
Ok(versioned_magic)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct TensorInfo {
|
||||||
|
pub ggml_dtype: GgmlDType,
|
||||||
|
pub shape: crate::Shape,
|
||||||
|
pub offset: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TensorInfo {
|
||||||
|
pub fn read<R: std::io::Seek + std::io::Read>(
|
||||||
|
&self,
|
||||||
|
reader: &mut R,
|
||||||
|
tensor_data_offset: u64,
|
||||||
|
) -> Result<super::QTensor> {
|
||||||
|
let tensor_elems = self.shape.elem_count();
|
||||||
|
let size_in_bytes =
|
||||||
|
tensor_elems * self.ggml_dtype.type_size() / self.ggml_dtype.blck_size();
|
||||||
|
let mut raw_data = vec![0u8; size_in_bytes];
|
||||||
|
reader.seek(std::io::SeekFrom::Start(tensor_data_offset + self.offset))?;
|
||||||
|
reader.read_exact(&mut raw_data)?;
|
||||||
|
super::ggml_file::qtensor_from_ggml(self.ggml_dtype, &raw_data, self.shape.dims().to_vec())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Content {
|
||||||
|
pub magic: VersionedMagic,
|
||||||
|
pub metadata: HashMap<String, Value>,
|
||||||
|
pub tensor_infos: HashMap<String, TensorInfo>,
|
||||||
|
pub tensor_data_offset: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_string<R: std::io::Read>(reader: &mut R) -> Result<String> {
|
||||||
|
let len = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let mut v = vec![0u8; len as usize];
|
||||||
|
reader.read_exact(&mut v)?;
|
||||||
|
// GGUF strings are utf8 encoded but there are cases that don't seem to be valid.
|
||||||
|
Ok(String::from_utf8_lossy(&v).into_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Type tag of a metadata value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValueType {
    /// The value is a 8-bit unsigned integer.
    U8,
    /// The value is a 8-bit signed integer.
    I8,
    /// The value is a 16-bit unsigned little-endian integer.
    U16,
    /// The value is a 16-bit signed little-endian integer.
    I16,
    /// The value is a 32-bit unsigned little-endian integer.
    U32,
    /// The value is a 32-bit signed little-endian integer.
    I32,
    /// The value is a 32-bit IEEE754 floating point number.
    F32,
    /// The value is a boolean.
    ///
    /// 1-byte value where 0 is false and 1 is true.
    /// Anything else is invalid, and should be treated as either the model being invalid or
    /// the reader being buggy.
    Bool,
    /// The value is a UTF-8 non-null-terminated string, with length prepended.
    String,
    /// The value is an array of other values, with the length and type prepended.
    ///
    /// Arrays can be nested, and the length of the array is the number of elements in the
    /// array, not the number of bytes.
    Array,
}
|
||||||
|
|
||||||
|
/// A decoded metadata value; each variant carries the payload for the [`ValueType`] of the
/// same name.
#[derive(Debug, Clone)]
pub enum Value {
    /// 8-bit unsigned integer.
    U8(u8),
    /// 8-bit signed integer.
    I8(i8),
    /// 16-bit unsigned integer.
    U16(u16),
    /// 16-bit signed integer.
    I16(i16),
    /// 32-bit unsigned integer.
    U32(u32),
    /// 32-bit signed integer.
    I32(i32),
    /// 32-bit floating point number.
    F32(f32),
    /// Boolean.
    Bool(bool),
    /// Text string.
    String(String),
    /// Sequence of values, possibly nested.
    Array(Vec<Value>),
}
|
||||||
|
|
||||||
|
impl Value {
|
||||||
|
fn read<R: std::io::Read>(reader: &mut R, value_type: ValueType) -> Result<Self> {
|
||||||
|
let v = match value_type {
|
||||||
|
ValueType::U8 => Self::U8(reader.read_u8()?),
|
||||||
|
ValueType::I8 => Self::I8(reader.read_i8()?),
|
||||||
|
ValueType::U16 => Self::U16(reader.read_u16::<LittleEndian>()?),
|
||||||
|
ValueType::I16 => Self::I16(reader.read_i16::<LittleEndian>()?),
|
||||||
|
ValueType::U32 => Self::U32(reader.read_u32::<LittleEndian>()?),
|
||||||
|
ValueType::I32 => Self::I32(reader.read_i32::<LittleEndian>()?),
|
||||||
|
ValueType::F32 => Self::F32(reader.read_f32::<LittleEndian>()?),
|
||||||
|
ValueType::Bool => match reader.read_u8()? {
|
||||||
|
0 => Self::Bool(false),
|
||||||
|
1 => Self::Bool(true),
|
||||||
|
b => crate::bail!("unexpected bool value {b}"),
|
||||||
|
},
|
||||||
|
ValueType::String => Self::String(read_string(reader)?),
|
||||||
|
ValueType::Array => {
|
||||||
|
let value_type = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let value_type = ValueType::from_u32(value_type)?;
|
||||||
|
let len = reader.read_u32::<LittleEndian>()? as usize;
|
||||||
|
let mut vs = Vec::with_capacity(len);
|
||||||
|
for _ in 0..len {
|
||||||
|
vs.push(Value::read(reader, value_type)?)
|
||||||
|
}
|
||||||
|
Self::Array(vs)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ValueType {
|
||||||
|
fn from_u32(v: u32) -> Result<Self> {
|
||||||
|
let v = match v {
|
||||||
|
0 => Self::U8,
|
||||||
|
1 => Self::I8,
|
||||||
|
2 => Self::U16,
|
||||||
|
3 => Self::I16,
|
||||||
|
4 => Self::U32,
|
||||||
|
5 => Self::I32,
|
||||||
|
6 => Self::F32,
|
||||||
|
7 => Self::Bool,
|
||||||
|
8 => Self::String,
|
||||||
|
9 => Self::Array,
|
||||||
|
v => crate::bail!("unrecognized value-type {v}"),
|
||||||
|
};
|
||||||
|
Ok(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Content {
|
||||||
|
pub fn read<R: std::io::Seek + std::io::Read>(reader: &mut R) -> Result<Self> {
|
||||||
|
let magic = VersionedMagic::read(reader)?;
|
||||||
|
let tensor_count = reader.read_u32::<LittleEndian>()? as usize;
|
||||||
|
let metadata_kv_count = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let mut metadata = HashMap::new();
|
||||||
|
for _idx in 0..metadata_kv_count {
|
||||||
|
let key = read_string(reader)?;
|
||||||
|
let value_type = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let value_type = ValueType::from_u32(value_type)?;
|
||||||
|
let value = Value::read(reader, value_type)?;
|
||||||
|
metadata.insert(key, value);
|
||||||
|
}
|
||||||
|
let mut tensor_infos = HashMap::new();
|
||||||
|
for _idx in 0..tensor_count {
|
||||||
|
let tensor_name = read_string(reader)?;
|
||||||
|
let n_dimensions = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let mut dimensions = vec![0u32; n_dimensions as usize];
|
||||||
|
reader.read_u32_into::<LittleEndian>(&mut dimensions)?;
|
||||||
|
dimensions.reverse();
|
||||||
|
let dimensions: Vec<usize> = dimensions.into_iter().map(|c| c as usize).collect();
|
||||||
|
let ggml_dtype = reader.read_u32::<LittleEndian>()?;
|
||||||
|
let ggml_dtype = GgmlDType::from_u32(ggml_dtype)?;
|
||||||
|
let offset = reader.read_u64::<LittleEndian>()?;
|
||||||
|
tensor_infos.insert(
|
||||||
|
tensor_name,
|
||||||
|
TensorInfo {
|
||||||
|
shape: crate::Shape::from(dimensions),
|
||||||
|
offset,
|
||||||
|
ggml_dtype,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let position = reader.stream_position()?;
|
||||||
|
let alignment = DEFAULT_ALIGNMENT as u64;
|
||||||
|
let tensor_data_offset = (position + alignment - 1) / alignment * alignment;
|
||||||
|
Ok(Self {
|
||||||
|
magic,
|
||||||
|
metadata,
|
||||||
|
tensor_infos,
|
||||||
|
tensor_data_offset,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -3,6 +3,7 @@ use crate::{Device, Result, Shape, Tensor};
|
|||||||
#[cfg(target_feature = "avx")]
|
#[cfg(target_feature = "avx")]
|
||||||
pub mod avx;
|
pub mod avx;
|
||||||
pub mod ggml_file;
|
pub mod ggml_file;
|
||||||
|
pub mod gguf_file;
|
||||||
pub mod k_quants;
|
pub mod k_quants;
|
||||||
#[cfg(target_feature = "neon")]
|
#[cfg(target_feature = "neon")]
|
||||||
pub mod neon;
|
pub mod neon;
|
||||||
|
Reference in New Issue
Block a user