Add a function to write gguf files. (#585)

* Add a function to write gguf files.

* More GGUF file writing.

* Write the tensor data in GGUF files.
This commit is contained in:
Laurent Mazare
2023-08-24 17:03:06 +01:00
committed by GitHub
parent a87c6f7652
commit c265ac50fa
2 changed files with 163 additions and 4 deletions

View File

@ -16,7 +16,7 @@ pub struct QTensor {
shape: Shape,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GgmlDType {
F32,
F16,
@ -56,6 +56,25 @@ impl GgmlDType {
Ok(dtype)
}
/// Maps this dtype to its numeric `u32` identifier.
///
/// The match is exhaustive, so adding a new variant forces this table to be
/// updated. The identifiers are not contiguous: 4 and 5 are not produced for
/// any variant.
///
/// NOTE(review): presumably these are the on-disk type ids used when writing
/// GGUF files and must stay in sync with the reverse (`u32` -> dtype)
/// conversion — confirm against the reader.
pub(crate) fn to_u32(self) -> u32 {
match self {
Self::F32 => 0,
Self::F16 => 1,
Self::Q4_0 => 2,
Self::Q4_1 => 3,
// Ids 4 and 5 are intentionally skipped by this mapping.
Self::Q5_0 => 6,
Self::Q5_1 => 7,
Self::Q8_0 => 8,
Self::Q8_1 => 9,
Self::Q2K => 10,
Self::Q3K => 11,
Self::Q4K => 12,
Self::Q5K => 13,
Self::Q6K => 14,
Self::Q8K => 15,
}
}
/// The type size for blocks in bytes.
pub fn type_size(&self) -> usize {
use k_quants::*;
@ -99,6 +118,8 @@ pub trait QuantizedType: Send + Sync {
/// Returns the `GgmlDType` describing this value.
fn dtype(&self) -> GgmlDType;
/// Takes a dimension triple `mkn`, an `f32` input slice `lhs` and a mutable
/// `f32` output slice `dst`; failures are reported through the `Result`.
///
/// NOTE(review): presumably a matrix product with `mkn` = (m, k, n) and this
/// value acting as the transposed right-hand side, writing into `dst` —
/// confirm against the implementations.
fn matmul_t(&self, mkn: (usize, usize, usize), lhs: &[f32], dst: &mut [f32]) -> Result<()>;
/// Produces `f32` output into the caller-provided slice `ys`.
///
/// NOTE(review): presumably dequantizes the stored values; the required
/// length of `ys` is not visible from this declaration — confirm.
fn to_float(&self, ys: &mut [f32]) -> Result<()>;
/// Returns the size, in bytes, of the storage backing this value.
fn storage_size_in_bytes(&self) -> usize;
/// Returns a raw byte pointer to the storage backing this value.
///
/// A raw pointer carries no lifetime, so the compiler does not tie it to the
/// borrow of `self`; callers are responsible for not using it after the
/// storage has been dropped or reallocated.
fn as_ptr(&self) -> *const u8;
}
impl<T: k_quants::GgmlType + Send + Sync> QuantizedType for Vec<T> {
@ -113,6 +134,14 @@ impl<T: k_quants::GgmlType + Send + Sync> QuantizedType for Vec<T> {
/// Delegates to `T::to_float`, passing the vector's elements as a slice and
/// `ys` as the destination; any error from that call is returned unchanged.
fn to_float(&self, ys: &mut [f32]) -> Result<()> {
T::to_float(self.as_slice(), ys)
}
fn storage_size_in_bytes(&self) -> usize {
self.len() * std::mem::size_of::<T>()
}
fn as_ptr(&self) -> *const u8 {
self.as_ptr() as *const u8
}
}
impl std::fmt::Debug for QTensor {
@ -186,6 +215,14 @@ impl QTensor {
/// Forwards to `matmul_t` on the tensor's underlying `data`, passing `mkn`,
/// `lhs` and `dst` through unchanged and returning its result.
///
/// NOTE(review): `mkn` presumably holds the (m, k, n) matrix dimensions and
/// `dst` receives the product — confirm against the `data` implementation.
pub fn matmul_t(&self, mkn: (usize, usize, usize), lhs: &[f32], dst: &mut [f32]) -> Result<()> {
self.data.matmul_t(mkn, lhs, dst)
}
/// Returns the size in bytes reported by the tensor's underlying `data`.
pub fn storage_size_in_bytes(&self) -> usize {
self.data.storage_size_in_bytes()
}
/// Returns the raw byte pointer reported by the tensor's underlying `data`.
///
/// A raw pointer carries no lifetime, so it must not be used after this
/// tensor's data has been dropped.
///
/// NOTE(review): presumably meant to be paired with `storage_size_in_bytes`
/// to view the data as a byte range when writing it out — confirm at the
/// call site.
pub fn as_ptr(&self) -> *const u8 {
self.data.as_ptr()
}
}
#[derive(Debug)]