T5 quantized example (#922)

* Load gguf files for the quantized t5.

* Add the quantized t5 example.

* Allow for loading local files.

* Add some support for quantizing safetensor files.

* Transpose before quantizing.

* Quantized t5.

* Retrieve the weights from the hub.
This commit is contained in:
Laurent Mazare
2023-09-21 12:33:15 +01:00
committed by GitHub
parent 2619c4307f
commit 3b557765e8
4 changed files with 272 additions and 1 deletions

View File

@ -15,6 +15,21 @@ pub struct VarBuilder {
}
impl VarBuilder {
/// Builds a `VarBuilder` from the tensors stored in the gguf file at `p`.
///
/// Every tensor name listed in the file's `tensor_infos` is read and stored
/// under that same name. The returned builder starts with an empty path
/// prefix and uses `Device::Cpu`.
///
/// # Errors
///
/// Returns the first error raised while opening the file, reading the gguf
/// content, or reading any individual tensor.
pub fn from_gguf<P: AsRef<std::path::Path>>(p: P) -> Result<Self> {
    let mut reader = std::fs::File::open(p)?;
    let gguf = candle::quantized::gguf_file::Content::read(&mut reader)?;
    // Collecting into a `Result` stops at the first tensor that fails to load.
    let tensors = gguf
        .tensor_infos
        .keys()
        .map(|name| {
            gguf.tensor(&mut reader, name)
                .map(|loaded| (name.to_string(), Arc::new(loaded)))
        })
        .collect::<Result<std::collections::HashMap<_, _>>>()?;
    let builder = Self {
        data: Arc::new(tensors),
        path: Vec::new(),
        device: Device::Cpu,
    };
    Ok(builder)
}
fn pp<S: ToString>(&self, s: S) -> Self {
let mut path = self.path.clone();
path.push(s.to_string());
@ -87,7 +102,7 @@ struct QMatMul {
impl QMatMul {
fn new(out_dim: usize, in_dim: usize, vb: VarBuilder) -> Result<Self> {
let ws = vb.get((out_dim, in_dim), "weight")?;
let ws = vb.get((in_dim, out_dim), "weight")?;
let inner = candle::quantized::QMatMul::from_arc(ws);
let span = tracing::span!(tracing::Level::TRACE, "qmatmul");
Ok(Self { inner, span })