mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 19:18:50 +00:00
T5 quantized example (#922)
* Load gguf files for the quantized t5.
* Add the quantized t5 example.
* Allow for loading local files.
* Add some support for quantizing safetensor files.
* Transpose before quantizing.
* Quantized t5.
* Retrieve the weights from the hub.
This commit is contained in:
@ -15,6 +15,21 @@ pub struct VarBuilder {
|
||||
}
|
||||
|
||||
impl VarBuilder {
|
||||
pub fn from_gguf<P: AsRef<std::path::Path>>(p: P) -> Result<Self> {
|
||||
let mut file = std::fs::File::open(p)?;
|
||||
let content = candle::quantized::gguf_file::Content::read(&mut file)?;
|
||||
let mut data = std::collections::HashMap::new();
|
||||
for tensor_name in content.tensor_infos.keys() {
|
||||
let tensor = content.tensor(&mut file, tensor_name)?;
|
||||
data.insert(tensor_name.to_string(), Arc::new(tensor));
|
||||
}
|
||||
Ok(Self {
|
||||
data: Arc::new(data),
|
||||
path: Vec::new(),
|
||||
device: Device::Cpu,
|
||||
})
|
||||
}
|
||||
|
||||
fn pp<S: ToString>(&self, s: S) -> Self {
|
||||
let mut path = self.path.clone();
|
||||
path.push(s.to_string());
|
||||
@ -87,7 +102,7 @@ struct QMatMul {
|
||||
|
||||
impl QMatMul {
|
||||
fn new(out_dim: usize, in_dim: usize, vb: VarBuilder) -> Result<Self> {
|
||||
let ws = vb.get((out_dim, in_dim), "weight")?;
|
||||
let ws = vb.get((in_dim, out_dim), "weight")?;
|
||||
let inner = candle::quantized::QMatMul::from_arc(ws);
|
||||
let span = tracing::span!(tracing::Level::TRACE, "qmatmul");
|
||||
Ok(Self { inner, span })
|
||||
|
Reference in New Issue
Block a user