* init t5 wasm model

* split workers for each model

* clean up

* add some ui

* readme

* index

* typo

* remove cache param, clear_kv_cache

* add max_length as param

* add model tasks option to ui

* add method to load quantized gguf from buffer

* Add quantized wasm module

* add quantized models to UI, dynamic import wasms

* link to quantized

* fix copy

* fix ModelEncoder

* fix README.md
This commit is contained in:
Radamés Ajna
2023-09-22 07:31:10 -07:00
committed by GitHub
parent 8601537e31
commit 19e52e5007
12 changed files with 1131 additions and 0 deletions

View File

@ -30,6 +30,21 @@ impl VarBuilder {
})
}
/// Builds a `VarBuilder` from GGUF data that is already held in memory.
///
/// The GGUF content description is parsed from the start of `buffer`, then
/// every tensor named in `tensor_infos` is read from the same buffer and
/// stored under its name. The resulting builder starts with an empty path
/// prefix and uses `Device::Cpu`.
///
/// # Errors
///
/// Propagates any error returned while parsing the GGUF content or while
/// reading an individual tensor.
pub fn from_gguf_buffer(buffer: &[u8]) -> Result<Self> {
    // A cursor gives the byte slice the reader interface that both
    // `Content::read` and `Content::tensor` are called with.
    let mut reader = std::io::Cursor::new(buffer);
    let gguf = candle::quantized::gguf_file::Content::read(&mut reader)?;
    // Collecting into `Result<HashMap<..>>` stops at the first tensor that fails.
    let tensors = gguf
        .tensor_infos
        .keys()
        .map(|name| -> Result<_> {
            let qtensor = gguf.tensor(&mut reader, name)?;
            Ok((name.to_string(), Arc::new(qtensor)))
        })
        .collect::<Result<std::collections::HashMap<_, _>>>()?;
    Ok(Self {
        data: Arc::new(tensors),
        path: Vec::new(),
        device: Device::Cpu,
    })
}
fn pp<S: ToString>(&self, s: S) -> Self {
let mut path = self.path.clone();
path.push(s.to_string());