mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 18:48:51 +00:00
Quantized version of mistral. (#1009)
* Quantized version of mistral.
* Integrate the quantized mistral variant.
* Use the quantized weight files.
* Tweak the quantization command.
* Fix the dtype when computing the rotary embeddings.
* Update the readme with the quantized version.
* Fix the decoding of the remaining tokens.
This commit is contained in:
@ -50,8 +50,20 @@ impl TokenOutputStream {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_rest(&self) -> Result<String> {
|
||||
self.decode(&self.tokens[self.prev_index..])
|
||||
pub fn decode_rest(&self) -> Result<Option<String>> {
|
||||
let prev_text = if self.tokens.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
let tokens = &self.tokens[self.prev_index..self.current_index];
|
||||
self.decode(tokens)?
|
||||
};
|
||||
let text = self.decode(&self.tokens[self.prev_index..])?;
|
||||
if text.len() > prev_text.len() {
|
||||
let text = text.split_at(prev_text.len());
|
||||
Ok(Some(text.1.to_string()))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_all(&self) -> Result<String> {
|
||||
|
Reference in New Issue
Block a user