mirror of
https://github.com/huggingface/candle.git
synced 2025-06-19 11:56:45 +00:00
Quantized version of mistral. (#1009)
* Quantized version of mistral.
* Integrate the quantized mistral variant.
* Use the quantized weight files.
* Tweak the quantization command.
* Fix the dtype when computing the rotary embeddings.
* Update the readme with the quantized version.
* Fix the decoding of the remaining tokens.
This commit is contained in:
@ -6,18 +6,18 @@ use std::sync::Arc;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Config {
|
||||
vocab_size: usize,
|
||||
hidden_size: usize,
|
||||
intermediate_size: usize,
|
||||
num_hidden_layers: usize,
|
||||
num_attention_heads: usize,
|
||||
num_key_value_heads: usize,
|
||||
hidden_act: Activation,
|
||||
max_position_embeddings: usize,
|
||||
rms_norm_eps: f64,
|
||||
rope_theta: f64,
|
||||
sliding_window: usize,
|
||||
use_flash_attn: bool,
|
||||
pub(crate) vocab_size: usize,
|
||||
pub(crate) hidden_size: usize,
|
||||
pub(crate) intermediate_size: usize,
|
||||
pub(crate) num_hidden_layers: usize,
|
||||
pub(crate) num_attention_heads: usize,
|
||||
pub(crate) num_key_value_heads: usize,
|
||||
pub(crate) hidden_act: Activation,
|
||||
pub(crate) max_position_embeddings: usize,
|
||||
pub(crate) rms_norm_eps: f64,
|
||||
pub(crate) rope_theta: f64,
|
||||
pub(crate) sliding_window: usize,
|
||||
pub(crate) use_flash_attn: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
|
Reference in New Issue
Block a user