mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Quantized mixtral model (#1442)
* Add the Mixtral model.
* Add more of the mixtral layers.
* Add the final layers for mixtral.
* Sketch the expert selection.
* Add some expert routing logic.
* Hopefully finish the routing logic for mixtral.
* Add the mixtral example.
* Fix the weight filenames.
* Bugfix.
* Another fix.
* Yet another fix + remove the unused pragma.
* Shape fix.
* Support for quantized mixtral.
* Support mixtral in the quantized example.
* Mlp or moe type.
* Fix the expert field namings.
* Refactor the mlp bit.
* More MoE logic.
* Add the MoE quantized logic.
* Fix the experts length.
This commit is contained in:
@ -61,6 +61,8 @@ enum Which {
|
||||
OpenChat35,
|
||||
#[value(name = "7b-starling-a")]
|
||||
Starling7bAlpha,
|
||||
#[value(name = "mixtral")]
|
||||
Mixtral,
|
||||
}
|
||||
|
||||
impl Which {
|
||||
@ -83,6 +85,7 @@ impl Which {
|
||||
| Self::Starling7bAlpha
|
||||
| Self::Zephyr7bAlpha
|
||||
| Self::Zephyr7bBeta
|
||||
| Self::Mixtral
|
||||
| Self::Mistral7b
|
||||
| Self::Mistral7bInstruct => true,
|
||||
}
|
||||
@ -101,6 +104,7 @@ impl Which {
|
||||
| Self::L34bCode
|
||||
| Self::Leo7b
|
||||
| Self::Leo13b
|
||||
| Self::Mixtral
|
||||
| Self::Mistral7b
|
||||
| Self::Mistral7bInstruct
|
||||
| Self::OpenChat35
|
||||
@ -122,6 +126,7 @@ impl Which {
|
||||
| Self::L34bCode
|
||||
| Self::Leo7b
|
||||
| Self::Leo13b
|
||||
| Self::Mixtral
|
||||
| Self::Mistral7b
|
||||
| Self::Mistral7bInstruct
|
||||
| Self::Zephyr7bAlpha
|
||||
@ -143,6 +148,7 @@ impl Which {
|
||||
| Which::L34bCode => "hf-internal-testing/llama-tokenizer",
|
||||
Which::Leo7b => "LeoLM/leo-hessianai-7b",
|
||||
Which::Leo13b => "LeoLM/leo-hessianai-13b",
|
||||
Which::Mixtral => "mistralai/Mixtral-8x7B-v0.1",
|
||||
Which::Mistral7b
|
||||
| Which::Mistral7bInstruct
|
||||
| Which::Zephyr7bAlpha
|
||||
@ -256,6 +262,10 @@ impl Args {
|
||||
"TheBloke/leo-hessianai-13B-GGUF",
|
||||
"leo-hessianai-13b.Q4_K_M.gguf",
|
||||
),
|
||||
Which::Mixtral => (
|
||||
"TheBloke/Mixtral-8x7B-v0.1-GGUF",
|
||||
"mixtral-8x7b-v0.1.Q4_K_M.gguf",
|
||||
),
|
||||
Which::Mistral7b => (
|
||||
"TheBloke/Mistral-7B-v0.1-GGUF",
|
||||
"mistral-7b-v0.1.Q4_K_S.gguf",
|
||||
@ -374,7 +384,8 @@ fn main() -> anyhow::Result<()> {
|
||||
| Which::L34bCode
|
||||
| Which::Leo7b
|
||||
| Which::Leo13b => 1,
|
||||
Which::Mistral7b
|
||||
Which::Mixtral
|
||||
| Which::Mistral7b
|
||||
| Which::Mistral7bInstruct
|
||||
| Which::Zephyr7bAlpha
|
||||
| Which::Zephyr7bBeta
|
||||
|
Reference in New Issue
Block a user