From 64296090907922aeaf5e647017197a8c8de6dce4 Mon Sep 17 00:00:00 2001 From: Kyle Birnbaum Date: Sun, 30 Mar 2025 01:55:21 -0700 Subject: [PATCH] Added Deepseekr1 Llama8b variant to quantized example (#2842) * added deepseekr1 llama8b variant to quantized example * lint --- candle-examples/examples/quantized/main.rs | 49 ++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs index 2b537aac..abd4b389 100644 --- a/candle-examples/examples/quantized/main.rs +++ b/candle-examples/examples/quantized/main.rs @@ -75,6 +75,8 @@ enum Which { SmolLM2_360MInstruct, #[value(name = "SmoLM2-1.7B-Instruct")] SmolLM2_1BInstruct, + #[value(name = "deepseekr1-llama8b")] + DeepseekR1Llama8b, } impl Which { @@ -94,7 +96,8 @@ impl Which { | Self::L8b | Self::Phi3 | Self::SmolLM2_1BInstruct - | Self::SmolLM2_360MInstruct => false, + | Self::SmolLM2_360MInstruct + | Self::DeepseekR1Llama8b => false, // Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the // same way. Starling is a fine tuned version of OpenChat. 
Self::OpenChat35 @@ -132,7 +135,8 @@ impl Which { | Self::L8b | Self::SmolLM2_1BInstruct | Self::SmolLM2_360MInstruct - | Self::Phi3 => false, + | Self::Phi3 + | Self::DeepseekR1Llama8b => false, Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true, } } @@ -160,11 +164,41 @@ impl Which { | Self::L8b | Self::SmolLM2_1BInstruct | Self::SmolLM2_360MInstruct - | Self::Phi3 => false, + | Self::Phi3 + | Self::DeepseekR1Llama8b => false, Self::OpenChat35 | Self::Starling7bAlpha => true, } } + fn is_deepseek(&self) -> bool { + match self { + Self::L7b + | Self::L13b + | Self::L70b + | Self::L7bChat + | Self::L13bChat + | Self::L70bChat + | Self::L7bCode + | Self::L13bCode + | Self::L34bCode + | Self::Leo7b + | Self::Leo13b + | Self::Mixtral + | Self::MixtralInstruct + | Self::Mistral7b + | Self::Mistral7bInstruct + | Self::Mistral7bInstructV02 + | Self::Zephyr7bAlpha + | Self::Zephyr7bBeta + | Self::L8b + | Self::SmolLM2_1BInstruct + | Self::SmolLM2_360MInstruct + | Self::Phi3 + | Self::OpenChat35 + | Self::Starling7bAlpha => false, + Self::DeepseekR1Llama8b => true, + } + } fn tokenizer_repo(&self) -> &'static str { match self { Self::L7b @@ -191,6 +225,7 @@ impl Which { Self::Phi3 => "microsoft/Phi-3-mini-4k-instruct", Self::SmolLM2_360MInstruct => "HuggingFaceTB/SmolLM2-360M-Instruct", Self::SmolLM2_1BInstruct => "HuggingFaceTB/SmolLM2-1.7B-Instruct", + Self::DeepseekR1Llama8b => "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", } } } @@ -363,6 +398,10 @@ impl Args { "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF", "smollm2-1.7b-instruct-q4_k_m.gguf", ), + Which::DeepseekR1Llama8b => ( + "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF", + "DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf", + ), }; let revision = if self.which == Which::Phi3 { "5eef2ce24766d31909c0b269fe90c817a8f263fb" @@ -477,6 +516,7 @@ fn main() -> anyhow::Result<()> { | Which::L8b | Which::SmolLM2_1BInstruct | Which::SmolLM2_360MInstruct + | Which::DeepseekR1Llama8b | Which::Phi3 => 1, Which::Mixtral | Which::MixtralInstruct @@ 
-530,6 +570,8 @@ fn main() -> anyhow::Result<()> { } } else if args.which.is_mistral() { format!("[INST] {prompt} [/INST]") + } else if args.which.is_deepseek() { + format!("<｜User｜>{prompt}<｜Assistant｜>") } else { prompt } @@ -597,6 +639,7 @@ fn main() -> anyhow::Result<()> { let eos_token = match args.which { Which::SmolLM2_360MInstruct | Which::SmolLM2_1BInstruct => "<|endoftext|>", Which::L8b => "<|end_of_text|>", + Which::DeepseekR1Llama8b => "<｜end▁of▁sentence｜>", _ => match args.which.is_open_chat() { true => "<|end_of_turn|>", false => "",