From 64296090907922aeaf5e647017197a8c8de6dce4 Mon Sep 17 00:00:00 2001
From: Kyle Birnbaum <kb@huggingface.co>
Date: Sun, 30 Mar 2025 01:55:21 -0700
Subject: [PATCH] Added Deepseekr1 Llama8b variant to quantized example (#2842)

* added deepseekr1 llama8b variant to quantized example

* lint
---
 candle-examples/examples/quantized/main.rs | 49 ++++++++++++++++++++--
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/candle-examples/examples/quantized/main.rs b/candle-examples/examples/quantized/main.rs
index 2b537aac..abd4b389 100644
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@@ -75,6 +75,8 @@ enum Which {
     SmolLM2_360MInstruct,
     #[value(name = "SmoLM2-1.7B-Instruct")]
     SmolLM2_1BInstruct,
+    #[value(name = "deepseekr1-llama8b")]
+    DeepseekR1Llama8b,
 }
 
 impl Which {
@@ -94,7 +96,8 @@ impl Which {
             | Self::L8b
             | Self::Phi3
             | Self::SmolLM2_1BInstruct
-            | Self::SmolLM2_360MInstruct => false,
+            | Self::SmolLM2_360MInstruct
+            | Self::DeepseekR1Llama8b => false,
             // Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the
             // same way. Starling is a fine tuned version of OpenChat.
             Self::OpenChat35
@@ -132,7 +135,8 @@ impl Which {
             | Self::L8b
             | Self::SmolLM2_1BInstruct
             | Self::SmolLM2_360MInstruct
-            | Self::Phi3 => false,
+            | Self::Phi3
+            | Self::DeepseekR1Llama8b => false,
             Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
         }
     }
@@ -160,11 +164,41 @@ impl Which {
             | Self::L8b
             | Self::SmolLM2_1BInstruct
             | Self::SmolLM2_360MInstruct
-            | Self::Phi3 => false,
+            | Self::Phi3
+            | Self::DeepseekR1Llama8b => false,
             Self::OpenChat35 | Self::Starling7bAlpha => true,
         }
     }
 
+    fn is_deepseek(&self) -> bool {
+        match self {
+            Self::L7b
+            | Self::L13b
+            | Self::L70b
+            | Self::L7bChat
+            | Self::L13bChat
+            | Self::L70bChat
+            | Self::L7bCode
+            | Self::L13bCode
+            | Self::L34bCode
+            | Self::Leo7b
+            | Self::Leo13b
+            | Self::Mixtral
+            | Self::MixtralInstruct
+            | Self::Mistral7b
+            | Self::Mistral7bInstruct
+            | Self::Mistral7bInstructV02
+            | Self::Zephyr7bAlpha
+            | Self::Zephyr7bBeta
+            | Self::L8b
+            | Self::SmolLM2_1BInstruct
+            | Self::SmolLM2_360MInstruct
+            | Self::Phi3
+            | Self::OpenChat35
+            | Self::Starling7bAlpha => false, // no `_` arm: new variants must be listed
+            Self::DeepseekR1Llama8b => true, // the only variant reported as DeepSeek
+        }
+    }
     fn tokenizer_repo(&self) -> &'static str {
         match self {
             Self::L7b
@@ -191,6 +225,7 @@ impl Which {
             Self::Phi3 => "microsoft/Phi-3-mini-4k-instruct",
             Self::SmolLM2_360MInstruct => "HuggingFaceTB/SmolLM2-360M-Instruct",
             Self::SmolLM2_1BInstruct => "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+            Self::DeepseekR1Llama8b => "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
         }
     }
 }
@@ -363,6 +398,10 @@ impl Args {
                         "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF",
                         "smollm2-1.7b-instruct-q4_k_m.gguf",
                     ),
+                    Which::DeepseekR1Llama8b => (
+                        "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+                        "DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf",
+                    ),
                 };
                 let revision = if self.which == Which::Phi3 {
                     "5eef2ce24766d31909c0b269fe90c817a8f263fb"
@@ -477,6 +516,7 @@ fn main() -> anyhow::Result<()> {
                 | Which::L8b
                 | Which::SmolLM2_1BInstruct
                 | Which::SmolLM2_360MInstruct
+                | Which::DeepseekR1Llama8b
                 | Which::Phi3 => 1,
                 Which::Mixtral
                 | Which::MixtralInstruct
@@ -530,6 +570,8 @@ fn main() -> anyhow::Result<()> {
                     }
                 } else if args.which.is_mistral() {
                     format!("[INST] {prompt} [/INST]")
+                } else if args.which.is_deepseek() {
+                    format!("<｜User｜>{prompt}<｜Assistant｜>")
                 } else {
                     prompt
                 }
@@ -597,6 +639,7 @@ fn main() -> anyhow::Result<()> {
         let eos_token = match args.which {
             Which::SmolLM2_360MInstruct | Which::SmolLM2_1BInstruct => "<|endoftext|>",
             Which::L8b => "<|end_of_text|>",
+            Which::DeepseekR1Llama8b => "<｜end▁of▁sentence｜>",
             _ => match args.which.is_open_chat() {
                 true => "<|end_of_turn|>",
                 false => "</s>",