Added DeepseekR1 Qwen7B variant to quantized-qwen2-instruct example (#2843)

* quantized deepseek qwen generating tokens

* removed is_deepseek from Args and replaced prompt if statement with pattern matching
This commit is contained in:
Kyle Birnbaum
2025-03-30 01:54:22 -07:00
committed by GitHub
parent 59c26195db
commit ba473290da

View File

@@ -27,6 +27,8 @@ enum Which {
     W2_7b,
     #[value(name = "72b")]
     W2_72b,
+    #[value(name = "deepseekr1-qwen7b")]
+    DeepseekR1Qwen7B,
 }

 #[derive(Parser, Debug)]
@@ -102,6 +104,7 @@ impl Args {
             Which::W2_1_5b => "Qwen/Qwen2-1.5B-Instruct",
             Which::W2_7b => "Qwen/Qwen2-7B-Instruct",
             Which::W2_72b => "Qwen/Qwen2-72B-Instruct",
+            Which::DeepseekR1Qwen7B => "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
         };
         let api = api.model(repo.to_string());
         api.get("tokenizer.json")?
@@ -135,6 +138,11 @@ impl Args {
                 "qwen2-72b-instruct-q4_0.gguf",
                 "main",
             ),
+            Which::DeepseekR1Qwen7B => (
+                "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
+                "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf",
+                "main",
+            ),
         };
         let api = hf_hub::api::sync::Api::new()?;
         api.repo(hf_hub::Repo::with_revision(
@@ -211,11 +219,15 @@ fn main() -> anyhow::Result<()> {
     let tokenizer = args.tokenizer()?;
     let mut tos = TokenOutputStream::new(tokenizer);
-    let prompt_str = args.prompt.unwrap_or_else(|| DEFAULT_PROMPT.to_string());
-    let prompt_str = format!(
-        "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
-        prompt_str
-    );
+    let prompt_str = args
+        .prompt
+        .clone()
+        .unwrap_or_else(|| DEFAULT_PROMPT.to_string());
+    let prompt_str = match args.which {
+        Which::DeepseekR1Qwen7B => format!("<｜User｜>{prompt_str}<｜Assistant｜>"),
+        _ => format!("<|im_start|>user\n{prompt_str}<|im_end|>\n<|im_start|>assistant\n"),
+    };
     print!("formatted instruct prompt: {}", &prompt_str);
     let tokens = tos
         .tokenizer()
@@ -260,7 +272,13 @@ fn main() -> anyhow::Result<()> {
         print!("{t}");
         std::io::stdout().flush()?;
     }
-    let eos_token = *tos.tokenizer().get_vocab(true).get("<|im_end|>").unwrap();
+    let eos_token = match args.which {
+        Which::DeepseekR1Qwen7B => "<｜end▁of▁sentence｜>",
+        _ => "<|im_end|>",
+    };
+    let eos_token = *tos.tokenizer().get_vocab(true).get(eos_token).unwrap();
     let start_post_prompt = std::time::Instant::now();
     let mut sampled = 0;
     for index in 0..to_sample {