mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Added DeepseekR1 Qwen7B variant to quantized-qwen2-instruct example (#2843)
* Quantized DeepSeek Qwen model now generates tokens
* Removed `is_deepseek` from `Args` and replaced the prompt `if` statement with pattern matching
This commit is contained in:
@ -27,6 +27,8 @@ enum Which {
|
|||||||
W2_7b,
|
W2_7b,
|
||||||
#[value(name = "72b")]
|
#[value(name = "72b")]
|
||||||
W2_72b,
|
W2_72b,
|
||||||
|
#[value(name = "deepseekr1-qwen7b")]
|
||||||
|
DeepseekR1Qwen7B,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
@ -102,6 +104,7 @@ impl Args {
|
|||||||
Which::W2_1_5b => "Qwen/Qwen2-1.5B-Instruct",
|
Which::W2_1_5b => "Qwen/Qwen2-1.5B-Instruct",
|
||||||
Which::W2_7b => "Qwen/Qwen2-7B-Instruct",
|
Which::W2_7b => "Qwen/Qwen2-7B-Instruct",
|
||||||
Which::W2_72b => "Qwen/Qwen2-72B-Instruct",
|
Which::W2_72b => "Qwen/Qwen2-72B-Instruct",
|
||||||
|
Which::DeepseekR1Qwen7B => "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
||||||
};
|
};
|
||||||
let api = api.model(repo.to_string());
|
let api = api.model(repo.to_string());
|
||||||
api.get("tokenizer.json")?
|
api.get("tokenizer.json")?
|
||||||
@ -135,6 +138,11 @@ impl Args {
|
|||||||
"qwen2-72b-instruct-q4_0.gguf",
|
"qwen2-72b-instruct-q4_0.gguf",
|
||||||
"main",
|
"main",
|
||||||
),
|
),
|
||||||
|
Which::DeepseekR1Qwen7B => (
|
||||||
|
"unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
|
||||||
|
"DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf",
|
||||||
|
"main",
|
||||||
|
),
|
||||||
};
|
};
|
||||||
let api = hf_hub::api::sync::Api::new()?;
|
let api = hf_hub::api::sync::Api::new()?;
|
||||||
api.repo(hf_hub::Repo::with_revision(
|
api.repo(hf_hub::Repo::with_revision(
|
||||||
@ -211,11 +219,15 @@ fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
let tokenizer = args.tokenizer()?;
|
let tokenizer = args.tokenizer()?;
|
||||||
let mut tos = TokenOutputStream::new(tokenizer);
|
let mut tos = TokenOutputStream::new(tokenizer);
|
||||||
let prompt_str = args.prompt.unwrap_or_else(|| DEFAULT_PROMPT.to_string());
|
let prompt_str = args
|
||||||
let prompt_str = format!(
|
.prompt
|
||||||
"<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
|
.clone()
|
||||||
prompt_str
|
.unwrap_or_else(|| DEFAULT_PROMPT.to_string());
|
||||||
);
|
|
||||||
|
let prompt_str = match args.which {
|
||||||
|
Which::DeepseekR1Qwen7B => format!("<|User|>{prompt_str}<|Assistant|>"),
|
||||||
|
_ => format!("<|im_start|>user\n{prompt_str}<|im_end|>\n<|im_start|>assistant\n"),
|
||||||
|
};
|
||||||
print!("formatted instruct prompt: {}", &prompt_str);
|
print!("formatted instruct prompt: {}", &prompt_str);
|
||||||
let tokens = tos
|
let tokens = tos
|
||||||
.tokenizer()
|
.tokenizer()
|
||||||
@ -260,7 +272,13 @@ fn main() -> anyhow::Result<()> {
|
|||||||
print!("{t}");
|
print!("{t}");
|
||||||
std::io::stdout().flush()?;
|
std::io::stdout().flush()?;
|
||||||
}
|
}
|
||||||
let eos_token = *tos.tokenizer().get_vocab(true).get("<|im_end|>").unwrap();
|
|
||||||
|
let eos_token = match args.which {
|
||||||
|
Which::DeepseekR1Qwen7B => "<|end▁of▁sentence|>",
|
||||||
|
_ => "<|im_end|>",
|
||||||
|
};
|
||||||
|
|
||||||
|
let eos_token = *tos.tokenizer().get_vocab(true).get(eos_token).unwrap();
|
||||||
let start_post_prompt = std::time::Instant::now();
|
let start_post_prompt = std::time::Instant::now();
|
||||||
let mut sampled = 0;
|
let mut sampled = 0;
|
||||||
for index in 0..to_sample {
|
for index in 0..to_sample {
|
||||||
|
Reference in New Issue
Block a user