Mirror of https://github.com/huggingface/candle.git
Add OpenChat 3.5 to quantized examples (#1346)
* Add OpenChat to quantized examples
* Add chat prompt
* Make the openchat example more in line with the other models.
* Fix a typo.

Co-authored-by: laurent <laurent.mazare@gmail.com>
Committed by GitHub
Parent: 8d8f48c60c
Commit: 992a788da1
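In outline, the diff teaches the quantized example about OpenChat 3.5: a new `7b-open-chat-3.5` value for the `--which` flag, classification of the model alongside the other Mistral fine-tunes, a download entry pointing at TheBloke's GGUF conversion, and an OpenChat-specific prompt template and end-of-turn token. Assuming the repository's usual examples layout, it should run with something like `cargo run --example quantized --release -- --which 7b-open-chat-3.5 --prompt "..."`.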
```diff
@@ -53,6 +53,8 @@ enum Which {
     Zephyr7bAlpha,
     #[value(name = "7b-zephyr-b")]
     Zephyr7bBeta,
+    #[value(name = "7b-open-chat-3.5")]
+    OpenChat35,
 }
 
 impl Which {
```
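In case the `#[value(name = ...)]` attribute is unfamiliar: it is clap's derive-API hook that binds the CLI string to the enum variant, so `--which 7b-open-chat-3.5` parses to `Which::OpenChat35`. A minimal self-contained sketch, assuming clap 4 with the `derive` feature (the single-variant enum and the `demo` argv prefix are illustrative only):

```rust
use clap::{Parser, ValueEnum};

#[derive(Clone, Debug, ValueEnum)]
enum Which {
    // The value name is what users type on the command line.
    #[value(name = "7b-open-chat-3.5")]
    OpenChat35,
}

#[derive(Parser, Debug)]
struct Args {
    #[arg(long)]
    which: Which,
}

fn main() {
    // Parse from a fixed argv instead of the real command line.
    let args = Args::parse_from(["demo", "--which", "7b-open-chat-3.5"]);
    println!("{:?}", args.which); // OpenChat35
}
```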
```diff
@@ -67,8 +69,10 @@ impl Which {
             | Self::L7bCode
             | Self::L13bCode
             | Self::L34bCode => false,
-            // Zephyr is a fine tuned version of mistral and should be treated in the same way.
-            Self::Zephyr7bAlpha
+            // Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the
+            // same way.
+            Self::OpenChat35
+            | Self::Zephyr7bAlpha
             | Self::Zephyr7bBeta
             | Self::Mistral7b
             | Self::Mistral7bInstruct => true,
@@ -87,10 +91,30 @@ impl Which {
             | Self::L13bCode
             | Self::L34bCode
             | Self::Mistral7b
-            | Self::Mistral7bInstruct => false,
+            | Self::Mistral7bInstruct
+            | Self::OpenChat35 => false,
             Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
         }
     }
+
+    fn is_open_chat(&self) -> bool {
+        match self {
+            Which::L7b
+            | Which::L13b
+            | Which::L70b
+            | Which::L7bChat
+            | Which::L13bChat
+            | Which::L70bChat
+            | Which::L7bCode
+            | Which::L13bCode
+            | Which::L34bCode
+            | Which::Mistral7b
+            | Which::Mistral7bInstruct
+            | Which::Zephyr7bAlpha
+            | Which::Zephyr7bBeta => false,
+            Which::OpenChat35 => true,
+        }
+    }
 }
 
 #[derive(Parser, Debug)]
```
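Note that `is_open_chat` matches every variant explicitly instead of ending with a `_ => false` arm: when the next model is added to `Which`, the compiler's exhaustiveness check will flag each predicate, forcing a deliberate classification rather than a silent default.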
```diff
@@ -207,6 +231,7 @@ impl Args {
             Which::Zephyr7bBeta => {
                 ("TheBloke/zephyr-7B-beta-GGUF", "zephyr-7b-beta.Q4_K_M.gguf")
             }
+            Which::OpenChat35 => ("TheBloke/openchat_3.5-GGUF", "openchat_3.5.Q4_K_M.gguf"),
         };
         let api = hf_hub::api::sync::Api::new()?;
         let api = api.model(repo.to_string());
```
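The `(repo, filename)` pair selected above is resolved through the `hf_hub` sync API shown in the context lines. A standalone sketch of that flow, using the same calls; the function name is mine, and the returned path points into the local Hugging Face cache:

```rust
use hf_hub::api::sync::Api;

fn fetch_openchat_weights() -> anyhow::Result<std::path::PathBuf> {
    // Build the sync API client, scope it to the model repo, then download
    // (or reuse a cached copy of) the quantized GGUF file.
    let api = Api::new()?;
    let repo = api.model("TheBloke/openchat_3.5-GGUF".to_string());
    let path = repo.get("openchat_3.5.Q4_K_M.gguf")?;
    Ok(path)
}
```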
```diff
@@ -308,7 +333,8 @@ fn main() -> anyhow::Result<()> {
             | Which::Zephyr7bAlpha
             | Which::Zephyr7bBeta
             | Which::L70b
-            | Which::L70bChat => 8,
+            | Which::L70bChat
+            | Which::OpenChat35 => 8,
         };
         ModelWeights::from_ggml(model, args.gqa.unwrap_or(default_gqa))?
     }
```
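A note on the `8`: this default only feeds `ModelWeights::from_ggml`, i.e. the legacy GGML path. Unlike GGUF, the old GGML header does not carry key/value-head metadata, so the grouped-query-attention factor has to be supplied by hand, and OpenChat 3.5 (a Mistral-7B fine-tune) presumably takes the same value as the Zephyr fine-tunes and the 70B Llamas.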
```diff
@@ -340,7 +366,9 @@ fn main() -> anyhow::Result<()> {
                     prompt.pop();
                 }
             }
-            if args.which.is_zephyr() {
+            if args.which.is_open_chat() {
+                format!("User: {prompt}<|end_of_turn|>Assistant: ")
+            } else if args.which.is_zephyr() {
                 if prompt_index == 0 || is_interactive {
                     format!("<|system|>\n</s>\n<|user|>\n{prompt}</s>\n<|assistant|>",)
                 } else {
```
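The single-turn template added here is easy to isolate for testing. A minimal sketch with a hypothetical helper (for reference, OpenChat's published chat template uses "GPT4 Correct User"/"GPT4 Correct Assistant" role tags; the example sticks to the plainer form from the diff):

```rust
// `openchat_prompt` is a hypothetical helper, not a function in the example.
fn openchat_prompt(user: &str) -> String {
    format!("User: {user}<|end_of_turn|>Assistant: ")
}

fn main() {
    let p = openchat_prompt("What is 2+2?");
    assert_eq!(p, "User: What is 2+2?<|end_of_turn|>Assistant: ");
    println!("{p}");
}
```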
```diff
@@ -390,8 +418,12 @@ fn main() -> anyhow::Result<()> {
         std::io::stdout().flush()?;
     }
 
-    let eos_token = *tos.tokenizer().get_vocab(true).get("</s>").unwrap();
-
+    let eos_token = if args.which.is_open_chat() {
+        "<|end_of_turn|>"
+    } else {
+        "</s>"
+    };
+    let eos_token = *tos.tokenizer().get_vocab(true).get(eos_token).unwrap();
     let start_post_prompt = std::time::Instant::now();
     let mut sampled = 0;
     for index in 0..to_sample {
```
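Finally, the stop token is now looked up by string in the tokenizer vocabulary. A sketch of that lookup with the `tokenizers` crate, using a hypothetical helper that returns `Option` instead of the example's `unwrap`:

```rust
use tokenizers::Tokenizer;

// Resolve the model-appropriate end-of-sequence token to its id.
fn eos_token_id(tokenizer: &Tokenizer, is_open_chat: bool) -> Option<u32> {
    let text = if is_open_chat { "<|end_of_turn|>" } else { "</s>" };
    // `true` asks for the vocab including added tokens; special chat tokens
    // such as <|end_of_turn|> typically live there, not in the base vocab.
    tokenizer.get_vocab(true).get(text).copied()
}
```

`Tokenizer::token_to_id` would serve equally well for this lookup.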