Add OpenChat 3.5 to quantized examples (#1346)

* Add OpenChat to quantized examples

* Add chat prompt

* Make the OpenChat example more in line with the other models.

* Fix a typo.

---------

Co-authored-by: laurent <laurent.mazare@gmail.com>
Author: Lucas de Ávila Martins
Date: 2023-11-19 15:28:52 -03:00
Committed by: GitHub
Parent: 8d8f48c60c
Commit: 992a788da1


@@ -53,6 +53,8 @@ enum Which {
     Zephyr7bAlpha,
     #[value(name = "7b-zephyr-b")]
     Zephyr7bBeta,
+    #[value(name = "7b-open-chat-3.5")]
+    OpenChat35,
 }
 
 impl Which {
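
With the `7b-open-chat-3.5` value registered above, the model becomes selectable from the command line. The invocation below is an assumption based on the usual way candle's quantized example is run, not something this commit adds:

    cargo run --example quantized --release -- \
        --which 7b-open-chat-3.5 \
        --prompt "Why is the sky blue?"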
@@ -67,8 +69,10 @@ impl Which {
             | Self::L7bCode
             | Self::L13bCode
             | Self::L34bCode => false,
-            // Zephyr is a fine tuned version of mistral and should be treated in the same way.
-            Self::Zephyr7bAlpha
+            // Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the
+            // same way.
+            Self::OpenChat35
+            | Self::Zephyr7bAlpha
             | Self::Zephyr7bBeta
             | Self::Mistral7b
             | Self::Mistral7bInstruct => true,
@@ -87,10 +91,30 @@ impl Which {
             | Self::L13bCode
             | Self::L34bCode
             | Self::Mistral7b
-            | Self::Mistral7bInstruct => false,
+            | Self::Mistral7bInstruct
+            | Self::OpenChat35 => false,
             Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
         }
     }
+
+    fn is_open_chat(&self) -> bool {
+        match self {
+            Which::L7b
+            | Which::L13b
+            | Which::L70b
+            | Which::L7bChat
+            | Which::L13bChat
+            | Which::L70bChat
+            | Which::L7bCode
+            | Which::L13bCode
+            | Which::L34bCode
+            | Which::Mistral7b
+            | Which::Mistral7bInstruct
+            | Which::Zephyr7bAlpha
+            | Which::Zephyr7bBeta => false,
+            Which::OpenChat35 => true,
+        }
+    }
 }
 
 #[derive(Parser, Debug)]
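
Note that `is_open_chat`, like the existing `is_zephyr` helper the prompt code calls, matches every variant explicitly instead of using a wildcard, so adding a future variant to `Which` will not compile until each predicate is updated. A condensed wildcard version, shown only to contrast the trade-off (this is not what the commit does):

    // Compiles even when new variants appear, silently classifying them as
    // "not OpenChat"; the exhaustive match above trades brevity for a
    // compile-time reminder.
    fn is_open_chat(&self) -> bool {
        matches!(self, Which::OpenChat35)
    }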
@@ -207,6 +231,7 @@ impl Args {
                 Which::Zephyr7bBeta => {
                     ("TheBloke/zephyr-7B-beta-GGUF", "zephyr-7b-beta.Q4_K_M.gguf")
                 }
+                Which::OpenChat35 => ("TheBloke/openchat_3.5-GGUF", "openchat_3.5.Q4_K_M.gguf"),
             };
             let api = hf_hub::api::sync::Api::new()?;
             let api = api.model(repo.to_string());
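
For reference, the (repo, filename) pair added above is resolved through hf-hub's synchronous API, which downloads the file on first use and then serves it from the local cache. A minimal standalone sketch using the same crate calls as the example (`fetch_openchat_weights` is a hypothetical helper name):

    use std::path::PathBuf;

    // Download the OpenChat GGUF weights via hf-hub, mirroring the
    // (repo, filename) pair added in this commit.
    fn fetch_openchat_weights() -> anyhow::Result<PathBuf> {
        let api = hf_hub::api::sync::Api::new()?;
        let repo = api.model("TheBloke/openchat_3.5-GGUF".to_string());
        // `get` returns the local cache path, downloading on first use.
        Ok(repo.get("openchat_3.5.Q4_K_M.gguf")?)
    }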
@@ -308,7 +333,8 @@ fn main() -> anyhow::Result<()> {
                 | Which::Zephyr7bAlpha
                 | Which::Zephyr7bBeta
                 | Which::L70b
-                | Which::L70bChat => 8,
+                | Which::L70bChat
+                | Which::OpenChat35 => 8,
             };
             ModelWeights::from_ggml(model, args.gqa.unwrap_or(default_gqa))?
         }
@@ -340,7 +366,9 @@ fn main() -> anyhow::Result<()> {
                     prompt.pop();
                 }
             }
-            if args.which.is_zephyr() {
+            if args.which.is_open_chat() {
+                format!("User: {prompt}<|end_of_turn|>Assistant: ")
+            } else if args.which.is_zephyr() {
                 if prompt_index == 0 || is_interactive {
                     format!("<|system|>\n</s>\n<|user|>\n{prompt}</s>\n<|assistant|>",)
                 } else {
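
The template above covers a single turn; in the interactive loop each new prompt is formatted the same way. A hedged sketch of how the same `User: ...<|end_of_turn|>Assistant:` convention would extend to an explicit multi-turn history (the helper and its signature are assumptions, not part of the commit):

    // Hypothetical multi-turn extension of the OpenChat template used above.
    // Each finished exchange is closed with <|end_of_turn|>; the trailing
    // "Assistant: " is left open for the model to complete.
    fn format_open_chat(history: &[(String, String)], prompt: &str) -> String {
        let mut out = String::new();
        for (user, assistant) in history {
            out.push_str(&format!(
                "User: {user}<|end_of_turn|>Assistant: {assistant}<|end_of_turn|>"
            ));
        }
        out.push_str(&format!("User: {prompt}<|end_of_turn|>Assistant: "));
        out
    }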
@@ -390,8 +418,12 @@ fn main() -> anyhow::Result<()> {
         std::io::stdout().flush()?;
     }
-    let eos_token = *tos.tokenizer().get_vocab(true).get("</s>").unwrap();
+    let eos_token = if args.which.is_open_chat() {
+        "<|end_of_turn|>"
+    } else {
+        "</s>"
+    };
+    let eos_token = *tos.tokenizer().get_vocab(true).get(eos_token).unwrap();
     let start_post_prompt = std::time::Instant::now();
     let mut sampled = 0;
     for index in 0..to_sample {
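
Because OpenChat ends turns with `<|end_of_turn|>` rather than `</s>`, the generation loop has to look up a different stop token, and the `unwrap` above panics if that token is missing from the vocabulary. A sketch of the same selection with a descriptive error instead (assuming `tos.tokenizer()` yields a `tokenizers::Tokenizer`, as the calls above suggest):

    // Resolve the per-model EOS token id, failing with a readable error
    // instead of unwrap.
    fn eos_token_id(tokenizer: &tokenizers::Tokenizer, is_open_chat: bool) -> anyhow::Result<u32> {
        let eos = if is_open_chat { "<|end_of_turn|>" } else { "</s>" };
        tokenizer
            .get_vocab(true)
            .get(eos)
            .copied()
            .ok_or_else(|| anyhow::anyhow!("EOS token {eos} not found in tokenizer vocab"))
    }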