mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 02:38:10 +00:00
Add support for llama3 on the quantized example (#2086)
* add support for l3b, new tokenizer * add todo * Add todo and use k_s model * Use the official tokenizers. --------- Co-authored-by: laurent <laurent.mazare@gmail.com>
This commit is contained in:
@ -67,6 +67,8 @@ enum Which {
|
|||||||
Mixtral,
|
Mixtral,
|
||||||
#[value(name = "mixtral-instruct")]
|
#[value(name = "mixtral-instruct")]
|
||||||
MixtralInstruct,
|
MixtralInstruct,
|
||||||
|
#[value(name = "llama3-8b")]
|
||||||
|
L8b,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Which {
|
impl Which {
|
||||||
@ -82,7 +84,8 @@ impl Which {
|
|||||||
| Self::L13bCode
|
| Self::L13bCode
|
||||||
| Self::L34bCode
|
| Self::L34bCode
|
||||||
| Self::Leo7b
|
| Self::Leo7b
|
||||||
| Self::Leo13b => false,
|
| Self::Leo13b
|
||||||
|
| Self::L8b => false,
|
||||||
// Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the
|
// Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the
|
||||||
// same way. Starling is a fine tuned version of OpenChat.
|
// same way. Starling is a fine tuned version of OpenChat.
|
||||||
Self::OpenChat35
|
Self::OpenChat35
|
||||||
@ -116,7 +119,8 @@ impl Which {
|
|||||||
| Self::Mistral7bInstruct
|
| Self::Mistral7bInstruct
|
||||||
| Self::Mistral7bInstructV02
|
| Self::Mistral7bInstructV02
|
||||||
| Self::OpenChat35
|
| Self::OpenChat35
|
||||||
| Self::Starling7bAlpha => false,
|
| Self::Starling7bAlpha
|
||||||
|
| Self::L8b => false,
|
||||||
Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
|
Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -140,7 +144,8 @@ impl Which {
|
|||||||
| Self::Mistral7bInstruct
|
| Self::Mistral7bInstruct
|
||||||
| Self::Mistral7bInstructV02
|
| Self::Mistral7bInstructV02
|
||||||
| Self::Zephyr7bAlpha
|
| Self::Zephyr7bAlpha
|
||||||
| Self::Zephyr7bBeta => false,
|
| Self::Zephyr7bBeta
|
||||||
|
| Self::L8b => false,
|
||||||
Self::OpenChat35 | Self::Starling7bAlpha => true,
|
Self::OpenChat35 | Self::Starling7bAlpha => true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -167,6 +172,7 @@ impl Which {
|
|||||||
| Which::Zephyr7bBeta => "mistralai/Mistral-7B-v0.1",
|
| Which::Zephyr7bBeta => "mistralai/Mistral-7B-v0.1",
|
||||||
Which::OpenChat35 => "openchat/openchat_3.5",
|
Which::OpenChat35 => "openchat/openchat_3.5",
|
||||||
Which::Starling7bAlpha => "berkeley-nest/Starling-LM-7B-alpha",
|
Which::Starling7bAlpha => "berkeley-nest/Starling-LM-7B-alpha",
|
||||||
|
Self::L8b => "meta-llama/Meta-Llama-3-8B",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -322,6 +328,11 @@ impl Args {
|
|||||||
"TheBloke/Starling-LM-7B-alpha-GGUF",
|
"TheBloke/Starling-LM-7B-alpha-GGUF",
|
||||||
"starling-lm-7b-alpha.Q4_K_M.gguf",
|
"starling-lm-7b-alpha.Q4_K_M.gguf",
|
||||||
),
|
),
|
||||||
|
// TODO: swap to TheBloke model when available
|
||||||
|
Which::L8b => (
|
||||||
|
"QuantFactory/Meta-Llama-3-8B-GGUF",
|
||||||
|
"Meta-Llama-3-8B.Q4_K_S.gguf",
|
||||||
|
),
|
||||||
};
|
};
|
||||||
let api = hf_hub::api::sync::Api::new()?;
|
let api = hf_hub::api::sync::Api::new()?;
|
||||||
let api = api.model(repo.to_string());
|
let api = api.model(repo.to_string());
|
||||||
@ -420,7 +431,8 @@ fn main() -> anyhow::Result<()> {
|
|||||||
| Which::L13bCode
|
| Which::L13bCode
|
||||||
| Which::L34bCode
|
| Which::L34bCode
|
||||||
| Which::Leo7b
|
| Which::Leo7b
|
||||||
| Which::Leo13b => 1,
|
| Which::Leo13b
|
||||||
|
| Which::L8b => 1,
|
||||||
Which::Mixtral
|
Which::Mixtral
|
||||||
| Which::MixtralInstruct
|
| Which::MixtralInstruct
|
||||||
| Which::Mistral7b
|
| Which::Mistral7b
|
||||||
@ -537,11 +549,14 @@ fn main() -> anyhow::Result<()> {
|
|||||||
std::io::stdout().flush()?;
|
std::io::stdout().flush()?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let eos_token = if args.which.is_open_chat() {
|
let eos_token = match args.which {
|
||||||
"<|end_of_turn|>"
|
Which::L8b => "<|end_of_text|>",
|
||||||
} else {
|
_ => match args.which.is_open_chat() {
|
||||||
"</s>"
|
true => "<|end_of_turn|>",
|
||||||
|
false => "</s>",
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let eos_token = *tos.tokenizer().get_vocab(true).get(eos_token).unwrap();
|
let eos_token = *tos.tokenizer().get_vocab(true).get(eos_token).unwrap();
|
||||||
let start_post_prompt = std::time::Instant::now();
|
let start_post_prompt = std::time::Instant::now();
|
||||||
let mut sampled = 0;
|
let mut sampled = 0;
|
||||||
|
Reference in New Issue
Block a user