mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 18:28:24 +00:00
StableLM-2 models were updated to use GPT-2 tokenization. (#1847)
This commit is contained in:
@ -10,11 +10,6 @@ order to be able to use it.
|
|||||||
|
|
||||||
Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants.
|
Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants.
|
||||||
|
|
||||||
StableLM-2 uses a Tiktoken based GPT-3.5/GPT-4 tokenizer not supported by
|
|
||||||
Candle, so to run it you can download a somewhat compatible
|
|
||||||
[tokenizer.json](https://huggingface.co/Xenova/gpt-4/resolve/main/tokenizer.json?download=true)
|
|
||||||
and pass it via the --tokenizer-file argument.
|
|
||||||
|
|
||||||
## Running some example
|
## Running some example
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -239,14 +239,7 @@ fn main() -> Result<()> {
|
|||||||
));
|
));
|
||||||
let tokenizer_filename = match args.tokenizer_file {
|
let tokenizer_filename = match args.tokenizer_file {
|
||||||
Some(file) => std::path::PathBuf::from(file),
|
Some(file) => std::path::PathBuf::from(file),
|
||||||
None => match args.which {
|
None => repo.get("tokenizer.json")?,
|
||||||
Which::V1Orig | Which::V1 | Which::V1Zephyr | Which::Code => {
|
|
||||||
repo.get("tokenizer.json")?
|
|
||||||
}
|
|
||||||
Which::V2 | Which::V2Zephyr => api
|
|
||||||
.model("lmz/candle-stablelm".to_string())
|
|
||||||
.get("tokenizer-gpt4.json")?,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
let filenames = match args.weight_files {
|
let filenames = match args.weight_files {
|
||||||
Some(files) => files
|
Some(files) => files
|
||||||
|
Reference in New Issue
Block a user