From e1f9c3776d4e3b77cc2958c835314e680ac6f54f Mon Sep 17 00:00:00 2001 From: Jani Monoses Date: Thu, 14 Mar 2024 22:01:36 +0200 Subject: [PATCH] StableLM-2 models were updated to use GPT-2 tokenization. (#1847) --- candle-examples/examples/stable-lm/README.md | 5 ----- candle-examples/examples/stable-lm/main.rs | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/candle-examples/examples/stable-lm/README.md b/candle-examples/examples/stable-lm/README.md index 546124a2..6f5e7597 100644 --- a/candle-examples/examples/stable-lm/README.md +++ b/candle-examples/examples/stable-lm/README.md @@ -10,11 +10,6 @@ order to be able to use it. Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants. -StableLM-2 uses a Tiktoken based GPT-3.5/GPT-4 tokenizer not supported by -Candle, so to run it you can download a somewhat compatible -[tokenizer.json](https://huggingface.co/Xenova/gpt-4/resolve/main/tokenizer.json?download=true) -and pass it via the --tokenizer-file argument. - ## Running some example ```bash diff --git a/candle-examples/examples/stable-lm/main.rs b/candle-examples/examples/stable-lm/main.rs index abe7020c..f467903a 100644 --- a/candle-examples/examples/stable-lm/main.rs +++ b/candle-examples/examples/stable-lm/main.rs @@ -239,14 +239,7 @@ fn main() -> Result<()> { )); let tokenizer_filename = match args.tokenizer_file { Some(file) => std::path::PathBuf::from(file), - None => match args.which { - Which::V1Orig | Which::V1 | Which::V1Zephyr | Which::Code => { - repo.get("tokenizer.json")? - } - Which::V2 | Which::V2Zephyr => api - .model("lmz/candle-stablelm".to_string()) - .get("tokenizer-gpt4.json")?, - }, + None => repo.get("tokenizer.json")?, }; let filenames = match args.weight_files { Some(files) => files