From e1f9c3776d4e3b77cc2958c835314e680ac6f54f Mon Sep 17 00:00:00 2001
From: Jani Monoses <jani.monoses@gmail.com>
Date: Thu, 14 Mar 2024 22:01:36 +0200
Subject: [PATCH] StableLM-2 models were updated to use GPT-2 tokenization.
 (#1847)

---
 candle-examples/examples/stable-lm/README.md | 5 -----
 candle-examples/examples/stable-lm/main.rs   | 9 +--------
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/candle-examples/examples/stable-lm/README.md b/candle-examples/examples/stable-lm/README.md
index 546124a2..6f5e7597 100644
--- a/candle-examples/examples/stable-lm/README.md
+++ b/candle-examples/examples/stable-lm/README.md
@@ -10,11 +10,6 @@ order to be able to use it.
 
 Other available models are Stable-Code-3B, StableLM-2 and Zephyr variants.
 
-StableLM-2 uses a Tiktoken based GPT-3.5/GPT-4 tokenizer not supported by
-Candle, so to run it you can download a somewhat compatible
-[tokenizer.json](https://huggingface.co/Xenova/gpt-4/resolve/main/tokenizer.json?download=true)
-and pass it via the --tokenizer-file argument.
-
 ## Running some example
 
 ```bash
diff --git a/candle-examples/examples/stable-lm/main.rs b/candle-examples/examples/stable-lm/main.rs
index abe7020c..f467903a 100644
--- a/candle-examples/examples/stable-lm/main.rs
+++ b/candle-examples/examples/stable-lm/main.rs
@@ -239,14 +239,7 @@ fn main() -> Result<()> {
     ));
     let tokenizer_filename = match args.tokenizer_file {
         Some(file) => std::path::PathBuf::from(file),
-        None => match args.which {
-            Which::V1Orig | Which::V1 | Which::V1Zephyr | Which::Code => {
-                repo.get("tokenizer.json")?
-            }
-            Which::V2 | Which::V2Zephyr => api
-                .model("lmz/candle-stablelm".to_string())
-                .get("tokenizer-gpt4.json")?,
-        },
+        None => repo.get("tokenizer.json")?,
     };
     let filenames = match args.weight_files {
         Some(files) => files