From d72c44705c4f6de9b96eb253e05b08bce3ae4b9b Mon Sep 17 00:00:00 2001 From: laurent Date: Thu, 3 Apr 2025 12:25:41 +0200 Subject: [PATCH] Load the text tokenizer. --- candle-examples/examples/csm/main.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/candle-examples/examples/csm/main.rs b/candle-examples/examples/csm/main.rs index a4bafb55..1148a2e5 100644 --- a/candle-examples/examples/csm/main.rs +++ b/candle-examples/examples/csm/main.rs @@ -4,7 +4,7 @@ extern crate intel_mkl_src; #[cfg(feature = "accelerate")] extern crate accelerate_src; -use anyhow::Result; +use anyhow::{Error as E, Result}; use clap::Parser; use candle_transformers::models::csm::{Config, Model}; @@ -12,6 +12,7 @@ use candle_transformers::models::csm::{Config, Model}; use candle::DType; use candle_nn::VarBuilder; use hf_hub::{api::sync::Api, Repo, RepoType}; +use tokenizers::Tokenizer; #[derive(Clone, Debug, Copy, PartialEq, Eq, clap::ValueEnum)] enum Which { @@ -132,7 +133,15 @@ fn main() -> Result<()> { .collect::<Vec<_>>(), None => vec![repo.get("model.safetensors")?], }; + let tokenizer_filename = match args.tokenizer { + Some(file) => std::path::PathBuf::from(file), + None => api + .model("meta-llama/Llama-3.2-1B".to_string()) + .get("tokenizer.json")?, + }; + println!("retrieved the files in {:?}", start.elapsed()); + let _tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?; let start = std::time::Instant::now(); let config: Config = match args.config {