diff --git a/candle-examples/examples/ggml/main.rs b/candle-examples/examples/ggml/main.rs
index 68e2267c..7d6ec2ca 100644
--- a/candle-examples/examples/ggml/main.rs
+++ b/candle-examples/examples/ggml/main.rs
@@ -248,7 +248,7 @@ impl ModelWeights {
 struct Args {
     /// GGML file to load, typically a .bin file generated by the quantize command from llama.cpp
     #[arg(long)]
-    model: String,
+    model: Option<String>,
 
     /// The initial prompt.
     #[arg(long)]
@@ -283,12 +283,24 @@ impl Args {
         };
         Tokenizer::from_file(tokenizer_path).map_err(anyhow::Error::msg)
     }
+
+    fn model(&self) -> anyhow::Result<std::path::PathBuf> {
+        let model_path = match &self.model {
+            Some(config) => std::path::PathBuf::from(config),
+            None => {
+                let api = hf_hub::api::sync::Api::new()?;
+                let api = api.model("TheBloke/Llama-2-7B-GGML".to_string());
+                api.get("llama-2-7b.ggmlv3.q4_0.bin")?
+            }
+        };
+        Ok(model_path)
+    }
 }
 
 fn main() -> anyhow::Result<()> {
     let args = Args::parse();
-    let mut file = std::fs::File::open(&args.model)?;
+    let mut file = std::fs::File::open(&args.model()?)?;
     let start = std::time::Instant::now();
     let model = Content::read(&mut file)?;
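
With this change `--model` becomes optional: when omitted, `Args::model` falls back to downloading the default quantized weights (`llama-2-7b.ggmlv3.q4_0.bin` from `TheBloke/Llama-2-7B-GGML`) via `hf_hub` and caching them locally. A sketch of the resulting usage, assuming the example keeps the `ggml` name from the path above and is run from the repository root:

    # Fetch the default weights from the Hugging Face Hub on first run:
    cargo run --example ggml --release

    # Or point at a local GGML file, as before:
    cargo run --example ggml --release -- --model /path/to/llama-2-7b.ggmlv3.q4_0.bin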