Use cudarc 0.16. (#2900)

* Use cudarc 0.16.

* Allow for disabling event tracking.

* Tweaks.

* Bump the ug version.

* And bump the candle version too.
This commit is contained in:
Laurent Mazare
2025-04-15 21:40:18 +02:00
committed by GitHub
parent b01ebbad8a
commit e4e7b0b2da
7 changed files with 44 additions and 20 deletions

View File

@@ -256,6 +256,12 @@ fn run_inference(args: &InferenceCmd, common_args: &Args) -> Result<()> {
let tokenizer = common_args.tokenizer()?;
let device = candle_examples::device(common_args.cpu)?;
#[cfg(feature = "cuda")]
if let candle::Device::Cuda(d) = &device {
unsafe {
d.disable_event_tracking();
}
};
let is_gguf = config_path.extension().map_or(false, |v| v == "gguf");
let is_safetensors = config_path