Remove the old MFA gemm kernels. (#2742)

* Remove the old MFA gemm kernels.

* Use bf16 in helium on metal.
This commit is contained in:
Laurent Mazare
2025-01-26 20:36:31 +01:00
committed by GitHub
parent 1a32107fab
commit 27996a1a9e
6 changed files with 41 additions and 492 deletions

View File

@@ -263,11 +263,7 @@ fn main() -> Result<()> {
};
let device = candle_examples::device(args.cpu)?;
let (model, device) = {
- let dtype = if device.is_cuda() {
- DType::BF16
- } else {
- DType::F32
- };
+ let dtype = device.bf16_default_to_f32();
let vb = unsafe { VarBuilder::from_mmaped_safetensors(&filenames, dtype, &device)? };
let model = Model::new(&config, vb)?;
(model, device)