Remove the old MFA gemm kernels. (#2742)

* Remove the old MFA gemm kernels.

* Use bf16 in helium on metal.
This commit is contained in:
Laurent Mazare
2025-01-26 20:36:31 +01:00
committed by GitHub
parent 1a32107fab
commit 27996a1a9e
6 changed files with 41 additions and 492 deletions

View File

@ -121,8 +121,6 @@ pub struct MetalDevice {
pub(crate) kernels: Arc<Kernels>,
/// Seed for random number generation.
pub(crate) seed: Arc<Mutex<Buffer>>,
/// Whether to use the MLX matmul kernels instead of the MFA ones.
pub(crate) use_mlx_mm: bool,
}
impl std::fmt::Debug for MetalDevice {
@ -140,10 +138,6 @@ impl std::ops::Deref for MetalDevice {
}
impl MetalDevice {
pub fn set_use_mlx_mm(&mut self, use_mlx_mm: bool) {
self.use_mlx_mm = use_mlx_mm
}
pub fn compile(
&self,
func_name: &'static str,

View File

@ -1469,7 +1469,7 @@ impl BackendStorage for MetalStorage {
&buffer,
)
.map_err(MetalError::from)?;
} else if self.device.use_mlx_mm {
} else {
let dtype = match self.dtype {
DType::F32 => candle_metal_kernels::GemmDType::F32,
DType::F16 => candle_metal_kernels::GemmDType::F16,
@ -1496,32 +1496,6 @@ impl BackendStorage for MetalStorage {
&buffer,
)
.map_err(MetalError::from)?;
} else {
let name = match self.dtype {
DType::F32 => "sgemm",
DType::F16 => "hgemm",
dtype => {
return Err(
MetalError::Message(format!("matmul doesn't support {dtype:?}")).into(),
)
}
};
candle_metal_kernels::call_gemm(
&self.device.device,
&command_buffer,
&self.device.kernels,
name,
(b, m, n, k),
lhs_l.stride(),
lhs_l.start_offset() * self.dtype.size_in_bytes(),
&self.buffer,
rhs_l.stride(),
rhs_l.start_offset() * rhs.dtype.size_in_bytes(),
&rhs.buffer,
&buffer,
)
.map_err(MetalError::from)?;
}
Ok(Self::new(
buffer,
@ -1884,10 +1858,6 @@ impl BackendDevice for MetalDevice {
let device = metal::Device::all().swap_remove(ordinal);
let command_queue = device.new_command_queue();
let kernels = Arc::new(Kernels::new());
let use_mlx_mm = match std::env::var("CANDLE_USE_MFA_MM").as_deref() {
Ok("false") | Ok("False") | Ok("FALSE") | Ok("0") | Err(_) => true,
Ok(_) => false,
};
let seed = Arc::new(Mutex::new(device.new_buffer_with_data(
[299792458].as_ptr() as *const c_void,
4,
@ -1901,7 +1871,6 @@ impl BackendDevice for MetalDevice {
buffers: Arc::new(RwLock::new(HashMap::new())),
kernels,
seed,
use_mlx_mm,
})
}