Clean up the fence.

This commit is contained in:
Nicolas Patry
2024-01-05 21:57:07 +01:00
parent c8c603ce96
commit 3aefc709c7
4 changed files with 12 additions and 94 deletions

View File

@ -84,13 +84,8 @@ pub struct MetalDevice {
command_buffer_index: Arc<RwLock<usize>>,
/// The maximum amount of [compute command encoder](https://developer.apple.com/documentation/metal/mtlcomputecommandencoder?language=objc) per [command buffer](https://developer.apple.com/documentation/metal/mtlcommandbuffer?language=objc)
compute_per_buffer: usize,
/// Every compute command encoder (and blit encoders) are defended with this Fence, forcing the
/// execution order to be linear.
/// It could be relaxed in some circumstances, by managing ourselves the dependencies in the
/// compute graph.
// fence: metal::Fence,
/// Simple keeper struct to keep track of the already compiled kernels so we can reuse them.
/// Heavily used by [`candle_metal_kernels`], both fences need to match
/// Heavily used by [`candle_metal_kernels`]
kernels: Arc<candle_metal_kernels::Kernels>,
/// Simple allocator struct.
/// The buffers are stored in size buckets since ML tends to use similar shapes over and over.
@ -131,10 +126,6 @@ impl MetalDevice {
&self.device
}
// pub(crate) fn fence(&self) -> &metal::Fence {
// &self.fence
// }
/// Returns a shared reference to the command queue stored on this device.
pub fn command_queue(&self) -> &CommandQueue {
&self.command_queue
}
@ -225,10 +216,8 @@ impl MetalDevice {
let command_buffer = self.command_buffer()?;
command_buffer.set_label("with_data");
let blit = command_buffer.new_blit_command_encoder();
// blit.wait_for_fence(&self.fence);
blit.set_label("with_data_blit");
blit.copy_from_buffer(&tmp, 0, &real, 0, tmp.length());
// blit.update_fence(&self.fence);
blit.end_encoding();
// This is necessary, for mmaped safetensors
@ -251,7 +240,6 @@ impl MetalDevice {
let command_buffer = self.command_buffer()?;
command_buffer.set_label("zeros");
let blit = command_buffer.new_blit_command_encoder();
// blit.wait_for_fence(&self.fence);
blit.fill_buffer(
&buffer,
metal::NSRange {
@ -260,7 +248,6 @@ impl MetalDevice {
},
0,
);
// blit.update_fence(&self.fence);
blit.end_encoding();
Ok(buffer)
}
@ -1543,9 +1530,7 @@ impl MetalStorage {
command_buffer.set_label("to_cpu");
let blit = command_buffer.new_blit_command_encoder();
blit.set_label("blit_to_cpu");
// blit.wait_for_fence(&self.device.fence);
blit.copy_from_buffer(&self.buffer, 0, &buffer, 0, self.buffer.length());
// blit.update_fence(&self.device.fence);
blit.end_encoding();
}
self.device.wait_until_completed()?;
@ -1563,7 +1548,6 @@ impl BackendDevice for MetalDevice {
command_buffer.enqueue();
let command_buffer = Arc::new(RwLock::new(command_buffer));
let command_buffer_index = Arc::new(RwLock::new(0));
// let fence = device.new_fence();
let kernels = Arc::new(Kernels::new());
let buffers = Arc::new(RwLock::new(HashMap::new()));
let compute_per_buffer = match std::env::var("CANDLE_METAL_COMPUTE_PER_BUFFER") {
@ -1572,7 +1556,6 @@ impl BackendDevice for MetalDevice {
};
Ok(Self {
device,
// fence,
command_queue,
command_buffer,
command_buffer_index,

View File

@ -32,9 +32,7 @@ impl QMetalStorage {
command_buffer.set_label("to_cpu");
let blit = command_buffer.new_blit_command_encoder();
blit.set_label("blit_to_cpu");
// blit.wait_for_fence(&self.device.fence());
blit.copy_from_buffer(&self.buffer, 0, &buffer, 0, self.buffer.length());
// blit.update_fence(&self.device.fence());
blit.end_encoding();
self.device.wait_until_completed()?;
let mut out = vec![0.0; elem_count];