Improve metal buffer usage (#1807)

* Improve metal buffer usage

* Clone cpu storage when loading to reduce wait_until_complete calls
* Use powers of two for buffer sizes so reuse is more likely.
* Select best available buffer by size.
* Add element count to MetalStorage so storage can be backed by a buffer of a different size

Co-authored-by: Chris Fleetwood <christopher.fleetwood@huggingface.co>

* Simplify new buffer creation without blit copy. Revert &[] -> Vec

* Add documentation on newBufferWithBytes safety / synchronization

* Drop unused buffers after command buffer is done syncing.

---------

Co-authored-by: Chris Fleetwood <christopher.fleetwood@huggingface.co>
This commit is contained in:
ivarflakstad
2024-03-07 09:42:34 +01:00
committed by GitHub
parent 8a99cf7dd2
commit 0c09d10f32
3 changed files with 139 additions and 87 deletions

View File

@@ -106,7 +106,12 @@ impl QMetalStorage {
}
let buffer = self.device.new_buffer_with_data(&out)?;
Ok(MetalStorage::new(buffer, self.device.clone(), DType::F32))
Ok(MetalStorage::new(
buffer,
self.device.clone(),
elem_count,
DType::F32,
))
}
pub fn quantize(&mut self, src: &MetalStorage) -> Result<()> {
@@ -170,7 +175,7 @@ impl QMetalStorage {
&dst,
)
.map_err(MetalError::from)?;
let dst_storage = crate::MetalStorage::new(dst, device, DType::F32);
let dst_storage = crate::MetalStorage::new(dst, device, dst_shape.elem_count(), DType::F32);
Ok((dst_storage, dst_shape))
}
}