mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 02:38:10 +00:00
Improve metal buffer usage (#1807)
* Improve metal buffer usage
* Clone cpu storage when loading to reduce wait_until_complete calls
* Use powers of two for buffer sizes so reuse is more likely.
* Select best available buffer by size.
* Add count to MetalStorage -> can use buffer with different size

Co-authored-by: Chris Fleetwood <christopher.fleetwood@huggingface.co>

* Simplify new buffer creation without blit copy. Revert &[] -> Vec
* Add documentation on newBufferWithBytes safety / synchronization
* Drop unused buffers after command buffer is done syncing.

---------

Co-authored-by: Chris Fleetwood <christopher.fleetwood@huggingface.co>
This commit is contained in:
@@ -238,7 +238,8 @@ impl candle::CustomOp1 for SoftmaxLastDim {
|
||||
&output,
|
||||
)
|
||||
.unwrap();
|
||||
let newstorage = candle::MetalStorage::new(output, device.clone(), storage.dtype());
|
||||
let newstorage =
|
||||
candle::MetalStorage::new(output, device.clone(), elem_count, storage.dtype());
|
||||
Ok((newstorage, layout.shape().clone()))
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user