mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 02:38:10 +00:00
Attempt at fixing M1/M2 metal async copy bug
This commit is contained in:
@@ -607,10 +607,12 @@ METAL_FUNC void async_access_accumulator(threadgroup T *C_block, device T *C,
                     min(uint(M_group), M - C_offset.y));
   auto C_src = simdgroup_matrix_storage<T>::apply_offset(C, N, C_offset);
 
-  simdgroup_event event;
   if (is_store) {
+    simdgroup_event event;
     event.async_copy(C_src, N, C_tile, C_block, N_group, C_tile);
+    simdgroup_event::wait(1, &event);
   } else {
+    simdgroup_event event;
     event.async_copy(C_block, N_group, C_tile, C_src, N, C_tile);
     simdgroup_event::wait(1, &event);
   }
Reference in New Issue
Block a user