Revert "slight changes to async ops"

This reverts commit 16b49dfd89.
This commit is contained in:
Ivar Flakstad
2024-09-02 12:34:11 +02:00
parent f9b2bb4d46
commit 8712ceb84f

View File

@ -9,33 +9,71 @@
#ifndef __METAL_SIMDGROUP_EVENT
#define __METAL_SIMDGROUP_EVENT
struct _simdgroup_event_t {};
// Invoking the generation of LLVM bitcode for async copies.
//
// %struct._simdgroup_event_t = type opaque
//
struct _simdgroup_event_t;
thread _simdgroup_event_t* __metal_simdgroup_async_copy_1d(
ulong, ulong,
threadgroup void*, const device void*, ulong)
// Invoking the generation of LLVM bitcode for async copies.
thread _simdgroup_event_t*
__metal_simdgroup_async_copy_1d(
ulong, ulong, threadgroup void *, const device void *, ulong)
__asm("air.simdgroup_async_copy_1d.p3i8.p1i8");
thread _simdgroup_event_t* __metal_simdgroup_async_copy_1d(
ulong, ulong,
device void*, const threadgroup void*, ulong)
// Invoking the generation of LLVM bitcode for async copies.
thread _simdgroup_event_t*
__metal_simdgroup_async_copy_1d(
ulong, ulong, device void *, const threadgroup void *, ulong)
__asm("air.simdgroup_async_copy_1d.p1i8.p3i8");
thread _simdgroup_event_t* __metal_simdgroup_async_copy_2d(
// Invoking the generation of LLVM bitcode for async copies.
//
// ; Function Attrs: argmemonly convergent nounwind
// declare %struct._simdgroup_event_t*
// @air.simdgroup_async_copy_2d.p3i8.p1i8(
// i64, i64,
// i8 addrspace(3)* nocapture writeonly, i64, i64, <2 x i64>,
// i8 addrspace(1)* nocapture readonly, i64, i64, <2 x i64>,
// <2 x i64>, i32)
// local_unnamed_addr #4
//
thread _simdgroup_event_t*
__metal_simdgroup_async_copy_2d(
ulong, ulong,
threadgroup void*, ulong, ulong, ulong2,
const device void*, ulong, ulong, ulong2,
threadgroup void *, ulong, ulong, ulong2,
const device void *, ulong, ulong, ulong2,
long2, int)
__asm("air.simdgroup_async_copy_2d.p3i8.p1i8");
thread _simdgroup_event_t* __metal_simdgroup_async_copy_2d(
// Invoking the generation of LLVM bitcode for async copies.
//
// ; Function Attrs: argmemonly convergent nounwind
// declare %struct._simdgroup_event_t*
// @air.simdgroup_async_copy_2d.p1i8.p3i8(
// i64, i64,
// i8 addrspace(1)* nocapture writeonly, i64, i64, <2 x i64>,
// i8 addrspace(3)* nocapture readonly, i64, i64, <2 x i64>,
// <2 x i64>, i32)
// local_unnamed_addr #4
//
thread _simdgroup_event_t*
__metal_simdgroup_async_copy_2d(
ulong, ulong,
device void*, ulong, ulong, ulong2,
const threadgroup void*, ulong, ulong, ulong2,
device void *, ulong, ulong, ulong2,
const threadgroup void *, ulong, ulong, ulong2,
long2, int)
__asm("air.simdgroup_async_copy_2d.p1i8.p3i8");
void __metal_wait_simdgroup_events(int, const thread _simdgroup_event_t**)
// Invoking the generation of LLVM bitcode for async copies.
//
// ; Function Attrs: convergent nounwind
// declare void
// @air.wait_simdgroup_events(i32, %struct._simdgroup_event_t** nocapture)
// local_unnamed_addr #3
//
void __metal_wait_simdgroup_events(
int, thread _simdgroup_event_t**)
__asm("air.wait_simdgroup_events");
#pragma METAL internals : enable
@ -55,14 +93,14 @@ namespace metal
const device T *src,
ulong n_elements
) thread {
event = *__metal_simdgroup_async_copy_1d(
event = __metal_simdgroup_async_copy_1d(
// Description of the data type.
sizeof(T),
alignof(T),
// Description of the arguments.
reinterpret_cast<threadgroup void*>(dst),
reinterpret_cast<const device void*>(src),
reinterpret_cast<threadgroup void *>(dst),
reinterpret_cast<const device void *>(src),
n_elements);
}
@ -72,7 +110,7 @@ namespace metal
const threadgroup T *src,
ulong n_elements
) thread {
event = *__metal_simdgroup_async_copy_1d(
event = __metal_simdgroup_async_copy_1d(
// Description of the data type.
sizeof(T),
alignof(T),
@ -104,7 +142,7 @@ namespace metal
src_tile_dimensions = src_tile_dimensions.yx;
dst_tile_dimensions = dst_tile_dimensions.yx;
}
event = *__metal_simdgroup_async_copy_2d(
event = __metal_simdgroup_async_copy_2d(
// Description of the data type.
sizeof(T),
alignof(T),
@ -145,7 +183,7 @@ namespace metal
src_tile_dimensions = src_tile_dimensions.yx;
dst_tile_dimensions = dst_tile_dimensions.yx;
}
event = *__metal_simdgroup_async_copy_2d(
event = __metal_simdgroup_async_copy_2d(
// Description of the data type.
sizeof(T),
alignof(T),
@ -168,11 +206,16 @@ namespace metal
}
METAL_FUNC static void wait(int count, thread simdgroup_event *events) {
__metal_wait_simdgroup_events(count, reinterpret_cast<const thread _simdgroup_event_t**>(events));
__metal_wait_simdgroup_events(
count, reinterpret_cast<thread _simdgroup_event_t**>(events));
}
private:
thread _simdgroup_event_t event;
// Invoking the generation of LLVM bitcode for async copies.
//
// %"struct.metal::simdgroup_event" = type { %struct._simdgroup_event_t* }
//
thread _simdgroup_event_t* event;
};
} // namespace metal
#pragma METAL internals : disable