Revert "slight changes to async ops"

This reverts commit 16b49dfd89.
This commit is contained in:
Ivar Flakstad
2024-09-02 12:34:11 +02:00
parent f9b2bb4d46
commit 8712ceb84f

View File

@ -9,34 +9,72 @@
#ifndef __METAL_SIMDGROUP_EVENT #ifndef __METAL_SIMDGROUP_EVENT
#define __METAL_SIMDGROUP_EVENT #define __METAL_SIMDGROUP_EVENT
struct _simdgroup_event_t {}; // Invoking the generation of LLVM bitcode for async copies.
//
// %struct._simdgroup_event_t = type opaque
//
struct _simdgroup_event_t;
thread _simdgroup_event_t* __metal_simdgroup_async_copy_1d( // Invoking the generation of LLVM bitcode for async copies.
ulong, ulong, thread _simdgroup_event_t*
threadgroup void*, const device void*, ulong) __metal_simdgroup_async_copy_1d(
__asm("air.simdgroup_async_copy_1d.p3i8.p1i8"); ulong, ulong, threadgroup void *, const device void *, ulong)
__asm("air.simdgroup_async_copy_1d.p3i8.p1i8");
thread _simdgroup_event_t* __metal_simdgroup_async_copy_1d( // Invoking the generation of LLVM bitcode for async copies.
ulong, ulong, thread _simdgroup_event_t*
device void*, const threadgroup void*, ulong) __metal_simdgroup_async_copy_1d(
__asm("air.simdgroup_async_copy_1d.p1i8.p3i8"); ulong, ulong, device void *, const threadgroup void *, ulong)
__asm("air.simdgroup_async_copy_1d.p1i8.p3i8");
thread _simdgroup_event_t* __metal_simdgroup_async_copy_2d( // Invoking the generation of LLVM bitcode for async copies.
ulong, ulong, //
threadgroup void*, ulong, ulong, ulong2, // ; Function Attrs: argmemonly convergent nounwind
const device void*, ulong, ulong, ulong2, // declare %struct._simdgroup_event_t*
long2, int) // @air.simdgroup_async_copy_2d.p3i8.p1i8(
__asm("air.simdgroup_async_copy_2d.p3i8.p1i8"); // i64, i64,
// i8 addrspace(3)* nocapture writeonly, i64, i64, <2 x i64>,
// i8 addrspace(1)* nocapture readonly, i64, i64, <2 x i64>,
// <2 x i64>, i32)
// local_unnamed_addr #4
//
// Declaration only (no body): the __asm label binds this overload to the
// symbol "air.simdgroup_async_copy_2d.p3i8.p1i8".
// This is the overload whose non-const pointer is in the threadgroup address
// space and whose const pointer is in the device address space.
// Returns a thread-address-space pointer to the opaque _simdgroup_event_t.
// The visible call sites of __metal_simdgroup_async_copy_2d pass sizeof(T)
// and alignof(T) as the first two arguments; the meaning of the remaining
// unnamed parameters is not shown in this view — TODO confirm against the
// call sites.
thread _simdgroup_event_t*
__metal_simdgroup_async_copy_2d(
ulong, ulong, // sizeof(T), alignof(T) at the visible call sites
threadgroup void *, ulong, ulong, ulong2, // non-const threadgroup pointer plus three values describing it (presumably the destination — confirm)
const device void *, ulong, ulong, ulong2, // const device pointer plus three values describing it (presumably the source — confirm)
long2, int)
__asm("air.simdgroup_async_copy_2d.p3i8.p1i8");
thread _simdgroup_event_t* __metal_simdgroup_async_copy_2d( // Invoking the generation of LLVM bitcode for async copies.
ulong, ulong, //
device void*, ulong, ulong, ulong2, // ; Function Attrs: argmemonly convergent nounwind
const threadgroup void*, ulong, ulong, ulong2, // declare %struct._simdgroup_event_t*
long2, int) // @air.simdgroup_async_copy_2d.p1i8.p3i8(
__asm("air.simdgroup_async_copy_2d.p1i8.p3i8"); // i64, i64,
// i8 addrspace(1)* nocapture writeonly, i64, i64, <2 x i64>,
// i8 addrspace(3)* nocapture readonly, i64, i64, <2 x i64>,
// <2 x i64>, i32)
// local_unnamed_addr #4
//
// Declaration only (no body): the __asm label binds this overload to the
// symbol "air.simdgroup_async_copy_2d.p1i8.p3i8".
// This is the overload whose non-const pointer is in the device address
// space and whose const pointer is in the threadgroup address space.
// Returns a thread-address-space pointer to the opaque _simdgroup_event_t.
// The visible call sites of __metal_simdgroup_async_copy_2d pass sizeof(T)
// and alignof(T) as the first two arguments; the meaning of the remaining
// unnamed parameters is not shown in this view — TODO confirm against the
// call sites.
thread _simdgroup_event_t*
__metal_simdgroup_async_copy_2d(
ulong, ulong, // sizeof(T), alignof(T) at the visible call sites
device void *, ulong, ulong, ulong2, // non-const device pointer plus three values describing it (presumably the destination — confirm)
const threadgroup void *, ulong, ulong, ulong2, // const threadgroup pointer plus three values describing it (presumably the source — confirm)
long2, int)
__asm("air.simdgroup_async_copy_2d.p1i8.p3i8");
void __metal_wait_simdgroup_events(int, const thread _simdgroup_event_t**) // Invoking the generation of LLVM bitcode for async copies.
__asm("air.wait_simdgroup_events"); //
// ; Function Attrs: convergent nounwind
// declare void
// @air.wait_simdgroup_events(i32, %struct._simdgroup_event_t** nocapture)
// local_unnamed_addr #3
//
// Declaration only (no body): the __asm label binds this function to the
// symbol "air.wait_simdgroup_events".
// Takes an int and a thread-address-space pointer to event pointers. The
// simdgroup_event::wait wrapper in this file passes its `count` argument and
// its `events` array reinterpret_cast to thread _simdgroup_event_t**.
void __metal_wait_simdgroup_events(
int, thread _simdgroup_event_t**) // (count, pointer to the first event pointer) as used by simdgroup_event::wait
__asm("air.wait_simdgroup_events");
#pragma METAL internals : enable #pragma METAL internals : enable
namespace metal namespace metal
@ -55,14 +93,14 @@ namespace metal
const device T *src, const device T *src,
ulong n_elements ulong n_elements
) thread { ) thread {
event = *__metal_simdgroup_async_copy_1d( event = __metal_simdgroup_async_copy_1d(
// Description of the data type. // Description of the data type.
sizeof(T), sizeof(T),
alignof(T), alignof(T),
// Description of the arguments. // Description of the arguments.
reinterpret_cast<threadgroup void*>(dst), reinterpret_cast<threadgroup void *>(dst),
reinterpret_cast<const device void*>(src), reinterpret_cast<const device void *>(src),
n_elements); n_elements);
} }
@ -72,7 +110,7 @@ namespace metal
const threadgroup T *src, const threadgroup T *src,
ulong n_elements ulong n_elements
) thread { ) thread {
event = *__metal_simdgroup_async_copy_1d( event = __metal_simdgroup_async_copy_1d(
// Description of the data type. // Description of the data type.
sizeof(T), sizeof(T),
alignof(T), alignof(T),
@ -104,7 +142,7 @@ namespace metal
src_tile_dimensions = src_tile_dimensions.yx; src_tile_dimensions = src_tile_dimensions.yx;
dst_tile_dimensions = dst_tile_dimensions.yx; dst_tile_dimensions = dst_tile_dimensions.yx;
} }
event = *__metal_simdgroup_async_copy_2d( event = __metal_simdgroup_async_copy_2d(
// Description of the data type. // Description of the data type.
sizeof(T), sizeof(T),
alignof(T), alignof(T),
@ -145,7 +183,7 @@ namespace metal
src_tile_dimensions = src_tile_dimensions.yx; src_tile_dimensions = src_tile_dimensions.yx;
dst_tile_dimensions = dst_tile_dimensions.yx; dst_tile_dimensions = dst_tile_dimensions.yx;
} }
event = *__metal_simdgroup_async_copy_2d( event = __metal_simdgroup_async_copy_2d(
// Description of the data type. // Description of the data type.
sizeof(T), sizeof(T),
alignof(T), alignof(T),
@ -168,11 +206,16 @@ namespace metal
} }
METAL_FUNC static void wait(int count, thread simdgroup_event *events) { METAL_FUNC static void wait(int count, thread simdgroup_event *events) {
__metal_wait_simdgroup_events(count, reinterpret_cast<const thread _simdgroup_event_t**>(events)); __metal_wait_simdgroup_events(
count, reinterpret_cast<thread _simdgroup_event_t**>(events));
} }
private: private:
thread _simdgroup_event_t event; // Invoking the generation of LLVM bitcode for async copies.
//
// %"struct.metal::simdgroup_event" = type { %struct._simdgroup_event_t* }
//
thread _simdgroup_event_t* event;
}; };
} // namespace metal } // namespace metal
#pragma METAL internals : disable #pragma METAL internals : disable