mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
@ -9,33 +9,71 @@
|
||||
#ifndef __METAL_SIMDGROUP_EVENT
|
||||
#define __METAL_SIMDGROUP_EVENT
|
||||
|
||||
struct _simdgroup_event_t {};
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
//
|
||||
// %struct._simdgroup_event_t = type opaque
|
||||
//
|
||||
struct _simdgroup_event_t;
|
||||
|
||||
thread _simdgroup_event_t* __metal_simdgroup_async_copy_1d(
|
||||
ulong, ulong,
|
||||
threadgroup void*, const device void*, ulong)
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
thread _simdgroup_event_t*
|
||||
__metal_simdgroup_async_copy_1d(
|
||||
ulong, ulong, threadgroup void *, const device void *, ulong)
|
||||
__asm("air.simdgroup_async_copy_1d.p3i8.p1i8");
|
||||
|
||||
thread _simdgroup_event_t* __metal_simdgroup_async_copy_1d(
|
||||
ulong, ulong,
|
||||
device void*, const threadgroup void*, ulong)
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
thread _simdgroup_event_t*
|
||||
__metal_simdgroup_async_copy_1d(
|
||||
ulong, ulong, device void *, const threadgroup void *, ulong)
|
||||
__asm("air.simdgroup_async_copy_1d.p1i8.p3i8");
|
||||
|
||||
thread _simdgroup_event_t* __metal_simdgroup_async_copy_2d(
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
//
|
||||
// ; Function Attrs: argmemonly convergent nounwind
|
||||
// declare %struct._simdgroup_event_t*
|
||||
// @air.simdgroup_async_copy_2d.p3i8.p1i8(
|
||||
// i64, i64,
|
||||
// i8 addrspace(3)* nocapture writeonly, i64, i64, <2 x i64>,
|
||||
// i8 addrspace(1)* nocapture readonly, i64, i64, <2 x i64>,
|
||||
// <2 x i64>, i32)
|
||||
// local_unnamed_addr #4
|
||||
//
|
||||
thread _simdgroup_event_t*
|
||||
__metal_simdgroup_async_copy_2d(
|
||||
ulong, ulong,
|
||||
threadgroup void*, ulong, ulong, ulong2,
|
||||
const device void*, ulong, ulong, ulong2,
|
||||
threadgroup void *, ulong, ulong, ulong2,
|
||||
const device void *, ulong, ulong, ulong2,
|
||||
long2, int)
|
||||
__asm("air.simdgroup_async_copy_2d.p3i8.p1i8");
|
||||
|
||||
thread _simdgroup_event_t* __metal_simdgroup_async_copy_2d(
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
//
|
||||
// ; Function Attrs: argmemonly convergent nounwind
|
||||
// declare %struct._simdgroup_event_t*
|
||||
// @air.simdgroup_async_copy_2d.p1i8.p3i8(
|
||||
// i64, i64,
|
||||
// i8 addrspace(1)* nocapture writeonly, i64, i64, <2 x i64>,
|
||||
// i8 addrspace(3)* nocapture readonly, i64, i64, <2 x i64>,
|
||||
// <2 x i64>, i32)
|
||||
// local_unnamed_addr #4
|
||||
//
|
||||
thread _simdgroup_event_t*
|
||||
__metal_simdgroup_async_copy_2d(
|
||||
ulong, ulong,
|
||||
device void*, ulong, ulong, ulong2,
|
||||
const threadgroup void*, ulong, ulong, ulong2,
|
||||
device void *, ulong, ulong, ulong2,
|
||||
const threadgroup void *, ulong, ulong, ulong2,
|
||||
long2, int)
|
||||
__asm("air.simdgroup_async_copy_2d.p1i8.p3i8");
|
||||
|
||||
void __metal_wait_simdgroup_events(int, const thread _simdgroup_event_t**)
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
//
|
||||
// ; Function Attrs: convergent nounwind
|
||||
// declare void
|
||||
// @air.wait_simdgroup_events(i32, %struct._simdgroup_event_t** nocapture)
|
||||
// local_unnamed_addr #3
|
||||
//
|
||||
void __metal_wait_simdgroup_events(
|
||||
int, thread _simdgroup_event_t**)
|
||||
__asm("air.wait_simdgroup_events");
|
||||
|
||||
#pragma METAL internals : enable
|
||||
@ -55,14 +93,14 @@ namespace metal
|
||||
const device T *src,
|
||||
ulong n_elements
|
||||
) thread {
|
||||
event = *__metal_simdgroup_async_copy_1d(
|
||||
event = __metal_simdgroup_async_copy_1d(
|
||||
// Description of the data type.
|
||||
sizeof(T),
|
||||
alignof(T),
|
||||
|
||||
// Description of the arguments.
|
||||
reinterpret_cast<threadgroup void*>(dst),
|
||||
reinterpret_cast<const device void*>(src),
|
||||
reinterpret_cast<threadgroup void *>(dst),
|
||||
reinterpret_cast<const device void *>(src),
|
||||
n_elements);
|
||||
}
|
||||
|
||||
@ -72,7 +110,7 @@ namespace metal
|
||||
const threadgroup T *src,
|
||||
ulong n_elements
|
||||
) thread {
|
||||
event = *__metal_simdgroup_async_copy_1d(
|
||||
event = __metal_simdgroup_async_copy_1d(
|
||||
// Description of the data type.
|
||||
sizeof(T),
|
||||
alignof(T),
|
||||
@ -104,7 +142,7 @@ namespace metal
|
||||
src_tile_dimensions = src_tile_dimensions.yx;
|
||||
dst_tile_dimensions = dst_tile_dimensions.yx;
|
||||
}
|
||||
event = *__metal_simdgroup_async_copy_2d(
|
||||
event = __metal_simdgroup_async_copy_2d(
|
||||
// Description of the data type.
|
||||
sizeof(T),
|
||||
alignof(T),
|
||||
@ -145,7 +183,7 @@ namespace metal
|
||||
src_tile_dimensions = src_tile_dimensions.yx;
|
||||
dst_tile_dimensions = dst_tile_dimensions.yx;
|
||||
}
|
||||
event = *__metal_simdgroup_async_copy_2d(
|
||||
event = __metal_simdgroup_async_copy_2d(
|
||||
// Description of the data type.
|
||||
sizeof(T),
|
||||
alignof(T),
|
||||
@ -168,11 +206,16 @@ namespace metal
|
||||
}
|
||||
|
||||
METAL_FUNC static void wait(int count, thread simdgroup_event *events) {
|
||||
__metal_wait_simdgroup_events(count, reinterpret_cast<const thread _simdgroup_event_t**>(events));
|
||||
__metal_wait_simdgroup_events(
|
||||
count, reinterpret_cast<thread _simdgroup_event_t**>(events));
|
||||
}
|
||||
|
||||
private:
|
||||
thread _simdgroup_event_t event;
|
||||
// Invoking the generation of LLVM bitcode for async copies.
|
||||
//
|
||||
// %"struct.metal::simdgroup_event" = type { %struct._simdgroup_event_t* }
|
||||
//
|
||||
thread _simdgroup_event_t* event;
|
||||
};
|
||||
} // namespace metal
|
||||
#pragma METAL internals : disable
|
||||
|
Reference in New Issue
Block a user