Add a cuda kernel for dequantizing q8_0. (#1804)

This commit is contained in:
Laurent Mazare
2024-03-05 09:50:37 +01:00
committed by GitHub
parent 8cc0a183ba
commit bd9ab9bc04
2 changed files with 24 additions and 4 deletions

View File

@ -877,6 +877,30 @@ extern "C" __global__ void dequantize_block_q6_K(const void * __restrict__ vx, f
#endif
}
extern "C" __global__ void dequantize_block_q8_0(const void * __restrict__ vx, float * __restrict__ yy, int nb32) {
const int i = blockIdx.x;
// assume 32 threads
const int tid = threadIdx.x;
const int il = tid/8;
const int ir = tid%8;
const int ib = 8*i + ir;
if (ib >= nb32) {
return;
}
float * y = yy + 256*i + 32*ir + 8*il;
const block_q8_0 * x = (const block_q8_0 *)vx + ib;
const float d = __half2float(x->d);
const int8_t * q = x->qs + 8*il;
for (int l = 0; l < 8; ++l) {
y[l] = d * q[l];
}
}
extern "C" __global__ void dequantize_block_q8_K(const void * __restrict__ vx, float * __restrict__ yy) {
const block_q8_K * x = (const block_q8_K *) vx;