Fix for q5_1 quantization. (#617)

* Fix for q5_1 quantization.

* Fix some typos.
This commit is contained in:
Laurent Mazare
2023-08-27 08:31:18 +01:00
committed by GitHub
parent fa0d75b18d
commit a8b39dd7b7
2 changed files with 28 additions and 49 deletions

View File

@@ -503,7 +503,7 @@ impl GgmlType for BlockQ5_1 {
let xi0 = (x0 + 0.5) as u8;
let xi1 = (x1 + 0.5) as u8;
- *q = (xi0 & 0x0F) | ((xi1 & 0x0F0) << 4);
+ *q = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);
// get the 5-th bit and store it in qh at the right position
qh |= ((xi0 as u32 & 0x10) >> 4) << j;
qh |= ((xi1 as u32 & 0x10) >> 4) << (j + qk / 2);