mirror of
https://github.com/huggingface/candle.git
synced 2025-06-19 19:58:35 +00:00
Bugfix for dequantizing q5k layers. (#1569)
This commit is contained in:
@ -1545,13 +1545,13 @@ impl GgmlType for BlockQ5K {
|
|||||||
let d2 = d * sc as f32;
|
let d2 = d * sc as f32;
|
||||||
let m2 = min * m as f32;
|
let m2 = min * m as f32;
|
||||||
for (ql, qh) in ql.iter().zip(qh) {
|
for (ql, qh) in ql.iter().zip(qh) {
|
||||||
let to_add = if qh & u1 != 0 { 16 } else { 1 };
|
let to_add = if qh & u1 != 0 { 16f32 } else { 0f32 };
|
||||||
y[ys_index] = d1 * ((ql & 0xF) + to_add) as f32 - m1;
|
y[ys_index] = d1 * ((ql & 0xF) as f32 + to_add) - m1;
|
||||||
ys_index += 1;
|
ys_index += 1;
|
||||||
}
|
}
|
||||||
for (ql, qh) in ql.iter().zip(qh) {
|
for (ql, qh) in ql.iter().zip(qh) {
|
||||||
let to_add = if qh & u2 != 0 { 16 } else { 1 };
|
let to_add = if qh & u2 != 0 { 16f32 } else { 0f32 };
|
||||||
y[ys_index] = d2 * ((ql >> 4) + to_add) as f32 - m2;
|
y[ys_index] = d2 * ((ql >> 4) as f32 + to_add) - m2;
|
||||||
ys_index += 1;
|
ys_index += 1;
|
||||||
}
|
}
|
||||||
is += 2;
|
is += 2;
|
||||||
|
@ -407,7 +407,7 @@ fn quantize_q5k() -> Result<()> {
|
|||||||
let dst = round_vector(&dst);
|
let dst = round_vector(&dst);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
[dst[0], dst[128], dst[256], dst[512], dst[800], dst[1023]],
|
[dst[0], dst[128], dst[256], dst[512], dst[800], dst[1023]],
|
||||||
[-0.499, -0.372, -0.249, 0.001, 0.279, 0.499]
|
[-0.5, -0.373, -0.25, 0.0, 0.279, 0.499]
|
||||||
);
|
);
|
||||||
|
|
||||||
let (src_big, mut dst_big) = get_test_vector(128.0, 1024);
|
let (src_big, mut dst_big) = get_test_vector(128.0, 1024);
|
||||||
|
Reference in New Issue
Block a user