Revert "Avoid some mut in quantized functions. (#550)" (#552)

This reverts commit cf27b9b636.
This commit is contained in:
Laurent Mazare
2023-08-22 15:57:46 +01:00
committed by GitHub
parent cf27b9b636
commit ec665acad7
2 changed files with 39 additions and 30 deletions

View File

@ -503,7 +503,8 @@ impl GgmlType for BlockQ2K {
} }
let dm = block.dmin.to_f32() * (block.scales[j] >> 4) as f32; let dm = block.dmin.to_f32() * (block.scales[j] >> 4) as f32;
for ii in 0..16 { for ii in 0..16 {
let ll = nearest_int((x[16 * j + ii] + dm) / d).clamp(0, 3); let mut ll = nearest_int((x[16 * j + ii] + dm) / d);
ll = ll.clamp(0, 3);
big_l[16 * j + ii] = ll as u8; big_l[16 * j + ii] = ll as u8;
} }
} }
@ -586,14 +587,14 @@ impl GgmlType for BlockQ3K {
if max_scale != 0.0 { if max_scale != 0.0 {
let iscale = -32.0 / max_scale; let iscale = -32.0 / max_scale;
for (j, scale) in scales.iter().enumerate() { for (j, scale) in scales.iter().enumerate() {
let l_val = nearest_int(iscale * scale); let mut l_val = nearest_int(iscale * scale);
let l_val = l_val.clamp(-32, 31) + 32; l_val = l_val.clamp(-32, 31) + 32;
if j < 8 { if j < 8 {
block.scales[j] = (l_val & 0xF) as u8; block.scales[j] = (l_val & 0xF) as u8;
} else { } else {
block.scales[j - 8] |= ((l_val & 0xF) << 4) as u8; block.scales[j - 8] |= ((l_val & 0xF) << 4) as u8;
} }
let l_val = l_val >> 4; l_val >>= 4;
block.scales[j % 4 + 8] |= (l_val << (2 * (j / 4))) as u8; block.scales[j % 4 + 8] |= (l_val << (2 * (j / 4))) as u8;
} }
block.d = f16::from_f32(1.0 / iscale); block.d = f16::from_f32(1.0 / iscale);
@ -613,8 +614,9 @@ impl GgmlType for BlockQ3K {
let d = block.d.to_f32() * sc as f32; let d = block.d.to_f32() * sc as f32;
if d != 0.0 { if d != 0.0 {
for ii in 0..16 { for ii in 0..16 {
let l_val = nearest_int(x[16 * j + ii] / d); let mut l_val = nearest_int(x[16 * j + ii] / d);
l[16 * j + ii] = (l_val.clamp(-4, 3) + 4) as i8; l_val = l_val.clamp(-4, 3);
l[16 * j + ii] = (l_val + 4) as i8;
} }
} }
} }
@ -741,8 +743,10 @@ impl GgmlType for BlockQ4K {
let inv_min = if max_min > 0.0 { 63.0 / max_min } else { 0.0 }; let inv_min = if max_min > 0.0 { 63.0 / max_min } else { 0.0 };
for j in 0..QK_K / 32 { for j in 0..QK_K / 32 {
let ls = nearest_int(inv_scale * scales[j]).min(63) as u8; let mut ls = nearest_int(inv_scale * scales[j]) as u8;
let lm = nearest_int(inv_min * mins[j]).min(63) as u8; let mut lm = nearest_int(inv_min * mins[j]) as u8;
ls = std::cmp::min(63, ls);
lm = std::cmp::min(63, lm);
if j < 4 { if j < 4 {
block.scales[j] = ls; block.scales[j] = ls;
block.scales[j + 4] = lm; block.scales[j + 4] = lm;
@ -764,8 +768,9 @@ impl GgmlType for BlockQ4K {
if d != 0.0 { if d != 0.0 {
let dm = block.dmin.to_f32() * m as f32; let dm = block.dmin.to_f32() * m as f32;
for ii in 0..32 { for ii in 0..32 {
let l_val = nearest_int((x[32 * j + ii] + dm) / d); let mut l_val = nearest_int((x[32 * j + ii] + dm) / d);
l[32 * j + ii] = l_val.clamp(0, 15) as u8; l_val = l_val.clamp(0, 15);
l[32 * j + ii] = l_val as u8;
} }
} }
} }
@ -786,10 +791,10 @@ impl GgmlType for BlockQ4K {
let d = block.d.to_f32(); let d = block.d.to_f32();
let min = block.dmin.to_f32(); let min = block.dmin.to_f32();
let q = &block.qs; let q = &block.qs;
let mut is = 0;
let mut ys_index = 0; let mut ys_index = 0;
for j in (0..QK_K).step_by(64) { for j in (0..QK_K).step_by(64) {
let is = j * 2;
let q = &q[j / 2..j / 2 + 32]; let q = &q[j / 2..j / 2 + 32];
let (sc, m) = get_scale_min_k4(is, &block.scales); let (sc, m) = get_scale_min_k4(is, &block.scales);
let d1 = d * sc as f32; let d1 = d * sc as f32;
@ -805,6 +810,7 @@ impl GgmlType for BlockQ4K {
y[ys_index] = d2 * (q >> 4) as f32 - m2; y[ys_index] = d2 * (q >> 4) as f32 - m2;
ys_index += 1; ys_index += 1;
} }
is += 2;
} }
} }
Ok(()) Ok(())
@ -842,8 +848,10 @@ impl GgmlType for BlockQ5K {
}; };
let inv_min = if max_min > 0.0 { 63.0 / max_min } else { 0.0 }; let inv_min = if max_min > 0.0 { 63.0 / max_min } else { 0.0 };
for j in 0..QK_K / 32 { for j in 0..QK_K / 32 {
let ls = nearest_int(inv_scale * scales[j]).min(63) as u8; let mut ls = nearest_int(inv_scale * scales[j]) as u8;
let lm = nearest_int(inv_min * mins[j]).min(63) as u8; let mut lm = nearest_int(inv_min * mins[j]) as u8;
ls = ls.min(63);
lm = lm.min(63);
if j < 4 { if j < 4 {
block.scales[j] = ls; block.scales[j] = ls;
block.scales[j + 4] = lm; block.scales[j + 4] = lm;
@ -865,8 +873,9 @@ impl GgmlType for BlockQ5K {
} }
let dm = block.dmin.to_f32() * m as f32; let dm = block.dmin.to_f32() * m as f32;
for ii in 0..32 { for ii in 0..32 {
let ll = nearest_int((x[32 * j + ii] + dm) / d); let mut ll = nearest_int((x[32 * j + ii] + dm) / d);
l[32 * j + ii] = ll.clamp(0, 31) as u8; ll = ll.min(31).max(0);
l[32 * j + ii] = ll as u8;
} }
} }

View File

@ -4,9 +4,7 @@ pub(super) fn nearest_int(v: f32) -> i32 {
v.round() as i32 v.round() as i32
} }
/// Validates that the input and output are the right size and returns an iterator which maps each /// Validates that the input and output are the right size and returns an iterator which maps each input region `xs` to its corresponding output block in `ys`. Each output region is guaranteed to be `T::BLCK_SIZE` long.
/// input region `xs` to its corresponding output block in `ys`. Each output region is guaranteed
/// to be `T::BLCK_SIZE` long.
pub(super) fn group_for_quantization<'a, 'b, T: super::k_quants::GgmlType>( pub(super) fn group_for_quantization<'a, 'b, T: super::k_quants::GgmlType>(
xs: &'b [f32], xs: &'b [f32],
ys: &'a mut [T], ys: &'a mut [T],
@ -25,9 +23,7 @@ pub(super) fn group_for_quantization<'a, 'b, T: super::k_quants::GgmlType>(
Ok(ys.iter_mut().zip(xs.chunks_exact(block_size)).collect()) Ok(ys.iter_mut().zip(xs.chunks_exact(block_size)).collect())
} }
/// Validates that the input and output are the right size and returns an iterator which maps each /// Validates that the input and output are the right size and returns an iterator which maps each input block `xs` to its corresponding output region in `ys`. Each output region is guaranteed to be `T::BLCK_SIZE` long.
/// input block `xs` to its corresponding output region in `ys`. Each output region is guaranteed
/// to be `T::BLCK_SIZE` long.
pub(super) fn group_for_dequantization<'a, 'b, T: super::k_quants::GgmlType>( pub(super) fn group_for_dequantization<'a, 'b, T: super::k_quants::GgmlType>(
xs: &'a [T], xs: &'a [T],
ys: &'b mut [f32], ys: &'b mut [f32],
@ -178,7 +174,7 @@ pub(super) unsafe fn make_qx_quants(
for i in 0..n { for i in 0..n {
let x = *x.add(i); let x = *x.add(i);
let l = nearest_int(iscale * x); let l = nearest_int(iscale * x);
let l = l.clamp(-nmax, nmax - 1); let l = i32::max(-nmax, i32::min(nmax - 1, l));
let w = if weight_type == 1 { x * x } else { 1. }; let w = if weight_type == 1 { x * x } else { 1. };
let l = l as f32; let l = l as f32;
sumlx += w * x * l; sumlx += w * x * l;
@ -202,7 +198,7 @@ pub(super) fn make_qkx1_quants(nmax: i32, ntry: usize, x: &[f32]) -> (f32, f32)
let n = x.len(); let n = x.len();
let mut l = vec![0; n]; let mut l = vec![0; n];
// Get min/max // Get min/max
let min = *x let mut min = *x
.iter() .iter()
.take(n) .take(n)
.min_by(|a, b| a.total_cmp(b)) .min_by(|a, b| a.total_cmp(b))
@ -215,7 +211,9 @@ pub(super) fn make_qkx1_quants(nmax: i32, ntry: usize, x: &[f32]) -> (f32, f32)
} }
// Ensure min <= 0.0 // Ensure min <= 0.0
let mut min = min.min(0.); if min > 0.0 {
min = 0.0;
}
// Compute scale and inverse scale // Compute scale and inverse scale
let mut iscale = nmax as f32 / (max - min); let mut iscale = nmax as f32 / (max - min);
@ -227,7 +225,8 @@ pub(super) fn make_qkx1_quants(nmax: i32, ntry: usize, x: &[f32]) -> (f32, f32)
let mut did_change = false; let mut did_change = false;
for (i, value) in x.iter().enumerate().take(n) { for (i, value) in x.iter().enumerate().take(n) {
let li = nearest_int(iscale * (value - min)).clamp(0, nmax); let mut li = nearest_int(iscale * (value - min));
li = li.clamp(0, nmax);
let clamped_li = li as u8; let clamped_li = li as u8;
if clamped_li != l[i] { if clamped_li != l[i] {
l[i] = clamped_li; l[i] = clamped_li;
@ -281,8 +280,8 @@ pub(super) fn make_q3_quants(x: &[f32], nmax: i32, do_rmse: bool) -> f32 {
let mut sumlx = 0.0; let mut sumlx = 0.0;
let mut suml2 = 0.0; let mut suml2 = 0.0;
for i in 0..n { for i in 0..n {
let li = (iscale * x[i]).round() as i32; let mut li = (iscale * x[i]).round() as i32;
let li = li.clamp(-nmax, nmax - 1); li = li.clamp(-nmax, nmax - 1);
l[i] = li as i8; l[i] = li as i8;
let w = x[i] * x[i]; let w = x[i] * x[i];
sumlx += w * x[i] * li as f32; sumlx += w * x[i] * li as f32;
@ -319,8 +318,9 @@ pub(super) fn make_q3_quants(x: &[f32], nmax: i32, do_rmse: bool) -> f32 {
return sumlx / suml2; return sumlx / suml2;
} }
for i in 0..n { for i in 0..n {
let li = (iscale * x[i]).round() as i32; let mut li = (iscale * x[i]).round() as i32;
l[i] = (li.clamp(-nmax, nmax - 1) + nmax) as i8; li = li.clamp(-nmax, nmax - 1);
l[i] = (li + nmax) as i8;
} }
1.0 / iscale 1.0 / iscale
} }