diff --git a/candle-core/src/quantized/k_quants.rs b/candle-core/src/quantized/k_quants.rs
index 3e45bc6d..bfc471a3 100644
--- a/candle-core/src/quantized/k_quants.rs
+++ b/candle-core/src/quantized/k_quants.rs
@@ -503,7 +503,8 @@ impl GgmlType for BlockQ2K {
                 }
                 let dm = block.dmin.to_f32() * (block.scales[j] >> 4) as f32;
                 for ii in 0..16 {
-                    let ll = nearest_int((x[16 * j + ii] + dm) / d).clamp(0, 3);
+                    let mut ll = nearest_int((x[16 * j + ii] + dm) / d);
+                    ll = ll.clamp(0, 3);
                     big_l[16 * j + ii] = ll as u8;
                 }
             }
@@ -586,14 +587,14 @@ impl GgmlType for BlockQ3K {
             if max_scale != 0.0 {
                 let iscale = -32.0 / max_scale;
                 for (j, scale) in scales.iter().enumerate() {
-                    let l_val = nearest_int(iscale * scale);
-                    let l_val = l_val.clamp(-32, 31) + 32;
+                    let mut l_val = nearest_int(iscale * scale);
+                    l_val = l_val.clamp(-32, 31) + 32;
                     if j < 8 {
                         block.scales[j] = (l_val & 0xF) as u8;
                     } else {
                         block.scales[j - 8] |= ((l_val & 0xF) << 4) as u8;
                     }
-                    let l_val = l_val >> 4;
+                    l_val >>= 4;
                     block.scales[j % 4 + 8] |= (l_val << (2 * (j / 4))) as u8;
                 }
                 block.d = f16::from_f32(1.0 / iscale);
@@ -613,8 +614,9 @@ impl GgmlType for BlockQ3K {
                 let d = block.d.to_f32() * sc as f32;
                 if d != 0.0 {
                     for ii in 0..16 {
-                        let l_val = nearest_int(x[16 * j + ii] / d);
-                        l[16 * j + ii] = (l_val.clamp(-4, 3) + 4) as i8;
+                        let mut l_val = nearest_int(x[16 * j + ii] / d);
+                        l_val = l_val.clamp(-4, 3);
+                        l[16 * j + ii] = (l_val + 4) as i8;
                     }
                 }
             }
@@ -700,7 +702,7 @@ impl GgmlType for BlockQ3K {
                         // 16 block finished => advance scale index
                         is += 1;
                     }
-                    // 32 block finished => increase shift and m
+                    // 32 block finished => increase shift and m
                     shift += 2;
                     m <<= 1;
                 }
@@ -741,8 +743,10 @@ impl GgmlType for BlockQ4K {
             let inv_min = if max_min > 0.0 { 63.0 / max_min } else { 0.0 };
 
             for j in 0..QK_K / 32 {
-                let ls = nearest_int(inv_scale * scales[j]).min(63) as u8;
-                let lm = nearest_int(inv_min * mins[j]).min(63) as u8;
+                let ls = nearest_int(inv_scale * scales[j]);
+                let lm = nearest_int(inv_min * mins[j]);
+                let ls = ls.min(63) as u8; // cap while still i32; casting first would wrap
+                let lm = lm.min(63) as u8; // same ordering for the min code
                 if j < 4 {
                     block.scales[j] = ls;
                     block.scales[j + 4] = lm;
@@ -764,8 +768,9 @@ impl GgmlType for BlockQ4K {
                 if d != 0.0 {
                     let dm = block.dmin.to_f32() * m as f32;
                     for ii in 0..32 {
-                        let l_val = nearest_int((x[32 * j + ii] + dm) / d);
-                        l[32 * j + ii] = l_val.clamp(0, 15) as u8;
+                        let mut l_val = nearest_int((x[32 * j + ii] + dm) / d);
+                        l_val = l_val.clamp(0, 15);
+                        l[32 * j + ii] = l_val as u8;
                     }
                 }
             }
@@ -786,10 +791,10 @@ impl GgmlType for BlockQ4K {
             let d = block.d.to_f32();
             let min = block.dmin.to_f32();
             let q = &block.qs;
+            let mut is = 0;
             let mut ys_index = 0;
 
             for j in (0..QK_K).step_by(64) {
-                let is = j * 2;
                 let q = &q[j / 2..j / 2 + 32];
                 let (sc, m) = get_scale_min_k4(is, &block.scales);
                 let d1 = d * sc as f32;
@@ -805,6 +810,7 @@ impl GgmlType for BlockQ4K {
                     y[ys_index] = d2 * (q >> 4) as f32 - m2;
                     ys_index += 1;
                 }
+                is += 2;
             }
         }
         Ok(())
@@ -842,8 +848,10 @@ impl GgmlType for BlockQ5K {
             };
             let inv_min = if max_min > 0.0 { 63.0 / max_min } else { 0.0 };
             for j in 0..QK_K / 32 {
-                let ls = nearest_int(inv_scale * scales[j]).min(63) as u8;
-                let lm = nearest_int(inv_min * mins[j]).min(63) as u8;
+                let ls = nearest_int(inv_scale * scales[j]);
+                let lm = nearest_int(inv_min * mins[j]);
+                let ls = ls.min(63) as u8; // cap while still i32; casting first would wrap
+                let lm = lm.min(63) as u8; // same ordering for the min code
                 if j < 4 {
                     block.scales[j] = ls;
                     block.scales[j + 4] = lm;
@@ -865,8 +873,9 @@ impl GgmlType for BlockQ5K {
                 }
                 let dm = block.dmin.to_f32() * m as f32;
                 for ii in 0..32 {
-                    let ll = nearest_int((x[32 * j + ii] + dm) / d);
-                    l[32 * j + ii] = ll.clamp(0, 31) as u8;
+                    let mut ll = nearest_int((x[32 * j + ii] + dm) / d);
+                    ll = ll.clamp(0, 31); // keep within 0..=31 before narrowing to u8
+                    l[32 * j + ii] = ll as u8;
                 }
             }
 
diff --git a/candle-core/src/quantized/utils.rs b/candle-core/src/quantized/utils.rs
index edbffa35..fded9d61 100644
--- a/candle-core/src/quantized/utils.rs
+++ b/candle-core/src/quantized/utils.rs
@@ -4,9 +4,7 @@ pub(super) fn nearest_int(v: f32) -> i32 {
     v.round() as i32
 }
 
-/// Validates that the input and output are the right size and returns an iterator which maps each
-/// input region `xs` to its corresponding output block in `ys`. Each output region is guaranteed
-/// to be `T::BLCK_SIZE` long.
+/// Validates that the input and output are the right size and returns an iterator which maps each input region `xs` to its corresponding output block in `ys`. Each output region is guaranteed to be `T::BLCK_SIZE` long.
 pub(super) fn group_for_quantization<'a, 'b, T: super::k_quants::GgmlType>(
     xs: &'b [f32],
     ys: &'a mut [T],
@@ -25,9 +23,7 @@ pub(super) fn group_for_quantization<'a, 'b, T: super::k_quants::GgmlType>(
     Ok(ys.iter_mut().zip(xs.chunks_exact(block_size)).collect())
 }
 
-/// Validates that the input and output are the right size and returns an iterator which maps each
-/// input block `xs` to its corresponding output region in `ys`. Each output region is guaranteed
-/// to be `T::BLCK_SIZE` long.
+/// Validates that the input and output are the right size and returns an iterator which maps each input block `xs` to its corresponding output region in `ys`. Each output region is guaranteed to be `T::BLCK_SIZE` long.
 pub(super) fn group_for_dequantization<'a, 'b, T: super::k_quants::GgmlType>(
     xs: &'a [T],
     ys: &'b mut [f32],
@@ -178,7 +174,7 @@ pub(super) unsafe fn make_qx_quants(
         for i in 0..n {
             let x = *x.add(i);
             let l = nearest_int(iscale * x);
-            let l = l.clamp(-nmax, nmax - 1);
+            let l = l.clamp(-nmax, nmax - 1);
             let w = if weight_type == 1 { x * x } else { 1. };
             let l = l as f32;
             sumlx += w * x * l;
@@ -202,7 +198,7 @@ pub(super) fn make_qkx1_quants(nmax: i32, ntry: usize, x: &[f32]) -> (f32, f32)
     let n = x.len();
     let mut l = vec![0; n];
     // Get min/max
-    let min = *x
+    let mut min = *x
         .iter()
         .take(n)
         .min_by(|a, b| a.total_cmp(b))
@@ -215,7 +211,9 @@ pub(super) fn make_qkx1_quants(nmax: i32, ntry: usize, x: &[f32]) -> (f32, f32)
     }
 
     // Ensure min <= 0.0
-    let mut min = min.min(0.);
+    if min > 0.0 {
+        min = 0.0;
+    }
 
     // Compute scale and inverse scale
     let mut iscale = nmax as f32 / (max - min);
@@ -227,7 +225,8 @@ pub(super) fn make_qkx1_quants(nmax: i32, ntry: usize, x: &[f32]) -> (f32, f32)
         let mut did_change = false;
 
         for (i, value) in x.iter().enumerate().take(n) {
-            let li = nearest_int(iscale * (value - min)).clamp(0, nmax);
+            let mut li = nearest_int(iscale * (value - min));
+            li = li.clamp(0, nmax);
             let clamped_li = li as u8;
             if clamped_li != l[i] {
                 l[i] = clamped_li;
@@ -281,8 +280,8 @@ pub(super) fn make_q3_quants(x: &[f32], nmax: i32, do_rmse: bool) -> f32 {
         let mut sumlx = 0.0;
         let mut suml2 = 0.0;
         for i in 0..n {
-            let li = (iscale * x[i]).round() as i32;
-            let li = li.clamp(-nmax, nmax - 1);
+            let mut li = (iscale * x[i]).round() as i32;
+            li = li.clamp(-nmax, nmax - 1);
             l[i] = li as i8;
             let w = x[i] * x[i];
             sumlx += w * x[i] * li as f32;
@@ -319,8 +318,9 @@ pub(super) fn make_q3_quants(x: &[f32], nmax: i32, do_rmse: bool) -> f32 {
         return sumlx / suml2;
     }
     for i in 0..n {
-        let li = (iscale * x[i]).round() as i32;
-        l[i] = (li.clamp(-nmax, nmax - 1) + nmax) as i8;
+        let mut li = (iscale * x[i]).round() as i32;
+        li = li.clamp(-nmax, nmax - 1);
+        l[i] = (li + nmax) as i8;
     }
     1.0 / iscale
 }