Quantized support for f16 and f32 (#457)

* Add f32 as a quantized type.

* Add f16 as a quantized type too.
Laurent Mazare
2023-08-15 21:09:37 +01:00
committed by GitHub
parent e68b2accb4
commit b8263aa15c
3 changed files with 75 additions and 4 deletions


@@ -69,9 +69,6 @@ impl Cpu<ARR> for CurrentCpu {
        for i in 0..ARR / 4 {
            x[4 * i] = vaddq_f32(x[4 * i], x[4 * i + 2]);
        }
        for i in 0..ARR / 8 {
            x[8 * i] = vaddq_f32(x[8 * i], x[8 * i + 4]);
        }
        *y = Self::reduce_one(x[0]);
    }
}
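The hunk above is the tail of a pairwise tree reduction over an array of NEON `float32x4_t` registers: each pass folds register groups into the lower half until the running sum lands in `x[0]`, and `reduce_one` then sums that register's four lanes. A portable, intrinsics-free sketch of the same idea (assuming `ARR = 8` and plain `f32` slots in place of NEON registers; `tree_reduce` is a hypothetical name, not from the diff) looks like this:

```rust
/// Pairwise tree reduction: sum all elements of `x` into `x[0]` and return it.
/// Mirrors the ARR/2, ARR/4, ARR/8 passes from the diff, with `+=` standing
/// in for `vaddq_f32` on vector registers.
fn tree_reduce(x: &mut [f32; 8]) -> f32 {
    const ARR: usize = 8;
    // Pass 1: fold odd slots into even slots.
    for i in 0..ARR / 2 {
        x[2 * i] += x[2 * i + 1];
    }
    // Pass 2: fold every group of four down into its first slot.
    for i in 0..ARR / 4 {
        x[4 * i] += x[4 * i + 2];
    }
    // Pass 3: fold the two remaining partial sums into x[0].
    for i in 0..ARR / 8 {
        x[8 * i] += x[8 * i + 4];
    }
    x[0]
}

fn main() {
    let mut x = [0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
    println!("{}", tree_reduce(&mut x)); // sum of 0..=7
}
```

The diff only shows the `ARR / 4` and `ARR / 8` passes; an earlier `ARR / 2` pass presumably sits just above the hunk. The tree-shaped order keeps all additions independent within a pass, which is what lets the real version run on whole SIMD registers at a time.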