Mirror of https://github.com/huggingface/candle.git
AVX version of the vecdot for q4_0. (#474)
* AVX version of the vecdot for q4_0.
* Tweak the avx bits.
* Add a qmatmul benchmark.
* Fix the quantized test.
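For reference, the q4_0 / q8_0 blocks that this kernel operates on follow ggml's layout; roughly (a sketch inferred from the field accesses in the code below, assuming `half::f16`, not copied from this diff):

#[repr(C)]
struct BlockQ4_0 {
    d: f16,        // per-block scale factor
    qs: [u8; 16],  // QK4_0 / 2 bytes: 32 4-bit quants packed two per byte
}

#[repr(C)]
struct BlockQ8_0 {
    d: f16,        // per-block scale factor
    qs: [i8; 32],  // QK8_0 signed 8-bit quants
}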
@@ -1,3 +1,8 @@
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use super::GgmlDType;
use crate::Result;
use half::f16;
@@ -620,6 +625,48 @@ impl GgmlType for BlockQ8K {
    }
}

#[cfg(target_feature = "avx")]
#[inline(always)]
unsafe fn sum_i16_pairs_float(x: __m256i) -> __m256 {
    // Add adjacent i16 pairs into i32 lanes, then convert each lane to f32.
    let ones = _mm256_set1_epi16(1);
    let summed_pairs = _mm256_madd_epi16(ones, x);
    _mm256_cvtepi32_ps(summed_pairs)
}

#[cfg(target_feature = "avx")]
#[inline(always)]
unsafe fn mul_sum_us8_pairs_float(ax: __m256i, sy: __m256i) -> __m256 {
    // Multiply unsigned bytes of `ax` with signed bytes of `sy`, summing adjacent
    // pairs into i16, then reduce those pairs into f32 lanes.
    let dot = _mm256_maddubs_epi16(ax, sy);
    sum_i16_pairs_float(dot)
}

#[cfg(target_feature = "avx")]
#[inline(always)]
unsafe fn hsum_float_8(x: __m256) -> f32 {
    // Horizontal sum of the 8 f32 lanes of `x`.
    let mut res = _mm256_extractf128_ps(x, 1);
    res = _mm_add_ps(res, _mm256_castps256_ps128(x));
    res = _mm_add_ps(res, _mm_movehl_ps(res, res));
    res = _mm_add_ss(res, _mm_movehdup_ps(res));
    _mm_cvtss_f32(res)
}

#[cfg(target_feature = "avx")]
#[inline(always)]
unsafe fn bytes_from_nibbles_32(rsi: *const u8) -> __m256i {
    // Load 16 packed bytes and expand their 32 nibbles into 32 bytes: low nibbles
    // end up in the lower 128-bit lane, high nibbles in the upper lane.
    let tmp = _mm_loadu_si128(rsi as *const __m128i);
    let bytes = _mm256_insertf128_si256::<1>(_mm256_castsi128_si256(tmp), _mm_srli_epi16(tmp, 4));
    let low_mask = _mm256_set1_epi8(0xF);
    _mm256_and_si256(low_mask, bytes)
}

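// Illustrative scalar equivalent of bytes_from_nibbles_32 (a sketch, not part of
// this commit): the 16 packed bytes expand to 32 values, low nibbles filling the
// lower 128-bit lane and high nibbles the upper lane.
#[allow(dead_code)]
fn bytes_from_nibbles_32_scalar(qs: &[u8; 16]) -> [u8; 32] {
    let mut out = [0u8; 32];
    for (i, &b) in qs.iter().enumerate() {
        out[i] = b & 0x0F;     // low nibble -> lower lane
        out[i + 16] = b >> 4;  // high nibble -> upper lane
    }
    out
}
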
#[cfg(target_feature = "avx")]
#[inline(always)]
unsafe fn mul_sum_i8_pairs_float(x: __m256i, y: __m256i) -> __m256 {
    // `maddubs` needs an unsigned first operand, so take |x| and move x's sign
    // onto y: |x| * (sign(x) * y) == x * y.
    let ax = _mm256_sign_epi8(x, x);
    let sy = _mm256_sign_epi8(y, x);
    mul_sum_us8_pairs_float(ax, sy)
}

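// Illustrative scalar equivalent of mul_sum_i8_pairs_float (a sketch, not part of
// this commit): each of the 8 output lanes holds the dot product of 4 consecutive
// pairs of i8 values.
#[allow(dead_code)]
fn mul_sum_i8_pairs_float_scalar(x: &[i8; 32], y: &[i8; 32]) -> [f32; 8] {
    let mut out = [0f32; 8];
    for lane in 0..8 {
        let mut acc = 0i32;
        for k in 0..4 {
            let i = 4 * lane + k;
            acc += i32::from(x[i]) * i32::from(y[i]);
        }
        out[lane] = acc as f32;
    }
    out
}
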
impl GgmlType for BlockQ4_0 {
    const DTYPE: GgmlDType = GgmlDType::Q4_0;
    const BLCK_SIZE: usize = QK4_0;
@@ -685,7 +732,35 @@ impl GgmlType for BlockQ4_0 {
        Ok(())
    }

    #[cfg(target_feature = "avx")]
    fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
        let qk = QK8_0;
        let nb = n / qk;
        if n % QK8_0 != 0 {
            crate::bail!("vec_dot_q4_0_q8_0: {n} is not divisible by {qk}")
        }
        if nb % 2 != 0 {
            crate::bail!("vec_dot_q4_0_q8_0: {nb} is not even")
        }

        unsafe {
            let mut acc = _mm256_setzero_ps();
            for (x, y) in xs.iter().zip(ys.iter()) {
                // Combined scale factor for this pair of blocks.
                let d = _mm256_set1_ps(f16::to_f32(x.d) * f16::to_f32(y.d));
                // Unpack the 32 4-bit quants and subtract the +8 offset to recover signed values.
                let bx = bytes_from_nibbles_32(x.qs.as_ptr());
                let off = _mm256_set1_epi8(8);
                let bx = _mm256_sub_epi8(bx, off);
                let by = _mm256_loadu_si256(y.qs.as_ptr() as *const __m256i);
                // Multiply the int8 values, reduce pairs into f32 lanes, and accumulate scaled.
                let q = mul_sum_i8_pairs_float(bx, by);
                acc = _mm256_fmadd_ps(d, q, acc);
            }
            Ok(hsum_float_8(acc))
        }
    }

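    // For comparison, the per-block contribution accumulated above is
    // d_x * d_y * sum_i (q_i - 8) * y_i. A scalar sketch, assuming ggml's
    // q4_0/q8_0 layout (not part of this commit):
    #[allow(dead_code)]
    fn q4_0_q8_0_block_dot(d_x: f32, x_qs: &[u8; 16], d_y: f32, y_qs: &[i8; 32]) -> f32 {
        let mut sumi = 0i32;
        for j in 0..16 {
            let lo = i32::from(x_qs[j] & 0x0F) - 8; // low nibble, offset removed
            let hi = i32::from(x_qs[j] >> 4) - 8;   // high nibble, offset removed
            sumi += lo * i32::from(y_qs[j]) + hi * i32::from(y_qs[j + 16]);
        }
        sumi as f32 * d_x * d_y
    }
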
    // https://github.com/ggerganov/llama.cpp/blob/b5ffb2849d23afe73647f68eec7b68187af09be6/ggml.c#L2361C10-L2361C122
    #[cfg(not(target_feature = "avx"))]
    fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
        let qk = QK8_0;
        let nb = n / qk;
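For orientation, a hypothetical call-site sketch; the variable names and the QK4_0 == QK8_0 == 32 block size are assumptions, not taken from this diff:

    // xs: Vec<BlockQ4_0> quantized weights, ys: Vec<BlockQ8_0> quantized activations.
    // let n = xs.len() * QK4_0;
    // let dot = BlockQ4_0::vec_dot(n, &xs, &ys)?;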