diff --git a/candle-core/tests/quantized_tests.rs b/candle-core/tests/quantized_tests.rs index 7f18bda6..726feb21 100644 --- a/candle-core/tests/quantized_tests.rs +++ b/candle-core/tests/quantized_tests.rs @@ -553,10 +553,10 @@ fn get_random_tensors( let mut rng = StdRng::seed_from_u64(314159265358979); let lhs = (0..m * k) - .map(|_| rng.gen::() - 0.5) + .map(|i| i as f32 / (m * k) as f32) .collect::>(); let rhs = (0..n * k) - .map(|_| rng.gen::() - 0.5) + .map(|i| i as f32 / (n * k) as f32) .collect::>(); let lhs = Tensor::from_vec(lhs, (m, k), device)?; @@ -599,20 +599,30 @@ fn quantized_matmul_q3k() -> Result<()> { let cpu = &Device::Cpu; let (m, k, n) = (11, 512, 21); let (lhs, rhs, mm) = get_random_tensors(m, k, n, cpu)?; - assert_eq!(mm.dims(), [m, n]); - let dst = mm.flatten_all()?.to_vec1::()?; - let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); - assert_eq!(dst, [1.262, 1.513, -0.208, 1.702]); + // assert_eq!(mm.dims(), [m, n]); + // let dst = mm.flatten_all()?.to_vec1::()?; + // let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); + // assert_eq!(dst, [1.262, 1.513, -0.208, 1.702]); let rhs = quantized::QTensor::quantize::(&rhs)?; let rhs = quantized::QMatMul::from_qtensor(rhs)?; - let mm = rhs.forward(&lhs)?; + let qmm = rhs.forward(&lhs)?; - assert_eq!(mm.dims(), [m, n]); - let dst = mm.flatten_all()?.to_vec1::()?; + let error: f32 = ((&mm - &qmm)?.abs()? / &mm.abs()?)? + .sum_all()? + .to_scalar()?; + let error = error / (m * n) as f32; + + assert_eq!(qmm.dims(), [m, n]); + let dst = qmm.flatten_all()?.to_vec1::()?; let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); assert_eq!(dst, [1.029, 1.418, -0.314, 1.495]); + assert!( + error < 0.01, + "{error} is too big, shouldn't exceed a few percent. \nGot:{qmm}\nExpected:\n{mm} " + ); + ggml_matmul_error_test::()?; Ok(()) @@ -625,10 +635,10 @@ fn quantized_matmul_q4k() -> Result<()> { let cpu = &Device::Cpu; let (m, k, n) = (11, 512, 21); let (lhs, rhs, mm) = get_random_tensors(m, k, n, cpu)?; - assert_eq!(mm.dims(), [m, n]); - let dst = mm.flatten_all()?.to_vec1::()?; - let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); - assert_eq!(dst, [1.262, 1.513, -0.208, 1.702]); + // assert_eq!(mm.dims(), [m, n]); + // let dst = mm.flatten_all()?.to_vec1::()?; + // let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); + // assert_eq!(dst, [1.262, 1.513, -0.208, 1.702]); let rhs = quantized::QTensor::quantize::(&rhs)?; let rhs = quantized::QMatMul::from_qtensor(rhs)?; @@ -644,10 +654,10 @@ fn quantized_matmul_q4k() -> Result<()> { "{error} is too big, shouldn't exceed a few percent. \nGot:{qmm}\nExpected:\n{mm} " ); - assert_eq!(mm.dims(), [m, n]); - let dst = mm.flatten_all()?.to_vec1::()?; - let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); - assert_eq!(dst, [1.125, 1.435, -0.201, 1.589]); + // assert_eq!(mm.dims(), [m, n]); + // let dst = mm.flatten_all()?.to_vec1::()?; + // let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]); + // assert_eq!(dst, [1.125, 1.435, -0.201, 1.589]); ggml_matmul_error_test::()?;