Mirror of https://github.com/huggingface/candle.git, synced 2025-06-15 10:26:33 +00:00
Fix for the batch dim in the quantized matmul example. (#2073)
* Fix for the batch dim in the quantized matmul example.
* Enable more tests on cuda.
* Add a test for qmm with a batch.
* Fix the zeros-dim test on metal.
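The change in a nutshell: QMatMul::forward accepts inputs with a leading batch dimension, but the CUDA path sized the kernel launch for a single batch element. Below is a minimal sketch of the invariant the new test locks in (batched_qmm is a hypothetical helper; the shapes mirror the test added at the end of this diff): forwarding a stacked batch of two identical inputs must match forwarding one of them.

// Sketch of the invariant the new batch test asserts (hypothetical
// helper, shapes mirror the test below).
use candle_core::{quantized, Device, IndexOp, Result, Tensor};

fn batched_qmm(device: &Device) -> Result<()> {
    // Quantize a (n, k) = (4, 64) weight to Q4_0 and wrap it in QMatMul.
    let weight = Tensor::randn(0f32, 1.0, (4, 64), device)?;
    let qtensor = quantized::QTensor::quantize(&weight, quantized::GgmlDType::Q4_0)?;
    let matmul = quantized::QMatMul::from_qtensor(qtensor)?;
    // A (m, k) = (3, 64) input, and the same input stacked into a batch.
    let lhs = Tensor::randn(0f32, 1.0, (3, 64), device)?;
    let lhs2 = Tensor::stack(&[&lhs, &lhs], 0)?; // shape (2, 3, 64)
    let res = matmul.forward(&lhs)?; // shape (3, 4)
    let res2 = matmul.forward(&lhs2)?.i(1)?; // second batch element, (3, 4)
    // Before this fix, the CUDA kernel was launched with y_cols = m,
    // so everything past the first batch element was wrong.
    let diff = (res - res2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
    assert_eq!(diff, 0.);
    Ok(())
}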
@@ -283,5 +283,5 @@ impl MetalDevice {
 }
 
 fn buf_size(size: NSUInteger) -> NSUInteger {
-    (size - 1).next_power_of_two() as NSUInteger
+    size.saturating_sub(1).next_power_of_two() as NSUInteger
 }
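Why saturating_sub matters here: allocating a buffer for an empty tensor (the zeros-dim test on metal) calls buf_size(0), and `size - 1` underflows an unsigned NSUInteger, panicking in debug builds and wrapping around in release. `saturating_sub(1)` clamps at 0, and `0.next_power_of_two()` is 1. A standalone check of the edge case, with u64 standing in for NSUInteger:

// Standalone check of the buf_size edge case.
fn buf_size(size: u64) -> u64 {
    // The old code did (size - 1), which underflows when size == 0.
    size.saturating_sub(1).next_power_of_two()
}

fn main() {
    assert_eq!(buf_size(0), 1); // 0.saturating_sub(1) == 0; 0.next_power_of_two() == 1
    assert_eq!(buf_size(1), 1);
    assert_eq!(buf_size(3), 2); // (3 - 1).next_power_of_two() == 2
    assert_eq!(buf_size(9), 8);
}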
@@ -464,7 +464,7 @@ impl QCudaStorage {
             /* x_rows */ n,
             /* x_cols */ k,
             /* y_rows */ k,
-            /* y_cols */ m,
+            /* y_cols */ b * m,
             self.device(),
         )?
     };
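The one-token change above is the heart of the PR: the kernel multiplies the (n, k) quantized weight by the activation, and all leading dims of a (b, m, k) input have to be folded into the column count of the launch. A small sketch of that shape bookkeeping (flatten_batch is a hypothetical helper, not candle's API):

// Hypothetical helper showing the bookkeeping behind
// `/* y_cols */ b * m`: a (b, m, k) activation is treated as a
// (b * m, k) matrix for the kernel launch.
fn flatten_batch(shape: &[usize]) -> (usize, usize) {
    let k = *shape.last().unwrap();
    let rows: usize = shape[..shape.len() - 1].iter().product();
    (rows, k)
}

fn main() {
    let (b, m, k) = (2, 3, 64);
    assert_eq!(flatten_batch(&[b, m, k]), (b * m, k));
    // With no batch dim (b effectively 1) the old y_cols = m was
    // accidentally correct, which is why the bug went unnoticed.
    assert_eq!(flatten_batch(&[m, k]), (m, k));
}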
@@ -3,7 +3,7 @@ use candle_core::{
     quantized::{self, GgmlDType},
     test_device,
     test_utils::to_vec2_round,
-    Device, Module, Result, Tensor,
+    Device, IndexOp, Module, Result, Tensor,
 };
 use quantized::{k_quants, GgmlType};
 use rand::prelude::*;
@@ -47,18 +47,14 @@ fn test_matmul(
 }
 
 fn quantized_matmul(device: &Device) -> Result<()> {
-    // TODO Enable this later when we enable cuda.
-    if device.is_cuda() {
-        return Ok(());
-    }
     let (m, k, n) = (3, 64, 4);
-    let lhs = (0..(m * k)).map(|v| v as f32).collect::<Vec<_>>();
-    let tensor_lhs = Tensor::from_slice(&lhs, (m, k), device)?;
+    let lhs_s = (0..(m * k)).map(|v| v as f32).collect::<Vec<_>>();
+    let lhs = Tensor::from_slice(&lhs_s, (m, k), device)?;
     let mut dst = vec![42.; 3 * 4];
     let mut rhs_t = vec![k_quants::BlockQ4_0::zeros(); 8];
     let rhs = (0..(k * n)).map(|v| v as f32).collect::<Vec<_>>();
     k_quants::BlockQ4_0::from_float(&rhs, &mut rhs_t)?;
-    k_quants::matmul((m, k, n), &lhs, &rhs_t, &mut dst)?;
+    k_quants::matmul((m, k, n), &lhs_s, &rhs_t, &mut dst)?;
     assert_eq!(
         dst.iter().map(|x| x.round()).collect::<Vec<_>>(),
         &[
@@ -67,7 +63,7 @@ fn quantized_matmul(device: &Device) -> Result<()> {
         ]
     );
     let tensor_rhs = Tensor::from_slice(&rhs, (n, k), device)?.t()?;
-    let mm = tensor_lhs.matmul(&tensor_rhs)?;
+    let mm = lhs.matmul(&tensor_rhs)?;
     assert_eq!(
         mm.to_vec2::<f32>()?,
         &[
@@ -79,7 +75,7 @@ fn quantized_matmul(device: &Device) -> Result<()> {
 
     let qtensor = quantized::QTensor::quantize(&tensor_rhs.t()?, GgmlDType::Q4_0)?;
     let matmul = quantized::QMatMul::from_qtensor(qtensor)?;
-    let res = matmul.forward(&tensor_lhs)?;
+    let res = matmul.forward(&lhs)?;
     match device {
         Device::Metal(_) => assert_eq!(
             to_vec2_round(&res, 0)?,
@@ -89,7 +85,15 @@ fn quantized_matmul(device: &Device) -> Result<()> {
                 [341970.0, 994574.0, 1656181.0, 2302182.0]
             ]
         ),
-        _ => assert_eq!(
+        Device::Cuda(_) => assert_eq!(
+            to_vec2_round(&res, 0)?,
+            &[
+                [84866.0, 214045.0, 344676.0, 473707.0],
+                [213425.0, 604313.0, 1000431.0, 1387960.0],
+                [342030.0, 994630.0, 1656248.0, 2302250.0]
+            ]
+        ),
+        Device::Cpu => assert_eq!(
             to_vec2_round(&res, 0)?,
             &[
                 [85120.0, 214562.0, 345455.0, 474748.0],
@@ -98,22 +102,16 @@ fn quantized_matmul(device: &Device) -> Result<()> {
             ]
         ),
     }
 
     test_matmul(device, (1, 3, 4, 256), GgmlDType::Q4_0)?;
 
     Ok(())
 }
 
 fn quantized_matmul_neg(device: &Device) -> Result<()> {
-    // TODO Enable this later when we enable cuda.
-    if device.is_cuda() {
-        return Ok(());
-    }
     let (m, k, n) = (3, 64, 4);
-    let lhs = (0..(m * k))
+    let lhs_s = (0..(m * k))
         .map(|v| v as f32 - (m * k) as f32 / 2.0)
         .collect::<Vec<_>>();
-    let tensor_lhs = Tensor::from_slice(&lhs, (m, k), device)?;
+    let lhs = Tensor::from_slice(&lhs_s, (m, k), device)?;
     let mut dst = vec![42.; 3 * 4];
     let mut rhs_t = vec![k_quants::BlockQ4_0::zeros(); 8];
     let rhs = (0..k * n)
||||||
@ -121,7 +119,7 @@ fn quantized_matmul_neg(device: &Device) -> Result<()> {
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
let tensor_rhs = Tensor::from_slice(&rhs, (n, k), device)?.t()?;
|
let tensor_rhs = Tensor::from_slice(&rhs, (n, k), device)?.t()?;
|
||||||
k_quants::BlockQ4_0::from_float(&rhs, &mut rhs_t)?;
|
k_quants::BlockQ4_0::from_float(&rhs, &mut rhs_t)?;
|
||||||
k_quants::matmul((m, k, n), &lhs, &rhs_t, &mut dst)?;
|
k_quants::matmul((m, k, n), &lhs_s, &rhs_t, &mut dst)?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
dst.iter().map(|x| x.round()).collect::<Vec<_>>(),
|
dst.iter().map(|x| x.round()).collect::<Vec<_>>(),
|
||||||
&[
|
&[
|
||||||
@ -129,7 +127,7 @@ fn quantized_matmul_neg(device: &Device) -> Result<()> {
|
|||||||
-196472.0, 63012.0, 324585.0, 587902.0
|
-196472.0, 63012.0, 324585.0, 587902.0
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
let mm = tensor_lhs.matmul(&tensor_rhs)?;
|
let mm = lhs.matmul(&tensor_rhs)?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
to_vec2_round(&mm, 0)?,
|
to_vec2_round(&mm, 0)?,
|
||||||
&[
|
&[
|
||||||
@@ -141,7 +139,7 @@ fn quantized_matmul_neg(device: &Device) -> Result<()> {
 
     let qtensor = quantized::QTensor::quantize(&tensor_rhs.t()?, GgmlDType::Q4_0)?;
     let matmul = quantized::QMatMul::from_qtensor(qtensor)?;
-    let res = matmul.forward(&tensor_lhs)?;
+    let res = matmul.forward(&lhs)?;
     match device {
         Device::Metal(_) => assert_eq!(
             to_vec2_round(&res, 0)?,
@@ -151,7 +149,15 @@ fn quantized_matmul_neg(device: &Device) -> Result<()> {
                 [-196102.0, 63022.0, 324233.0, 587191.0]
             ]
         ),
-        _ => assert_eq!(
+        Device::Cuda(_) => assert_eq!(
+            to_vec2_round(&res, 0)?,
+            &[
+                [243740.0, -19762.0, -285476.0, -550498.0],
+                [23774.0, 21645.0, 19395.0, 18364.0],
+                [-196045.0, 63030.0, 324120.0, 587079.0]
+            ]
+        ),
+        Device::Cpu => assert_eq!(
             to_vec2_round(&res, 0)?,
             &[
                 [243524.0, -19596.0, -285051.0, -549815.0],
@@ -160,22 +166,16 @@ fn quantized_matmul_neg(device: &Device) -> Result<()> {
             ]
         ),
     }
-
+    let lhs2 = Tensor::stack(&[&lhs, &lhs], 0)?;
+    let res2 = matmul.forward(&lhs2)?;
+    let res2 = res2.i(1)?;
+    let diff = (res - res2)?.abs()?.sum_all()?.to_vec0::<f32>()?;
+    assert_eq!(diff, 0.);
     Ok(())
 }
 
-test_device!(
-    quantized_matmul,
-    quantized_matmul_cpu,
-    quantized_matmul_cuda,
-    quantized_matmul_metal
-);
-test_device!(
-    quantized_matmul_neg,
-    quantized_matmul_neg_cpu,
-    quantized_matmul_neg_cuda,
-    quantized_matmul_neg_metal
-);
+test_device!(quantized_matmul, qmm_cpu, qmm_cuda, qmm_metal);
+test_device!(quantized_matmul_neg, qmm_n_cpu, qmm_n_cuda, qmm_n_metal);
 
 fn quantize_q4_0(device: &Device) -> Result<()> {
     let src = (0..32 * 4).map(|v| v as f32).collect::<Vec<_>>();
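The shortened test names come from test_device!, which stamps out one #[test] per backend. Roughly what the first invocation expands to (a sketch under the assumption that the macro's cfg-gating follows the cuda/metal feature flags; not the literal expansion):

// Approximate expansion of
// `test_device!(quantized_matmul, qmm_cpu, qmm_cuda, qmm_metal);`
// — one test per backend, all calling the same function.
#[test]
fn qmm_cpu() -> candle_core::Result<()> {
    quantized_matmul(&candle_core::Device::Cpu)
}

#[cfg(feature = "cuda")]
#[test]
fn qmm_cuda() -> candle_core::Result<()> {
    quantized_matmul(&candle_core::Device::new_cuda(0)?)
}

#[cfg(feature = "metal")]
#[test]
fn qmm_metal() -> candle_core::Result<()> {
    quantized_matmul(&candle_core::Device::new_metal(0)?)
}

With the early is_cuda() returns gone, the cuda variants now exercise the device-specific expected values added in the match arms above.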