This commit is contained in:
laurent
2023-06-22 20:25:14 +01:00
parent aebffcfc13
commit 6463d661d8
2 changed files with 10 additions and 2 deletions

View File

@@ -14,5 +14,13 @@ fn main() -> Result<()> {
println!("{:?}", y.to_vec2::<f32>()?);
let z = x.matmul(&y)?;
println!("{:?}", z.to_vec2::<f32>()?);
+let x = Tensor::new(
+&[[11f32, 22.], [33., 44.], [55., 66.], [77., 78.]],
+&Device::Cpu,
+)?;
+let y = Tensor::new(&[[1f32, 2., 3.], [4., 5., 6.]], &Device::Cpu)?;
+println!("{:?}", y.to_vec2::<f32>()?);
+let z = x.matmul(&y)?;
+println!("{:?}", z.to_vec2::<f32>()?);
Ok(())
}

View File

@@ -177,7 +177,7 @@ fn gemm_config<T>(
gemm,
stride_a: (m * k) as i64,
stride_b: (n * k) as i64,
-stride_c: (m * n * k) as i64,
+stride_c: (m * n) as i64,
}
}
@@ -332,7 +332,7 @@ impl CudaStorage {
lhs_stride: &[usize],
rhs_stride: &[usize],
) -> Result<Self> {
-let elem_count = b * m * n * k;
+let elem_count = b * m * n;
let dev = &self.device;
let slice = match (&self.slice, &rhs.slice) {
(CudaStorageSlice::F32(lhs), CudaStorageSlice::F32(rhs)) => {