mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 02:58:50 +00:00
Improve the gemm config.
This commit is contained in:
@ -150,6 +150,36 @@ pub struct CudaStorage {
|
|||||||
device: CudaDevice,
|
device: CudaDevice,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn gemm_config<T>(
|
||||||
|
alpha: T,
|
||||||
|
beta: T,
|
||||||
|
(b, m, n, k): (usize, usize, usize, usize),
|
||||||
|
lhs_stride: &[usize],
|
||||||
|
rhs_stride: &[usize],
|
||||||
|
) -> StridedBatchedConfig<T> {
|
||||||
|
// https://docs.nvidia.com/cuda/cublas/index.html#cublas-t-gemm
|
||||||
|
use cudarc::cublas::sys::cublasOperation_t;
|
||||||
|
let gemm = GemmConfig {
|
||||||
|
alpha,
|
||||||
|
beta,
|
||||||
|
m: m as i32,
|
||||||
|
n: n as i32,
|
||||||
|
k: k as i32,
|
||||||
|
lda: lhs_stride[lhs_stride.len() - 2] as i32,
|
||||||
|
ldb: rhs_stride[rhs_stride.len() - 2] as i32,
|
||||||
|
ldc: m as i32,
|
||||||
|
transa: cublasOperation_t::CUBLAS_OP_N,
|
||||||
|
transb: cublasOperation_t::CUBLAS_OP_N,
|
||||||
|
};
|
||||||
|
StridedBatchedConfig {
|
||||||
|
batch_size: b as i32,
|
||||||
|
gemm,
|
||||||
|
stride_a: lhs_stride[0] as i64,
|
||||||
|
stride_b: rhs_stride[0] as i64,
|
||||||
|
stride_c: (m * n * k) as i64,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl CudaStorage {
|
impl CudaStorage {
|
||||||
pub fn try_clone(&self) -> Result<Self> {
|
pub fn try_clone(&self) -> Result<Self> {
|
||||||
let slice = match &self.slice {
|
let slice = match &self.slice {
|
||||||
@ -301,30 +331,11 @@ impl CudaStorage {
|
|||||||
lhs_stride: &[usize],
|
lhs_stride: &[usize],
|
||||||
rhs_stride: &[usize],
|
rhs_stride: &[usize],
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
use cudarc::cublas::sys::cublasOperation_t;
|
|
||||||
let elem_count = b * m * n * k;
|
let elem_count = b * m * n * k;
|
||||||
let dev = &self.device;
|
let dev = &self.device;
|
||||||
let slice = match (&self.slice, &rhs.slice) {
|
let slice = match (&self.slice, &rhs.slice) {
|
||||||
(CudaStorageSlice::F32(lhs), CudaStorageSlice::F32(rhs)) => {
|
(CudaStorageSlice::F32(lhs), CudaStorageSlice::F32(rhs)) => {
|
||||||
let gemm = GemmConfig {
|
let cfg = gemm_config(1., 0., (b, m, n, k), lhs_stride, rhs_stride);
|
||||||
alpha: 1.,
|
|
||||||
beta: 1.,
|
|
||||||
m: m as i32,
|
|
||||||
n: n as i32,
|
|
||||||
k: k as i32,
|
|
||||||
lda: n as i32, // TODO
|
|
||||||
ldb: k as i32, // TODO
|
|
||||||
ldc: n as i32, // TODO
|
|
||||||
transa: cublasOperation_t::CUBLAS_OP_N,
|
|
||||||
transb: cublasOperation_t::CUBLAS_OP_T,
|
|
||||||
};
|
|
||||||
let cfg = StridedBatchedConfig {
|
|
||||||
batch_size: b as i32,
|
|
||||||
gemm,
|
|
||||||
stride_a: lhs_stride[0] as i64,
|
|
||||||
stride_b: rhs_stride[0] as i64,
|
|
||||||
stride_c: 42, // TODO,
|
|
||||||
};
|
|
||||||
let mut out = unsafe { dev.alloc::<f32>(elem_count) }?;
|
let mut out = unsafe { dev.alloc::<f32>(elem_count) }?;
|
||||||
unsafe {
|
unsafe {
|
||||||
self.device
|
self.device
|
||||||
@ -334,25 +345,7 @@ impl CudaStorage {
|
|||||||
CudaStorageSlice::F32(out)
|
CudaStorageSlice::F32(out)
|
||||||
}
|
}
|
||||||
(CudaStorageSlice::F64(lhs), CudaStorageSlice::F64(rhs)) => {
|
(CudaStorageSlice::F64(lhs), CudaStorageSlice::F64(rhs)) => {
|
||||||
let gemm = GemmConfig {
|
let cfg = gemm_config(1., 0., (b, m, n, k), lhs_stride, rhs_stride);
|
||||||
alpha: 1.,
|
|
||||||
beta: 1.,
|
|
||||||
m: m as i32,
|
|
||||||
n: n as i32,
|
|
||||||
k: k as i32,
|
|
||||||
lda: n as i32, // TODO
|
|
||||||
ldb: k as i32, // TODO
|
|
||||||
ldc: n as i32, // TODO
|
|
||||||
transa: cublasOperation_t::CUBLAS_OP_N,
|
|
||||||
transb: cublasOperation_t::CUBLAS_OP_T,
|
|
||||||
};
|
|
||||||
let cfg = StridedBatchedConfig {
|
|
||||||
batch_size: b as i32,
|
|
||||||
gemm,
|
|
||||||
stride_a: lhs_stride[0] as i64,
|
|
||||||
stride_b: rhs_stride[0] as i64,
|
|
||||||
stride_c: 42, // TODO,
|
|
||||||
};
|
|
||||||
let mut out = unsafe { dev.alloc::<f64>(elem_count) }?;
|
let mut out = unsafe { dev.alloc::<f64>(elem_count) }?;
|
||||||
unsafe {
|
unsafe {
|
||||||
self.device
|
self.device
|
||||||
|
Reference in New Issue
Block a user