mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 11:08:52 +00:00
Add the alloc_uninit function. (#1901)
* Add the alloc_uninit function. * Dummy metal fix. * Lazy initialization.
This commit is contained in:
@ -2582,7 +2582,10 @@ impl BackendStorage for CpuStorage {
|
||||
col.matmul(kernel, (b, m, n, k), &col_l, &kernel_l)?
|
||||
} else {
|
||||
// Make the kernel contiguous if not already the case.
|
||||
let mut kernel_c = self.device().zeros_impl(kernel_l.shape(), kernel.dtype())?;
|
||||
let mut kernel_c = unsafe {
|
||||
self.device()
|
||||
.alloc_uninit(kernel_l.shape(), kernel.dtype())?
|
||||
};
|
||||
kernel.copy_strided_src(&mut kernel_c, 0, kernel_l)?;
|
||||
let kernel_l = Layout::contiguous_with_offset((1, n, k), kernel_l.start_offset())
|
||||
.transpose(1, 2)?
|
||||
@ -2590,7 +2593,7 @@ impl BackendStorage for CpuStorage {
|
||||
col.matmul(kernel, (b, m, n, k), &col_l, &kernel_l)?
|
||||
};
|
||||
let res_l = Layout::contiguous((b, l_out, params.c_out)).transpose(1, 2)?;
|
||||
let mut res_t = self.device().zeros_impl(res_l.shape(), res.dtype())?;
|
||||
let mut res_t = unsafe { self.device().alloc_uninit(res_l.shape(), res.dtype())? };
|
||||
res.copy_strided_src(&mut res_t, 0, &res_l)?;
|
||||
Ok(res_t)
|
||||
}
|
||||
@ -2681,7 +2684,10 @@ impl BackendStorage for CpuStorage {
|
||||
col.matmul(kernel, (b, m, n, k), &col_l, &kernel_l)?
|
||||
} else {
|
||||
// Make the kernel contiguous if not already the case.
|
||||
let mut kernel_c = self.device().zeros_impl(kernel_l.shape(), kernel.dtype())?;
|
||||
let mut kernel_c = unsafe {
|
||||
self.device()
|
||||
.alloc_uninit(kernel_l.shape(), kernel.dtype())?
|
||||
};
|
||||
kernel.copy_strided_src(&mut kernel_c, 0, kernel_l)?;
|
||||
let kernel_l = Layout::contiguous_with_offset((1, n, k), kernel_l.start_offset())
|
||||
.transpose(1, 2)?
|
||||
@ -2691,7 +2697,7 @@ impl BackendStorage for CpuStorage {
|
||||
let res_l = Layout::contiguous((b, h_out, w_out, params.c_out))
|
||||
.transpose(1, 2)?
|
||||
.transpose(1, 3)?;
|
||||
let mut res_t = self.device().zeros_impl(res_l.shape(), res.dtype())?;
|
||||
let mut res_t = unsafe { self.device().alloc_uninit(res_l.shape(), res.dtype())? };
|
||||
res.copy_strided_src(&mut res_t, 0, &res_l)?;
|
||||
Ok(res_t)
|
||||
}
|
||||
@ -2919,6 +2925,53 @@ impl BackendDevice for CpuDevice {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::uninit_vec)]
|
||||
unsafe fn alloc_uninit(&self, shape: &Shape, dtype: DType) -> Result<CpuStorage> {
|
||||
let elem_count = shape.elem_count();
|
||||
// The code below is highly unsafe but hopefully not directly unsound as we only consider
|
||||
// types that are Copy, not Drop, and for which all bit patterns are proper values.
|
||||
// It's still pretty risky, see the following for more details:
|
||||
// https://github.com/rust-lang/rust-clippy/issues/4483
|
||||
let storage = match dtype {
|
||||
DType::U8 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::U8(v)
|
||||
}
|
||||
DType::U32 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::U32(v)
|
||||
}
|
||||
DType::I64 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::I64(v)
|
||||
}
|
||||
DType::BF16 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::BF16(v)
|
||||
}
|
||||
DType::F16 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::F16(v)
|
||||
}
|
||||
DType::F32 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::F32(v)
|
||||
}
|
||||
DType::F64 => {
|
||||
let mut v = Vec::with_capacity(elem_count);
|
||||
v.set_len(elem_count);
|
||||
CpuStorage::F64(v)
|
||||
}
|
||||
};
|
||||
Ok(storage)
|
||||
}
|
||||
|
||||
fn ones_impl(&self, shape: &Shape, dtype: DType) -> Result<CpuStorage> {
|
||||
let elem_count = shape.elem_count();
|
||||
let storage = match dtype {
|
||||
|
Reference in New Issue
Block a user