mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 19:18:50 +00:00
Async tensor copying. (#1900)
This commit is contained in:
@ -129,6 +129,8 @@ pub trait BackendDevice: Sized + std::fmt::Debug + Clone {
|
|||||||
|
|
||||||
fn storage_from_cpu_storage(&self, _: &CpuStorage) -> Result<Self::Storage>;
|
fn storage_from_cpu_storage(&self, _: &CpuStorage) -> Result<Self::Storage>;
|
||||||
|
|
||||||
|
/// Builds this backend's storage from a `CpuStorage` that is passed by value.
///
/// By-value counterpart of `storage_from_cpu_storage`, which receives the
/// same data as `&CpuStorage`. Implementations are free to consume the
/// argument.
fn storage_from_cpu_storage_owned(&self, _: CpuStorage) -> Result<Self::Storage>;
|
||||||
|
|
||||||
fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage>;
|
fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage>;
|
||||||
|
|
||||||
fn rand_normal(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage>;
|
fn rand_normal(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage>;
|
||||||
|
@ -2814,6 +2814,10 @@ impl BackendDevice for CpuDevice {
|
|||||||
Ok(s.clone())
|
Ok(s.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the given `CpuStorage` unchanged.
///
/// The argument is already owned by the caller's hand-off, so it is moved
/// straight into the `Ok` result without cloning.
fn storage_from_cpu_storage_owned(&self, storage: CpuStorage) -> Result<Self::Storage> {
    Ok(storage)
}
|
||||||
|
|
||||||
fn new(_: usize) -> Result<Self> {
|
fn new(_: usize) -> Result<Self> {
|
||||||
Ok(Self)
|
Ok(Self)
|
||||||
}
|
}
|
||||||
|
@ -420,6 +420,43 @@ impl BackendDevice for CudaDevice {
|
|||||||
device: self.clone(),
|
device: self.clone(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn storage_from_cpu_storage_owned(&self, storage: CpuStorage) -> Result<CudaStorage> {
|
||||||
|
let slice = match storage {
|
||||||
|
CpuStorage::U8(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::U8(data)
|
||||||
|
}
|
||||||
|
CpuStorage::U32(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::U32(data)
|
||||||
|
}
|
||||||
|
CpuStorage::I64(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::I64(data)
|
||||||
|
}
|
||||||
|
CpuStorage::BF16(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::BF16(data)
|
||||||
|
}
|
||||||
|
CpuStorage::F16(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::F16(data)
|
||||||
|
}
|
||||||
|
CpuStorage::F32(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::F32(data)
|
||||||
|
}
|
||||||
|
CpuStorage::F64(storage) => {
|
||||||
|
let data = self.htod_copy(storage).w()?;
|
||||||
|
CudaStorageSlice::F64(data)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(CudaStorage {
|
||||||
|
slice,
|
||||||
|
device: self.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -294,12 +294,12 @@ impl Device {
|
|||||||
Device::Cpu => Ok(Storage::Cpu(array.to_cpu_storage())),
|
Device::Cpu => Ok(Storage::Cpu(array.to_cpu_storage())),
|
||||||
Device::Cuda(device) => {
|
Device::Cuda(device) => {
|
||||||
let storage = array.to_cpu_storage();
|
let storage = array.to_cpu_storage();
|
||||||
let storage = device.storage_from_cpu_storage(&storage)?;
|
let storage = device.storage_from_cpu_storage_owned(storage)?;
|
||||||
Ok(Storage::Cuda(storage))
|
Ok(Storage::Cuda(storage))
|
||||||
}
|
}
|
||||||
Device::Metal(device) => {
|
Device::Metal(device) => {
|
||||||
let storage = array.to_cpu_storage();
|
let storage = array.to_cpu_storage();
|
||||||
let storage = device.storage_from_cpu_storage(&storage)?;
|
let storage = device.storage_from_cpu_storage_owned(storage)?;
|
||||||
Ok(Storage::Metal(storage))
|
Ok(Storage::Metal(storage))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -310,12 +310,12 @@ impl Device {
|
|||||||
Device::Cpu => Ok(Storage::Cpu(S::to_cpu_storage_owned(data))),
|
Device::Cpu => Ok(Storage::Cpu(S::to_cpu_storage_owned(data))),
|
||||||
Device::Cuda(device) => {
|
Device::Cuda(device) => {
|
||||||
let storage = S::to_cpu_storage_owned(data);
|
let storage = S::to_cpu_storage_owned(data);
|
||||||
let storage = device.storage_from_cpu_storage(&storage)?;
|
let storage = device.storage_from_cpu_storage_owned(storage)?;
|
||||||
Ok(Storage::Cuda(storage))
|
Ok(Storage::Cuda(storage))
|
||||||
}
|
}
|
||||||
Device::Metal(device) => {
|
Device::Metal(device) => {
|
||||||
let storage = S::to_cpu_storage_owned(data);
|
let storage = S::to_cpu_storage_owned(data);
|
||||||
let storage = device.storage_from_cpu_storage(&storage)?;
|
let storage = device.storage_from_cpu_storage_owned(storage)?;
|
||||||
Ok(Storage::Metal(storage))
|
Ok(Storage::Metal(storage))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -214,6 +214,10 @@ impl crate::backend::BackendDevice for CudaDevice {
|
|||||||
Err(Error::NotCompiledWithCudaSupport)
|
Err(Error::NotCompiledWithCudaSupport)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn storage_from_cpu_storage_owned(&self, _: CpuStorage) -> Result<Self::Storage> {
|
||||||
|
Err(Error::NotCompiledWithCudaSupport)
|
||||||
|
}
|
||||||
|
|
||||||
fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage> {
|
fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage> {
|
||||||
Err(Error::NotCompiledWithCudaSupport)
|
Err(Error::NotCompiledWithCudaSupport)
|
||||||
}
|
}
|
||||||
|
@ -226,6 +226,10 @@ impl crate::backend::BackendDevice for MetalDevice {
|
|||||||
Err(Error::NotCompiledWithMetalSupport)
|
Err(Error::NotCompiledWithMetalSupport)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn storage_from_cpu_storage_owned(&self, _: CpuStorage) -> Result<Self::Storage> {
|
||||||
|
Err(Error::NotCompiledWithMetalSupport)
|
||||||
|
}
|
||||||
|
|
||||||
fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage> {
|
fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage> {
|
||||||
Err(Error::NotCompiledWithMetalSupport)
|
Err(Error::NotCompiledWithMetalSupport)
|
||||||
}
|
}
|
||||||
|
@ -1867,6 +1867,10 @@ impl BackendDevice for MetalDevice {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds Metal storage from an owned `CpuStorage`.
///
/// This simply forwards to `storage_from_cpu_storage` with a borrow of the
/// argument; the owned value is dropped when this function returns.
fn storage_from_cpu_storage_owned(&self, storage: CpuStorage) -> Result<Self::Storage> {
    let borrowed = &storage;
    self.storage_from_cpu_storage(borrowed)
}
|
||||||
|
|
||||||
fn rand_uniform(
|
fn rand_uniform(
|
||||||
&self,
|
&self,
|
||||||
shape: &Shape,
|
shape: &Shape,
|
||||||
|
Reference in New Issue
Block a user