Avoid duplicating the storage by refcounting it.

Author: laurent
Date: 2023-06-24 07:03:21 +01:00
Parent: b4653e41be
Commit: 47f9c48e7c

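The change swaps `storage: Storage` for `storage: Arc<Storage>` inside `Tensor_`, so operations that only touch metadata (shape, stride) can clone the handle instead of the buffer. Below is a minimal sketch of why that is cheap, using a hypothetical `Storage` stand-in rather than the real enum:

use std::sync::Arc;

// Hypothetical stand-in for the real `Storage` enum.
struct Storage(Vec<f32>);

fn main() {
    let storage = Arc::new(Storage(vec![0f32; 1_000_000]));

    // A shape-only op clones the handle: the strong count goes from 1 to 2
    // and no element of the buffer is copied.
    let shared = Arc::clone(&storage);
    assert_eq!(Arc::strong_count(&storage), 2);

    // Both handles point at the same allocation.
    assert!(Arc::ptr_eq(&storage, &shared));
    println!("buffer len: {}", shared.0.len());
}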

@@ -17,7 +17,7 @@ impl TensorId {
 pub struct Tensor_ {
     id: TensorId,
-    storage: Storage,
+    storage: Arc<Storage>,
     shape: Shape,
     // The strides are given in number of elements and not in bytes.
     stride: Vec<usize>,
     op: Option<Op>,
@@ -25,6 +25,9 @@ pub struct Tensor_ {
     is_variable: bool,
 }

+// Tensors are refcounted so that cloning is cheap when building the op graph.
+// Storages are also refcounted independently so that its possible to avoid
+// copying the storage for operations that only modify the shape or stride.
 #[derive(Clone)]
 pub struct Tensor(Arc<Tensor_>);

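The comment added above describes two independent levels of refcounting. A simplified sketch of that layout (illustrative types, not the real candle ones): a `reshape`-style op builds a fresh `Tensor_` node but shares the same `Arc<Storage>`:

use std::sync::Arc;

struct Storage(Vec<f32>);

struct Tensor_ {
    storage: Arc<Storage>, // level 2: the data buffer
    shape: Vec<usize>,
}

#[derive(Clone)]
struct Tensor(Arc<Tensor_>); // level 1: the node in the op graph

impl Tensor {
    // A shape-only op: fresh metadata, shared buffer.
    fn reshape(&self, shape: Vec<usize>) -> Tensor {
        Tensor(Arc::new(Tensor_ {
            storage: self.0.storage.clone(), // refcount bump, no copy
            shape,
        }))
    }
}

fn main() {
    let t = Tensor(Arc::new(Tensor_ {
        storage: Arc::new(Storage(vec![1.0, 2.0, 3.0, 4.0])),
        shape: vec![4],
    }));
    let r = t.reshape(vec![2, 2]);
    // Distinct Tensor_ nodes, same underlying Storage.
    assert!(!Arc::ptr_eq(&t.0, &r.0));
    assert!(Arc::ptr_eq(&t.0.storage, &r.0.storage));
}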
@@ -104,7 +107,7 @@ fn from_storage(storage: Storage, shape: Shape, op: Option<Op>, is_variable: bool
     let stride = shape.stride_contiguous();
     let tensor_ = Tensor_ {
         id: TensorId::new(),
-        storage,
+        storage: Arc::new(storage),
         shape,
         stride,
         op,
@@ -274,7 +277,7 @@ impl Tensor {
             let data = S::cpu_storage_as_slice(cpu_storage)?;
             Ok::<_, Error>(data[0])
         };
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(cpu_storage) => from_cpu_storage(cpu_storage),
             Storage::Cuda(storage) => from_cpu_storage(&storage.to_cpu_storage()?),
         }
@@ -394,7 +397,7 @@ impl Tensor {
     /// into account so the size of the resulting buffer might be larger than the
     /// tensor number of elements.
     pub fn storage_data<S: crate::WithDType>(&self) -> Result<std::borrow::Cow<[S]>> {
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(cpu_storage) => {
                 let slice = S::cpu_storage_as_slice(cpu_storage)?;
                 Ok(std::borrow::Cow::Borrowed(slice))
@@ -415,7 +418,7 @@ impl Tensor {
                 shape: self.shape().clone(),
             });
         }
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(cpu_storage) => {
                 let data = S::cpu_storage_as_slice(cpu_storage)?;
                 Ok(self.strided_index().map(|i| data[i]).collect())
@@ -442,7 +445,7 @@ impl Tensor {
             assert!(src_index.next().is_none());
             Ok(rows)
         };
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(storage) => from_cpu_storage(storage),
             Storage::Cuda(storage) => from_cpu_storage(&storage.to_cpu_storage()?),
         }
@@ -465,7 +468,7 @@ impl Tensor {
             assert!(src_index.next().is_none());
             Ok(top_rows)
         };
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(storage) => from_cpu_storage(storage),
             Storage::Cuda(storage) => from_cpu_storage(&storage.to_cpu_storage()?),
         }
@@ -539,7 +542,7 @@ impl Tensor {
         };
         let tensor_ = Tensor_ {
             id: TensorId::new(),
-            storage: self.storage.try_clone()?,
+            storage: self.storage.clone(),
             shape: Shape::from(dims),
             stride,
             op,
@@ -557,7 +560,7 @@ impl Tensor {
     pub fn copy(&self) -> Result<Tensor> {
         let tensor_ = Tensor_ {
             id: TensorId::new(),
-            storage: self.storage.try_clone()?,
+            storage: Arc::new(self.storage.try_clone()?),
             shape: self.shape.clone(),
             stride: self.stride.clone(),
             op: self.op.clone(),
@@ -566,14 +569,12 @@ impl Tensor {
         Ok(Tensor(Arc::new(tensor_)))
     }

-    // TODO: Currently this duplicates the storage, the PyTorch version would share the storage,
-    // maybe we should do the same?
     /// Returns a new tensor detached from the current graph, gradient are not propagated through
     /// this new node.
     pub fn detach(&self) -> Result<Tensor> {
         let tensor_ = Tensor_ {
             id: TensorId::new(),
-            storage: self.storage.try_clone()?,
+            storage: self.storage.clone(),
             shape: self.shape.clone(),
             stride: self.stride.clone(),
             op: None,
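The deleted TODO is resolved by this hunk: `detach` now shares the storage the way PyTorch does, while `copy` (previous hunk) keeps its deep-copy semantics through `try_clone`. A small sketch of the resulting contrast, again with hypothetical stand-in types:

use std::sync::Arc;

#[derive(Clone)]
struct Storage(Vec<f32>);

struct Tensor_ {
    storage: Arc<Storage>,
}

struct Tensor(Arc<Tensor_>);

impl Tensor {
    // Mirrors `storage: self.storage.clone()`: a refcount bump, no copy.
    fn detach(&self) -> Tensor {
        Tensor(Arc::new(Tensor_ { storage: self.0.storage.clone() }))
    }

    // Mirrors `storage: Arc::new(self.storage.try_clone()?)`: a fresh buffer.
    fn copy(&self) -> Tensor {
        Tensor(Arc::new(Tensor_ { storage: Arc::new(self.0.storage.as_ref().clone()) }))
    }
}

fn main() {
    let t = Tensor(Arc::new(Tensor_ { storage: Arc::new(Storage(vec![1.0, 2.0])) }));
    assert!(Arc::ptr_eq(&t.0.storage, &t.detach().0.storage));
    assert!(!Arc::ptr_eq(&t.0.storage, &t.copy().0.storage));
}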
@@ -587,7 +588,7 @@ impl Tensor {
         if self.device().same_id(device) {
             Ok(self.clone())
         } else {
-            let storage = match (&self.storage, device) {
+            let storage = match (self.storage.as_ref(), device) {
                 (Storage::Cpu(storage), Device::Cuda(cuda)) => {
                     Storage::Cuda(cuda.cuda_from_cpu_storage(storage)?)
                 }
@@ -607,7 +608,7 @@ impl Tensor {
             };
             let tensor_ = Tensor_ {
                 id: TensorId::new(),
-                storage,
+                storage: Arc::new(storage),
                 shape: self.shape.clone(),
                 stride: self.stride.clone(),
                 op,