Avoid duplicating the storage by refcounting it.
@@ -17,7 +17,7 @@ impl TensorId {
 pub struct Tensor_ {
     id: TensorId,
-    storage: Storage,
+    storage: Arc<Storage>,
     shape: Shape,
     // The strides are given in number of elements and not in bytes.
     stride: Vec<usize>,
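This field change is the heart of the commit: the buffer now sits behind its own reference count inside the already refcounted Tensor_. A minimal sketch of the resulting layout, with stand-in field types rather than candle's real definitions:

    use std::sync::Arc;

    struct Storage; // stand-in for candle's CPU/CUDA storage enum

    struct Tensor_ {
        storage: Arc<Storage>, // shared buffer; shape/stride-only ops clone this
        shape: Vec<usize>,
        stride: Vec<usize>,
    }

    // Cloning a Tensor bumps the outer count (cheap op-graph building);
    // cloning the inner Arc shares one buffer across several views.
    struct Tensor(Arc<Tensor_>);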
@@ -25,6 +25,9 @@ pub struct Tensor_ {
     is_variable: bool,
 }
 
+// Tensors are refcounted so that cloning is cheap when building the op graph.
+// Storages are also refcounted independently so that it's possible to avoid
+// copying the storage for operations that only modify the shape or stride.
 #[derive(Clone)]
 pub struct Tensor(Arc<Tensor_>);
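The new comments describe two independent counts. Why the clones they enable are cheap can be seen with plain std types (nothing candle-specific):

    use std::sync::Arc;

    fn main() {
        let storage = Arc::new(vec![0.0f32; 1_000_000]);

        // Cloning an Arc copies a pointer and bumps an atomic counter;
        // the million-float buffer itself is never duplicated.
        let view = Arc::clone(&storage);
        assert_eq!(Arc::strong_count(&storage), 2);
        assert!(Arc::ptr_eq(&storage, &view));
    }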
@@ -104,7 +107,7 @@ fn from_storage(storage: Storage, shape: Shape, op: Option<Op>, is_variable: boo
     let stride = shape.stride_contiguous();
     let tensor_ = Tensor_ {
         id: TensorId::new(),
-        storage,
+        storage: Arc::new(storage),
         shape,
         stride,
         op,
@@ -274,7 +277,7 @@ impl Tensor {
             let data = S::cpu_storage_as_slice(cpu_storage)?;
             Ok::<_, Error>(data[0])
         };
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(cpu_storage) => from_cpu_storage(cpu_storage),
             Storage::Cuda(storage) => from_cpu_storage(&storage.to_cpu_storage()?),
         }
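Because the field is now an Arc<Storage>, `match &self.storage` would match against a &Arc<Storage> rather than the enum itself, so every match site switches to `as_ref()`, which borrows the inner value. A self-contained illustration (the Storage enum here is a stand-in):

    use std::sync::Arc;

    enum Storage {
        Cpu(Vec<f32>),
        Cuda(u32), // stand-in for a device buffer handle
    }

    fn backend(storage: &Arc<Storage>) -> &'static str {
        // as_ref() turns &Arc<Storage> into &Storage so the enum variants
        // can be matched directly (`match &**storage` would also work).
        match storage.as_ref() {
            Storage::Cpu(_) => "cpu",
            Storage::Cuda(_) => "cuda",
        }
    }

    fn main() {
        let s = Arc::new(Storage::Cpu(vec![1.0, 2.0]));
        assert_eq!(backend(&s), "cpu");
    }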
@@ -394,7 +397,7 @@ impl Tensor {
     /// into account so the size of the resulting buffer might be larger than the
     /// tensor's number of elements.
     pub fn storage_data<S: crate::WithDType>(&self) -> Result<std::borrow::Cow<[S]>> {
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(cpu_storage) => {
                 let slice = S::cpu_storage_as_slice(cpu_storage)?;
                 Ok(std::borrow::Cow::Borrowed(slice))
@@ -415,7 +418,7 @@ impl Tensor {
                 shape: self.shape().clone(),
             });
         }
-        match &self.storage {
+        match self.storage.as_ref() {
             Storage::Cpu(cpu_storage) => {
                 let data = S::cpu_storage_as_slice(cpu_storage)?;
                 Ok(self.strided_index().map(|i| data[i]).collect())
@@ -442,7 +445,7 @@ impl Tensor {
             assert!(src_index.next().is_none());
             Ok(rows)
         };
-        match &self.storage {
+        match self.storage.as_ref() {
            Storage::Cpu(storage) => from_cpu_storage(storage),
            Storage::Cuda(storage) => from_cpu_storage(&storage.to_cpu_storage()?),
        }
@@ -465,7 +468,7 @@ impl Tensor {
             assert!(src_index.next().is_none());
             Ok(top_rows)
         };
-        match &self.storage {
+        match self.storage.as_ref() {
            Storage::Cpu(storage) => from_cpu_storage(storage),
            Storage::Cuda(storage) => from_cpu_storage(&storage.to_cpu_storage()?),
        }
@@ -539,7 +542,7 @@ impl Tensor {
         };
         let tensor_ = Tensor_ {
             id: TensorId::new(),
-            storage: self.storage.try_clone()?,
+            storage: self.storage.clone(),
             shape: Shape::from(dims),
             stride,
             op,
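Here reshape stops deep-copying: try_clone duplicated the buffer (a fallible copy, hence the ?), while cloning the Arc just hands the same allocation to the new tensor. A sketch under stand-in types:

    use std::sync::Arc;

    struct Tensor_ {
        storage: Arc<Vec<f32>>, // stand-in for Arc<Storage>
        shape: Vec<usize>,
    }

    fn reshape(t: &Tensor_, dims: Vec<usize>) -> Tensor_ {
        Tensor_ {
            storage: Arc::clone(&t.storage), // pointer copy, no buffer copy
            shape: dims,
        }
    }

    fn main() {
        let t = Tensor_ { storage: Arc::new(vec![0.0; 6]), shape: vec![2, 3] };
        let r = reshape(&t, vec![3, 2]);
        assert!(Arc::ptr_eq(&t.storage, &r.storage)); // same allocation
    }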
@@ -557,7 +560,7 @@ impl Tensor {
     pub fn copy(&self) -> Result<Tensor> {
         let tensor_ = Tensor_ {
             id: TensorId::new(),
-            storage: self.storage.try_clone()?,
+            storage: Arc::new(self.storage.try_clone()?),
             shape: self.shape.clone(),
             stride: self.stride.clone(),
             op: self.op.clone(),
@@ -566,14 +569,12 @@ impl Tensor {
         Ok(Tensor(Arc::new(tensor_)))
     }
 
-    // TODO: Currently this duplicates the storage, the PyTorch version would share the storage,
-    // maybe we should do the same?
     /// Returns a new tensor detached from the current graph; gradients are not propagated through
     /// this new node.
     pub fn detach(&self) -> Result<Tensor> {
         let tensor_ = Tensor_ {
             id: TensorId::new(),
-            storage: self.storage.try_clone()?,
+            storage: self.storage.clone(),
             shape: self.shape.clone(),
             stride: self.stride.clone(),
             op: None,
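With the TODO resolved, detach now shares the buffer the way PyTorch does, while copy (above) keeps its deliberate deep copy, now wrapped in a fresh Arc. The distinction, sketched with stand-in types:

    use std::sync::Arc;

    struct Tensor_ {
        storage: Arc<Vec<f32>>,   // stand-in for Arc<Storage>
        op: Option<&'static str>, // stand-in for the op-graph link
    }

    // copy: a new, independent buffer, like candle's Arc::new(self.storage.try_clone()?)
    fn copy(t: &Tensor_) -> Tensor_ {
        Tensor_ { storage: Arc::new((*t.storage).clone()), op: t.op }
    }

    // detach: the same buffer, but severed from the graph
    fn detach(t: &Tensor_) -> Tensor_ {
        Tensor_ { storage: Arc::clone(&t.storage), op: None }
    }

    fn main() {
        let t = Tensor_ { storage: Arc::new(vec![1.0, 2.0]), op: Some("add") };
        assert!(!Arc::ptr_eq(&t.storage, &copy(&t).storage)); // duplicated
        assert!(Arc::ptr_eq(&t.storage, &detach(&t).storage)); // shared
        assert!(detach(&t).op.is_none());
    }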
@@ -587,7 +588,7 @@ impl Tensor {
         if self.device().same_id(device) {
             Ok(self.clone())
         } else {
-            let storage = match (&self.storage, device) {
+            let storage = match (self.storage.as_ref(), device) {
                 (Storage::Cpu(storage), Device::Cuda(cuda)) => {
                     Storage::Cuda(cuda.cuda_from_cpu_storage(storage)?)
                 }
@@ -607,7 +608,7 @@ impl Tensor {
             };
             let tensor_ = Tensor_ {
                 id: TensorId::new(),
-                storage,
+                storage: Arc::new(storage),
                 shape: self.shape.clone(),
                 stride: self.stride.clone(),
                 op,