mirror of https://github.com/huggingface/candle.git
Merge pull request #24 from LaurentMazare/more-grads
Support gradients for reshape and where_cond.
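Not part of the commit, but a quick illustration of what it enables: once these two ops have backward rules, a graph that mixes where_cond and reshape can be differentiated end to end instead of failing with BackwardNotSupported. The sketch below uses today's candle-core API (the Var type, GradStore::get, Device::Cpu); at the time of this commit variables were plain Tensors flagged via is_variable, so treat the exact names as assumptions.

use candle_core::{DType, Device, Tensor, Var};

fn main() -> candle_core::Result<()> {
    let dev = Device::Cpu;
    let x = Var::new(&[[1f32, 2.], [3., 4.]], &dev)?;
    let pred = Tensor::new(&[[1u8, 0], [0, 1]], &dev)?;
    let fallback = Tensor::zeros((2, 2), DType::F32, &dev)?;

    // Both of these ops gain a backward rule in this PR.
    let selected = pred.where_cond(x.as_tensor(), &fallback)?; // pick x where pred != 0
    let flat = selected.reshape((4,))?;
    let loss = flat.sum_all()?;

    let grads = loss.backward()?;
    // d loss / d x is 1 where pred selected x and 0 elsewhere.
    println!("{:?}", grads.get(&x).map(|g| g.to_vec2::<f32>()));
    Ok(())
}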
@@ -106,9 +106,8 @@ impl Tensor {
             }
             let grad = grads.remove(node).unwrap();
             // TODO: We should perform all these operations in place (or at least not track the
-            // whole graph).
-            // The only drawback would be if we wanted to support grad of grad but this is out of
-            // scope.
+            // whole graph). The only drawback would be if we wanted to support grad of grad but
+            // this is out of scope.
             if let Some(op) = node.op() {
                 match op {
                     Op::Add(lhs, rhs) => {
@@ -139,8 +138,14 @@ impl Tensor {
                         let rhs_sum_grad = grads.or_insert(rhs)?;
                         *rhs_sum_grad = rhs_sum_grad.add(&rhs_grad)?;
                     }
-                    Op::WhereCond(_pred, _t, _f) => {
-                        return Err(Error::BackwardNotSupported { op: "where_cond" })
+                    Op::WhereCond(pred, t, f) => {
+                        let zeros = grad.zeros_like()?;
+                        let t_sum_grad = grads.or_insert(t)?;
+                        let t_grad = pred.where_cond(&grad, &zeros)?;
+                        *t_sum_grad = t_sum_grad.add(&t_grad)?;
+                        let f_sum_grad = grads.or_insert(f)?;
+                        let f_grad = pred.where_cond(&zeros, &grad)?;
+                        *f_sum_grad = f_sum_grad.add(&f_grad)?;
                     }
                     Op::Embedding(_lhs, _rhs) => {
                         return Err(Error::BackwardNotSupported { op: "embedding" })
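For reference, the rule the new WhereCond arm implements: the incoming gradient is routed to t wherever pred selected t and to f elsewhere, with zeros in the complementary positions, and each piece is accumulated into that tensor's running gradient via or_insert/add. A standalone sketch of just the masking step (assuming the current Tensor::new, where_cond and zeros_like signatures):

use candle_core::{Device, Tensor};

fn main() -> candle_core::Result<()> {
    let dev = Device::Cpu;
    let pred = Tensor::new(&[1u8, 0, 1], &dev)?;         // selection mask
    let grad = Tensor::new(&[0.1f32, 0.2, 0.3], &dev)?;  // incoming gradient
    let zeros = grad.zeros_like()?;

    // d loss / d t: keep grad where pred picked t, zero elsewhere -> [0.1, 0.0, 0.3]
    let t_grad = pred.where_cond(&grad, &zeros)?;
    // d loss / d f: the complementary mask -> [0.0, 0.2, 0.0]
    let f_grad = pred.where_cond(&zeros, &grad)?;

    println!("{:?} {:?}", t_grad.to_vec1::<f32>()?, f_grad.to_vec1::<f32>()?);
    Ok(())
}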
@@ -209,7 +214,11 @@ impl Tensor {
                     Op::Softmax(_arg, _) => {
                         return Err(Error::BackwardNotSupported { op: "softmax" })
                     }
-                    Op::Reshape(_arg) => return Err(Error::BackwardNotSupported { op: "reshape" }),
+                    Op::Reshape(arg) => {
+                        let arg_grad = grad.reshape(arg.dims())?;
+                        let sum_grad = grads.or_insert(arg)?;
+                        *sum_grad = sum_grad.add(&arg_grad)?
+                    }
                     Op::Gelu(_) => return Err(Error::BackwardNotSupported { op: "gelu" }),
                     Op::Relu(_) => return Err(Error::BackwardNotSupported { op: "relu" }),
                     Op::Sqr(arg) => {
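The Reshape rule above is the simplest of the new ones: reshape does not change any values, so its backward is just the inverse reshape of the incoming gradient back to arg.dims(), followed by the usual accumulation. A minimal sketch of that identity (setup code is assumed; reshape and dims are the same calls used in the hunk):

use candle_core::{Device, Tensor};

fn main() -> candle_core::Result<()> {
    let dev = Device::Cpu;
    let arg = Tensor::new(&[[1f32, 2., 3.], [4., 5., 6.]], &dev)?; // shape (2, 3)
    let reshaped = arg.reshape((3, 2))?;

    // Stand-in for the gradient flowing into `reshaped` (same shape as `reshaped`).
    let grad = reshaped.ones_like()?;
    // Backward of reshape: send the gradient back in the argument's original shape.
    let arg_grad = grad.reshape(arg.dims())?;
    assert_eq!(arg_grad.dims(), arg.dims());
    println!("{:?}", arg_grad.to_vec2::<f32>()?);
    Ok(())
}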
@@ -121,6 +121,7 @@ fn from_storage<S: Into<Shape>>(
 }
 
 impl Tensor {
+    // TODO: Maybe this should be a broadcast rather than actually creating the full tensor.
     fn ones_impl<S: Into<Shape>>(
         shape: S,
         dtype: DType,
@@ -144,6 +145,7 @@ impl Tensor {
         Tensor::ones(self.shape(), self.dtype(), &self.device())
     }
 
+    // TODO: Maybe this should be a broadcast rather than actually creating the full tensor.
     fn zeros_impl<S: Into<Shape>>(
         shape: S,
         dtype: DType,