Merge pull request #24 from LaurentMazare/more-grads

Support gradients for reshape and where_cond.
Laurent Mazare authored 2023-06-28 10:04:51 +01:00, committed by GitHub
2 changed files with 17 additions and 6 deletions
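With both ops handled in the backward pass, a graph that mixes where_cond and reshape can be differentiated end to end instead of failing with Error::BackwardNotSupported. The snippet below is a rough usage sketch rather than code from this commit: the crate/import path, Tensor::var, Tensor::new, backward(), and the gradient store's get() are assumptions modelled on the repository's gradient tests, while where_cond, reshape, and zeros_like are the methods touched by this diff.

    use candle::{Device, Tensor};

    fn grad_demo() -> candle::Result<()> {
        let dev = Device::Cpu;
        // Leaf tensor tracked for gradients (Tensor::var is assumed, not part of this diff).
        let x = Tensor::var(&[3f32, 1., -4., 1.], &dev)?;
        // 0/1 predicate fed to where_cond (Tensor::new is assumed, not part of this diff).
        let pred = Tensor::new(&[1u32, 0, 0, 1], &dev)?;
        // Forward pass through the two newly differentiable ops.
        let y = pred.where_cond(&x, &x.zeros_like()?)?; // keep x where pred != 0, else 0
        let z = y.reshape((2, 2))?;
        // Backward pass (assumed to seed the output gradient with ones); the gradient of x
        // should be 1 where pred != 0 and 0 elsewhere.
        let grads = z.backward()?;
        println!("{:?}", grads.get(&x));
        Ok(())
    }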


@@ -106,9 +106,8 @@ impl Tensor {
             }
             let grad = grads.remove(node).unwrap();
             // TODO: We should perform all these operations in place (or at least not track the
-            // whole graph).
-            // The only drawback would be if we wanted to support grad of grad but this is out of
-            // scope.
+            // whole graph). The only drawback would be if we wanted to support grad of grad but
+            // this is out of scope.
             if let Some(op) = node.op() {
                 match op {
                     Op::Add(lhs, rhs) => {
@ -139,8 +138,14 @@ impl Tensor {
let rhs_sum_grad = grads.or_insert(rhs)?; let rhs_sum_grad = grads.or_insert(rhs)?;
*rhs_sum_grad = rhs_sum_grad.add(&rhs_grad)?; *rhs_sum_grad = rhs_sum_grad.add(&rhs_grad)?;
} }
Op::WhereCond(_pred, _t, _f) => { Op::WhereCond(pred, t, f) => {
return Err(Error::BackwardNotSupported { op: "where_cond" }) let zeros = grad.zeros_like()?;
let t_sum_grad = grads.or_insert(t)?;
let t_grad = pred.where_cond(&grad, &zeros)?;
*t_sum_grad = t_sum_grad.add(&t_grad)?;
let f_sum_grad = grads.or_insert(f)?;
let f_grad = pred.where_cond(&zeros, &grad)?;
*f_sum_grad = f_sum_grad.add(&f_grad)?;
} }
Op::Embedding(_lhs, _rhs) => { Op::Embedding(_lhs, _rhs) => {
return Err(Error::BackwardNotSupported { op: "embedding" }) return Err(Error::BackwardNotSupported { op: "embedding" })
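The new WhereCond arm routes the upstream gradient with the predicate itself: grad flows to the true branch t where pred holds (pred.where_cond(&grad, &zeros)) and to the false branch f elsewhere (pred.where_cond(&zeros, &grad)); the predicate gets no gradient. A dependency-free sketch of the same element-wise rule, using a hypothetical helper that is not part of the diff:

    // For each element, the upstream gradient g goes to exactly one of the two branches.
    fn where_cond_grads(pred: &[bool], g: &[f32]) -> (Vec<f32>, Vec<f32>) {
        let t_grad = pred.iter().zip(g).map(|(&p, &gi)| if p { gi } else { 0.0 }).collect();
        let f_grad = pred.iter().zip(g).map(|(&p, &gi)| if p { 0.0 } else { gi }).collect();
        (t_grad, f_grad)
    }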
@@ -209,7 +214,11 @@ impl Tensor {
                     Op::Softmax(_arg, _) => {
                         return Err(Error::BackwardNotSupported { op: "softmax" })
                     }
-                    Op::Reshape(_arg) => return Err(Error::BackwardNotSupported { op: "reshape" }),
+                    Op::Reshape(arg) => {
+                        let arg_grad = grad.reshape(arg.dims())?;
+                        let sum_grad = grads.or_insert(arg)?;
+                        *sum_grad = sum_grad.add(&arg_grad)?
+                    }
                     Op::Gelu(_) => return Err(Error::BackwardNotSupported { op: "gelu" }),
                     Op::Relu(_) => return Err(Error::BackwardNotSupported { op: "relu" }),
                     Op::Sqr(arg) => {
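The Reshape arm is simpler: a reshape never moves data, so its backward pass just views the upstream gradient with the argument's original dims (grad.reshape(arg.dims())?) and accumulates it into the argument's gradient slot. A dependency-free sketch with a hypothetical helper, not part of the diff:

    // The flat gradient data passes through unchanged; only the shape metadata reverts to
    // the argument's dims, mirroring grad.reshape(arg.dims()) in the new Op::Reshape arm.
    fn reshape_backward(grad: Vec<f32>, grad_dims: &[usize], arg_dims: &[usize]) -> (Vec<f32>, Vec<usize>) {
        assert_eq!(grad_dims.iter().product::<usize>(), arg_dims.iter().product::<usize>());
        (grad, arg_dims.to_vec())
    }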


@@ -121,6 +121,7 @@ fn from_storage<S: Into<Shape>>(
 }
 
 impl Tensor {
+    // TODO: Maybe this should be a broadcast rather than actually creating the full tensor.
     fn ones_impl<S: Into<Shape>>(
         shape: S,
         dtype: DType,
@@ -144,6 +145,7 @@ impl Tensor {
         Tensor::ones(self.shape(), self.dtype(), &self.device())
     }
 
+    // TODO: Maybe this should be a broadcast rather than actually creating the full tensor.
     fn zeros_impl<S: Into<Shape>>(
         shape: S,
         dtype: DType,