Detach the tensors on batch-norm eval. (#1702)

* Detach the tensors on batch-norm eval. * Fix pyo3 bindings. * Black tweak. * Formatting. * Also update the pyo3-onnx formatting. * Apply black.
2025-06-19 19:58:35 +00:00 · 2024-02-13 14:26:32 +01:00
parent 13c67226e6
commit ad73e93da2
14 changed files with 117 additions and 27 deletions
--- a/candle-pyo3/py_src/candle/init.pyi
+++ b/candle-pyo3/py_src/candle/init.pyi
@ -88,23 +88,27 @@ class QTensor:
        Dequantizes the tensor.
        """
        pass
+
    @property
    def ggml_dtype(self) -> str:
        """
        Gets the tensors quantized dtype.
        """
        pass
+
    def matmul_t(self, lhs: Tensor) -> Tensor:
        """
        Performs a quantized matrix multiplication, with the quantized tensor as the right hand side.
        """
        pass
+
    @property
    def rank(self) -> int:
        """
        Gets the rank of the tensor.
        """
        pass
+
    @property
    def shape(self) -> Tuple[int]:
        """
@ -119,178 +123,213 @@ class Tensor:

    def __init__(self, data: _ArrayLike):
        pass
+
    def __add__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Add a scalar to a tensor or two tensors together.
        """
        pass
+
    def __eq__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __ge__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __getitem__(self, index: Union[Index, Tensor, Sequence[Index]]) -> "Tensor":
        """
        Return a slice of a tensor.
        """
        pass
+
    def __gt__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __le__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __lt__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __mul__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Multiply a tensor by a scalar or one tensor by another.
        """
        pass
+
    def __ne__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __radd__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Add a scalar to a tensor or two tensors together.
        """
        pass
+
    def __richcmp__(self, rhs: Union[Tensor, Scalar], op) -> "Tensor":
        """
        Compare a tensor with a scalar or one tensor with another.
        """
        pass
+
    def __rmul__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Multiply a tensor by a scalar or one tensor by another.
        """
        pass
+
    def __sub__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Subtract a scalar from a tensor or one tensor from another.
        """
        pass
+
    def __truediv__(self, rhs: Union[Tensor, Scalar]) -> "Tensor":
        """
        Divide a tensor by a scalar or one tensor by another.
        """
        pass
+
    def abs(self) -> Tensor:
        """
        Performs the `abs` operation on the tensor.
        """
        pass
+
    def argmax_keepdim(self, dim: int) -> Tensor:
        """
        Returns the indices of the maximum value(s) across the selected dimension.
        """
        pass
+
    def argmin_keepdim(self, dim: int) -> Tensor:
        """
        Returns the indices of the minimum value(s) across the selected dimension.
        """
        pass
+
    def broadcast_add(self, rhs: Tensor) -> Tensor:
        """
        Adds the two tensors, while broadcasting the right-hand-side tensor to match the shape of the left-hand-side tensor.
        """
        pass
+
    def broadcast_as(self, *shape: Shape) -> Tensor:
        """
        Broadcasts the tensor to the given shape.
        """
        pass
+
    def broadcast_div(self, rhs: Tensor) -> Tensor:
        """
        Divides the two tensors, while broadcasting the right-hand-side tensor to match the shape of the left-hand-side tensor.
        """
        pass
+
    def broadcast_left(self, *shape: Shape) -> Tensor:
        """
        Broadcasts the tensor to the given shape, adding new dimensions on the left.
        """
        pass
+
    def broadcast_mul(self, rhs: Tensor) -> Tensor:
        """
        Multiplies the two tensors, while broadcasting the right-hand-side tensor to match the shape of the left-hand-side tensor.
        """
        pass
+
    def broadcast_sub(self, rhs: Tensor) -> Tensor:
        """
        Subtracts the two tensors, while broadcasting the right-hand-side tensor to match the shape of the left-hand-side tensor.
        """
        pass
+
    def contiguous(self) -> Tensor:
        """
        Makes the tensor contiguous in memory.
        """
        pass
+
    def copy(self) -> Tensor:
        """
        Returns a copy of the tensor.
        """
        pass
+
    def cos(self) -> Tensor:
        """
        Performs the `cos` operation on the tensor.
        """
        pass
+
    def detach(self) -> Tensor:
        """
        Detach the tensor from the computation graph.
        """
        pass
+
    @property
    def device(self) -> Device:
        """
        Gets the tensor's device.
        """
        pass
+
    @property
    def dtype(self) -> DType:
        """
        Gets the tensor's dtype.
        """
        pass
+
    def exp(self) -> Tensor:
        """
        Performs the `exp` operation on the tensor.
        """
        pass
+
    def flatten_all(self) -> Tensor:
        """
        Flattens the tensor into a 1D tensor.
        """
        pass
+
    def flatten_from(self, dim: int) -> Tensor:
        """
        Flattens the tensor on the dimension indexes from `dim` (inclusive) to the last dimension.
        """
        pass
+
    def flatten_to(self, dim: int) -> Tensor:
        """
        Flattens the tensor on the dimension indexes from `0` to `dim` (inclusive).
        """
        pass
+
    def get(self, index: int) -> Tensor:
        """
        Gets the value at the specified index.
        """
        pass
+
    def index_select(self, rhs: Tensor, dim: int) -> Tensor:
        """
        Select values for the input tensor at the target indexes across the specified dimension.
@ -302,161 +341,192 @@ class Tensor:
        tensor.
        """
        pass
+
    def is_contiguous(self) -> bool:
        """
        Returns true if the tensor is contiguous in C order.
        """
        pass
+
    def is_fortran_contiguous(self) -> bool:
        """
        Returns true if the tensor is contiguous in Fortran order.
        """
        pass
+
    def log(self) -> Tensor:
        """
        Performs the `log` operation on the tensor.
        """
        pass
+
    def matmul(self, rhs: Tensor) -> Tensor:
        """
        Performs a matrix multiplication between the two tensors.
        """
        pass
+
    def max_keepdim(self, dim: int) -> Tensor:
        """
        Gathers the maximum value across the selected dimension.
        """
        pass
+
    def mean_all(self) -> Tensor:
        """
        Returns the mean of the tensor.
        """
        pass
+
    def min_keepdim(self, dim: int) -> Tensor:
        """
        Gathers the minimum value across the selected dimension.
        """
        pass
+
    def narrow(self, dim: int, start: int, len: int) -> Tensor:
        """
        Returns a new tensor that is a narrowed version of the input, the dimension `dim`
        ranges from `start` to `start + len`.
        """
        pass
+
    @property
    def nelement(self) -> int:
        """
        Gets the tensor's element count.
        """
        pass
+
    def powf(self, p: float) -> Tensor:
        """
        Performs the `pow` operation on the tensor with the given exponent.
        """
        pass
+
    def quantize(self, quantized_dtype: str) -> QTensor:
        """
        Quantize the tensor.
        """
        pass
+
    @property
    def rank(self) -> int:
        """
        Gets the tensor's rank.
        """
        pass
+
    def recip(self) -> Tensor:
        """
        Get the `recip` of the tensor.
        """
        pass
+
    def reshape(self, *shape: Shape) -> Tensor:
        """
        Reshapes the tensor to the given shape.
        """
        pass
+
    @property
    def shape(self) -> Tuple[int]:
        """
        Gets the tensor's shape.
        """
        pass
+
    def sin(self) -> Tensor:
        """
        Performs the `sin` operation on the tensor.
        """
        pass
+
    def sqr(self) -> Tensor:
        """
        Squares the tensor.
        """
        pass
+
    def sqrt(self) -> Tensor:
        """
        Calculates the square root of the tensor.
        """
        pass
+
    def squeeze(self, dim: int) -> Tensor:
        """
        Creates a new tensor with the specified dimension removed if its size was one.
        """
        pass
+
    @property
    def stride(self) -> Tuple[int]:
        """
        Gets the tensor's strides.
        """
        pass
+
    def sum_all(self) -> Tensor:
        """
        Returns the sum of the tensor.
        """
        pass
+
    def sum_keepdim(self, dim: Union[int, List[int]]) -> Tensor:
        """
        Returns the sum of all elements in the input tensor. The sum is performed over all the input dimensions.
        """
        pass
+
    def t(self) -> Tensor:
        """
        Transposes the tensor.
        """
        pass
+
    def to(self, *args, **kwargs) -> Tensor:
        """
        Performs Tensor dtype and/or device conversion.
        """
        pass
+
    def to_device(self, device: Union[str, Device]) -> Tensor:
        """
        Move the tensor to a new device.
        """
        pass
+
    def to_dtype(self, dtype: Union[str, DType]) -> Tensor:
        """
        Convert the tensor to a new dtype.
        """
        pass
+
    def to_torch(self) -> torch.Tensor:
        """
        Converts candle's tensor to pytorch's tensor
        """
        pass
+
    def transpose(self, dim1: int, dim2: int) -> Tensor:
        """
        Returns a tensor that is a transposed version of the input, the given dimensions are swapped.
        """
        pass
+
    def unsqueeze(self, dim: int) -> Tensor:
        """
        Creates a new tensor with a dimension of size one inserted at the specified position.
        """
        pass
+
    def values(self) -> _ArrayLike:
        """
        Gets the tensor's data as a Python scalar or array-like object.
        """
        pass
+
    def where_cond(self, on_true: Tensor, on_false: Tensor) -> Tensor:
        """
        Returns a tensor with the same shape as the input tensor, the values are taken from
--- a/candle-pyo3/py_src/candle/nn/container.py
+++ b/candle-pyo3/py_src/candle/nn/container.py
@ -57,12 +57,10 @@ class Sequential(Module):
    _modules: Dict[str, Module]  # type: ignore[assignment]

    @overload
-    def __init__(self, *args: Module) -> None:
-        ...
+    def __init__(self, *args: Module) -> None: ...

    @overload
-    def __init__(self, arg: "OrderedDict[str, Module]") -> None:
-        ...
+    def __init__(self, arg: "OrderedDict[str, Module]") -> None: ...

    def __init__(self, *args):
        super().__init__()
--- a/candle-pyo3/py_src/candle/nn/module.py
+++ b/candle-pyo3/py_src/candle/nn/module.py
@ -204,12 +204,10 @@ class Module:
    T_destination = TypeVar("T_destination", bound=Dict[str, Any])

    @overload
-    def state_dict(self, *, destination: T_destination, prefix: str = ..., keep_vars: bool = ...) -> T_destination:
-        ...
+    def state_dict(self, *, destination: T_destination, prefix: str = ..., keep_vars: bool = ...) -> T_destination: ...

    @overload
-    def state_dict(self, *, prefix: str = ..., keep_vars: bool = ...) -> Dict[str, Any]:
-        ...
+    def state_dict(self, *, prefix: str = ..., keep_vars: bool = ...) -> Dict[str, Any]: ...

    def state_dict(self, *args, destination=None, prefix="", keep_vars=False):
        r"""Returns a dictionary containing references to the whole state of the module.
@ -586,12 +584,10 @@ class Module:
        self: T,
        device: str = ...,
        dtype: Optional[Union[DType, str]] = ...,
-    ) -> T:
-        ...
+    ) -> T: ...

    @overload
-    def to(self: T, dtype: Union[DType, str]) -> T:
-        ...
+    def to(self: T, dtype: Union[DType, str]) -> T: ...

    def to(self, *args, **kwargs):
        r"""Moves and/or casts the parameters and buffers.
--- a/candle-pyo3/py_src/candle/nn/normalization.py
+++ b/candle-pyo3/py_src/candle/nn/normalization.py
@ -14,6 +14,7 @@ class LayerNorm(Module):
    math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
    """
+
    __constants__ = ["normalized_shape", "eps"]
    normalized_shape: Tuple[int, ...]
    eps: float
--- a/candle-pyo3/py_src/candle/onnx/init.pyi
+++ b/candle-pyo3/py_src/candle/onnx/init.pyi
@ -11,59 +11,69 @@ class ONNXModel:

    def __init__(self, path: str):
        pass
+
    @property
    def doc_string(self) -> str:
        """
        The doc string of the model.
        """
        pass
+
    @property
    def domain(self) -> str:
        """
        The domain of the operator set of the model.
        """
        pass
+
    def initializers(self) -> Dict[str, Tensor]:
        """
        Get the weights of the model.
        """
        pass
+
    @property
    def inputs(self) -> Optional[Dict[str, ONNXTensorDescription]]:
        """
        The inputs of the model.
        """
        pass
+
    @property
    def ir_version(self) -> int:
        """
        The version of the IR this model targets.
        """
        pass
+
    @property
    def model_version(self) -> int:
        """
        The version of the model.
        """
        pass
+
    @property
    def outputs(self) -> Optional[Dict[str, ONNXTensorDescription]]:
        """
        The outputs of the model.
        """
        pass
+
    @property
    def producer_name(self) -> str:
        """
        The producer of the model.
        """
        pass
+
    @property
    def producer_version(self) -> str:
        """
        The version of the producer of the model.
        """
        pass
+
    def run(self, inputs: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Run the model on the given inputs.
@ -81,6 +91,7 @@ class ONNXTensorDescription:
        The data type of the tensor.
        """
        pass
+
    @property
    def shape(self) -> Tuple[Union[int, str, Any]]:
        """