Get the addition/multiplication to work.

commit f5b0aa815a
parent 6c5fc767a8
Author: laurent
Date:   2023-06-20 11:07:59 +01:00

2 changed files with 62 additions and 15 deletions


@@ -119,8 +119,8 @@ impl Storage {
         &self,
         rhs: &Self,
         shape: &Shape,
-        _lhs_stride: &[usize],
-        _rhs_stride: &[usize],
+        lhs_stride: &[usize],
+        rhs_stride: &[usize],
     ) -> Result<Self> {
         self.same_device(rhs, "add")?;
         self.same_dtype(rhs, "add")?;
@@ -130,16 +130,22 @@ impl Storage {
         // https://github.com/ggerganov/llama.cpp/blob/aacdbd40562684665b6f7b8ba6695b7a2088bbb0/ggml.c#L7895
         match (self, rhs) {
             (Storage::Cpu(lhs), Storage::Cpu(rhs)) => match (lhs, rhs) {
-                (CpuStorage::F32(_), CpuStorage::F32(_)) => {
-                    let elem_count = shape.elem_count();
-                    let data = vec![0f32; elem_count];
-                    // TODO: properly fill data with the sum
+                (CpuStorage::F32(lhs), CpuStorage::F32(rhs)) => {
+                    let lhs_index = StridedIndex::new(shape.dims(), lhs_stride);
+                    let rhs_index = StridedIndex::new(shape.dims(), rhs_stride);
+                    let data = lhs_index
+                        .zip(rhs_index)
+                        .map(|(lhs_i, rhs_i)| lhs[lhs_i] + rhs[rhs_i])
+                        .collect();
                     Ok(Storage::Cpu(CpuStorage::F32(data)))
                 }
-                (CpuStorage::F64(_), CpuStorage::F64(_)) => {
-                    let elem_count = shape.elem_count();
-                    let data = vec![0f64; elem_count];
-                    // TODO: properly fill data with the sum
+                (CpuStorage::F64(lhs), CpuStorage::F64(rhs)) => {
+                    let lhs_index = StridedIndex::new(shape.dims(), lhs_stride);
+                    let rhs_index = StridedIndex::new(shape.dims(), rhs_stride);
+                    let data = lhs_index
+                        .zip(rhs_index)
+                        .map(|(lhs_i, rhs_i)| lhs[lhs_i] + rhs[rhs_i])
+                        .collect();
                     Ok(Storage::Cpu(CpuStorage::F64(data)))
                 }
                 _ => {
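
Both new match arms rely on StridedIndex to turn each operand's (dims, stride) pair into a stream of flat buffer offsets; that is what lets add work on non-contiguous layouts without copying. The iterator itself is not part of this diff, so the following is only a minimal sketch of how such a type can be written, with assumed field names and row-major traversal order, not candle's actual implementation:

/// Sketch of a StridedIndex-style iterator (assumed semantics): walk the
/// logical positions of `dims` in row-major order and yield the flat offset
/// of each position under `stride`.
struct StridedIndex<'a> {
    next_pos: Option<Vec<usize>>, // next multi-dim position, or None when done
    dims: &'a [usize],
    stride: &'a [usize],
}

impl<'a> StridedIndex<'a> {
    fn new(dims: &'a [usize], stride: &'a [usize]) -> Self {
        // A shape with any zero-sized dimension yields no elements at all.
        let next_pos = if dims.iter().product::<usize>() == 0 {
            None
        } else {
            Some(vec![0; dims.len()])
        };
        StridedIndex { next_pos, dims, stride }
    }
}

impl<'a> Iterator for StridedIndex<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<usize> {
        let mut pos = self.next_pos.take()?;
        // The flat offset is the dot product of the position with the strides.
        let offset = pos.iter().zip(self.stride.iter()).map(|(p, s)| p * s).sum();
        // Advance odometer-style, incrementing the innermost dimension first.
        for (i, d) in self.dims.iter().enumerate().rev() {
            pos[i] += 1;
            if pos[i] < *d {
                self.next_pos = Some(pos);
                break;
            }
            pos[i] = 0; // this dimension wrapped; carry into the next one out
        }
        Some(offset)
    }
}

For a contiguous shape [2, 3] (stride [3, 1]) this yields 0, 1, 2, 3, 4, 5; for a transposed view (dims [2, 3], stride [1, 2]) it yields 0, 2, 4, 1, 3, 5, so the zip/map above visits matching logical elements even when the two operands have different layouts.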
@@ -158,12 +164,42 @@ impl Storage {
     pub(crate) fn mul_impl(
         &self,
         rhs: &Self,
-        _shape: &Shape,
-        _lhs_stride: &[usize],
-        _rhs_stride: &[usize],
+        shape: &Shape,
+        lhs_stride: &[usize],
+        rhs_stride: &[usize],
     ) -> Result<Self> {
         self.same_device(rhs, "mul")?;
         self.same_dtype(rhs, "mul")?;
-        todo!()
+        // TODO: share this code with the add implementation, using a macro or a trait?
+        match (self, rhs) {
+            (Storage::Cpu(lhs), Storage::Cpu(rhs)) => match (lhs, rhs) {
+                (CpuStorage::F32(lhs), CpuStorage::F32(rhs)) => {
+                    let lhs_index = StridedIndex::new(shape.dims(), lhs_stride);
+                    let rhs_index = StridedIndex::new(shape.dims(), rhs_stride);
+                    let data = lhs_index
+                        .zip(rhs_index)
+                        .map(|(lhs_i, rhs_i)| lhs[lhs_i] * rhs[rhs_i])
+                        .collect();
+                    Ok(Storage::Cpu(CpuStorage::F32(data)))
+                }
+                (CpuStorage::F64(lhs), CpuStorage::F64(rhs)) => {
+                    let lhs_index = StridedIndex::new(shape.dims(), lhs_stride);
+                    let rhs_index = StridedIndex::new(shape.dims(), rhs_stride);
+                    let data = lhs_index
+                        .zip(rhs_index)
+                        .map(|(lhs_i, rhs_i)| lhs[lhs_i] * rhs[rhs_i])
+                        .collect();
+                    Ok(Storage::Cpu(CpuStorage::F64(data)))
+                }
+                _ => {
+                    // This should be covered by the dtype check above.
+                    Err(Error::DTypeMismatchBinaryOp {
+                        lhs: lhs.dtype(),
+                        rhs: rhs.dtype(),
+                        op: "mul",
+                    })
+                }
+            },
+        }
     }
 }
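
The TODO at the top of mul_impl asks how this could be shared with the add implementation. One lightweight option, sketched below with an invented name (binary_map is an assumption, not an API in this commit), is a generic helper that owns the strided traversal and takes the element-wise operation as a closure:

// Hypothetical helper: everything except the element-wise closure is shared
// between add and mul (and any future binary op). `binary_map` is an invented
// name for illustration, not code from this diff.
fn binary_map<T: Copy, F: Fn(T, T) -> T>(
    shape: &Shape,
    lhs_stride: &[usize],
    rhs_stride: &[usize],
    lhs: &[T],
    rhs: &[T],
    f: F,
) -> Vec<T> {
    let lhs_index = StridedIndex::new(shape.dims(), lhs_stride);
    let rhs_index = StridedIndex::new(shape.dims(), rhs_stride);
    lhs_index
        .zip(rhs_index)
        .map(|(lhs_i, rhs_i)| f(lhs[lhs_i], rhs[rhs_i]))
        .collect()
}

// Each dtype arm would then collapse to a single call, e.g. for f32 add:
//     Ok(Storage::Cpu(CpuStorage::F32(binary_map(
//         shape, lhs_stride, rhs_stride, lhs, rhs, |l, r| l + r,
//     ))))

This leaves only the dtype dispatch duplicated between the two functions; a macro over the op token could remove that as well.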


@@ -1,15 +1,26 @@
 use candle::{DType, Device, Result, Tensor};
 
 #[test]
-fn add() -> Result<()> {
+fn zeros() -> Result<()> {
     let tensor = Tensor::zeros((5, 2), DType::F32, Device::Cpu);
     let (dim1, dim2) = tensor.shape().r2()?;
     assert_eq!(dim1, 5);
     assert_eq!(dim2, 2);
     Ok(())
 }
+
+#[test]
+fn add_mul() -> Result<()> {
+    let tensor = Tensor::new([3f32, 1., 4.].as_slice(), Device::Cpu)?;
+    let dim1 = tensor.shape().r1()?;
+    assert_eq!(dim1, 3);
+    let content: Vec<f32> = tensor.to_vec1()?;
+    assert_eq!(content, [3., 1., 4.]);
+    let tensor = Tensor::add(&tensor, &tensor)?;
+    let content: Vec<f32> = tensor.to_vec1()?;
+    assert_eq!(content, [6., 2., 8.]);
+    let tensor = Tensor::mul(&tensor, &tensor)?;
+    let content: Vec<f32> = tensor.to_vec1()?;
+    assert_eq!(content, [36., 4., 64.]);
+    Ok(())
+}