Faster model weight loading.

2025-06-15 18:28:24 +00:00 · 2023-06-26 07:40:11 +01:00
parent d867155ef2
commit 11696e6377
2 changed files with 14 additions and 25 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ cudarc = { version = "0.9.9", optional = true }
 candle-kernels = { path = "kernels", optional = true }
 gemm = "0.15.4"
 zip = { version = "0.6.6", default-features=false }
+byteorder = "1.4.3"

 [dev-dependencies]
 anyhow = "1"
--- a/src/npy.rs
+++ b/src/npy.rs
@@ -26,6 +26,7 @@
 //! values = np.loadz("test.npz")
 //! ```
 use crate::{DType, Device, Error, Result, Shape, Tensor};
+use byteorder::{ByteOrder, LittleEndian};
 use std::collections::HashMap;
 use std::fs::File;
 use std::io::{BufReader, Read, Write};
@@ -192,37 +193,24 @@ impl Header {
 impl Tensor {
    fn from_reader<R: std::io::Read>(shape: Shape, dtype: DType, reader: &mut R) -> Result<Self> {
        let elem_count = shape.elem_count();
+        let mut data = Vec::new();
+        reader.read_to_end(&mut data)?;
        match dtype {
            DType::F32 => {
-                let mut data = Vec::new();
-                data.reserve(elem_count);
-                for _ in 0..elem_count {
-                    let mut buf = [0u8; 4];
-                    reader.read_exact(&mut buf)?;
-                    data.push(f32::from_le_bytes(buf))
-                }
-                // TODO: We should pass the ownership of data here rather than triggering a copy.
-                Tensor::from_slice(&data, shape, &Device::Cpu)
+                // TODO: Avoid the data being copied around multiple times.
+                let mut data_t = vec![0f32; elem_count];
+                LittleEndian::read_f32_into(&data, &mut data_t);
+                Tensor::from_slice(&data_t, shape, &Device::Cpu)
            }
            DType::F64 => {
-                let mut data = Vec::new();
-                data.reserve(elem_count);
-                for _ in 0..elem_count {
-                    let mut buf = [0u8; 8];
-                    reader.read_exact(&mut buf)?;
-                    data.push(f64::from_le_bytes(buf))
-                }
-                Tensor::from_slice(&data, shape, &Device::Cpu)
+                let mut data_t = vec![0f64; elem_count];
+                LittleEndian::read_f64_into(&data, &mut data_t);
+                Tensor::from_slice(&data_t, shape, &Device::Cpu)
            }
            DType::U32 => {
-                let mut data = Vec::new();
-                data.reserve(elem_count);
-                for _ in 0..elem_count {
-                    let mut buf = [0u8; 4];
-                    reader.read_exact(&mut buf)?;
-                    data.push(u32::from_le_bytes(buf))
-                }
-                Tensor::from_slice(&data, shape, &Device::Cpu)
+                let mut data_t = vec![0u32; elem_count];
+                LittleEndian::read_u32_into(&data, &mut data_t);
+                Tensor::from_slice(&data_t, shape, &Device::Cpu)
            }
        }
    }