Add support for Llama 3.1 (#2359)

* Add Llama 3.1 rope * Clippy * Format * Clippy * Add support for multiple eos tokens: * Untagged either * Remove either dep and fix settings.json * Make the max positional embeddings configurable
2025-06-21 12:20:46 +00:00 · 2024-07-26 15:32:26 -04:00
parent ddafc61055
commit 0f5cbb08b3
24 changed files with 165 additions and 71 deletions
--- a/candle-core/benches/benchmarks/affine.rs
+++ b/candle-core/benches/benchmarks/affine.rs
@ -12,7 +12,7 @@ fn run_affine_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name:
    let m = 1024;
    let k = 1024;

-    let tensor = Tensor::zeros((b, m, k), dtype, &device).unwrap();
+    let tensor = Tensor::zeros((b, m, k), dtype, device).unwrap();

    let flops = b * m * k * dtype.size_in_bytes();

--- a/candle-core/benches/benchmarks/qmatmul.rs
+++ b/candle-core/benches/benchmarks/qmatmul.rs
@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput};
 use std::time::Instant;

 fn run(matmul: &QMatMul, x: &Tensor) {
-    matmul.forward(&x).unwrap();
+    matmul.forward(x).unwrap();
 }

 fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
 fn criterion_benchmark(c: &mut Criterion) {
    let handler = BenchDeviceHandler::new().unwrap();
    for device in handler.devices {
-        for dtype in vec![
+        for dtype in [
            GgmlDType::F32,
            GgmlDType::F16,
            GgmlDType::Q4_0,
--- a/candle-core/benches/benchmarks/unary.rs
+++ b/candle-core/benches/benchmarks/unary.rs
@ -12,7 +12,7 @@ fn run_unary_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &
    let m = 1024;
    let k = 1024;

-    let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, &device)
+    let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, device)
        .unwrap()
        .to_dtype(dtype)
        .unwrap()
--- a/candle-core/benches/benchmarks/where_cond.rs
+++ b/candle-core/benches/benchmarks/where_cond.rs
@ -25,9 +25,9 @@ const SIZE: usize = B * M * K;
 const DATA: [u8; SIZE] = create_cond_arr::<SIZE>();

 fn run_where_cond_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
-    let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), &device).unwrap();
-    let on_true = Tensor::ones((B, M, K), dtype, &device).unwrap();
-    let on_false = Tensor::zeros((B, M, K), dtype, &device).unwrap();
+    let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), device).unwrap();
+    let on_true = Tensor::ones((B, M, K), dtype, device).unwrap();
+    let on_false = Tensor::zeros((B, M, K), dtype, device).unwrap();

    let elements = B * M * K;
    // E.g. 2 f32 tensors + 1 u8 tensor