Mirror of https://github.com/huggingface/candle.git, synced 2025-06-20 04:00:28 +00:00
Add support for Llama 3.1 (#2359)
* Add Llama 3.1 rope (see the first sketch below)
* Clippy
* Format
* Clippy
* Add support for multiple eos tokens (see the second sketch below)
* Untagged either
* Remove either dep and fix settings.json
* Make the max positional embeddings configurable
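The "Llama 3.1 rope" bullet refers to the frequency-scaled rotary embeddings Llama 3.1 uses to extend its context window. Below is a minimal sketch of that scaling rule, assuming the `factor`, `low_freq_factor`, `high_freq_factor` and `original_max_position_embeddings` fields from the model's published `rope_scaling` config; the function name and signature are illustrative, not candle's actual API.

// Sketch of the Llama 3.1 rope frequency scaling. Parameter names follow the
// published `rope_scaling` config; the function itself is an assumption for
// illustration, not candle's implementation.
fn scale_rope_frequencies(
    inv_freqs: &[f32],
    factor: f32,
    low_freq_factor: f32,
    high_freq_factor: f32,
    original_max_position_embeddings: usize,
) -> Vec<f32> {
    let low_freq_wavelen = original_max_position_embeddings as f32 / low_freq_factor;
    let high_freq_wavelen = original_max_position_embeddings as f32 / high_freq_factor;
    inv_freqs
        .iter()
        .map(|&freq| {
            let wavelen = 2.0 * std::f32::consts::PI / freq;
            if wavelen < high_freq_wavelen {
                // High-frequency band: keep as-is.
                freq
            } else if wavelen > low_freq_wavelen {
                // Low-frequency band: stretch the wavelength by `factor`.
                freq / factor
            } else {
                // Transition band: interpolate smoothly between the two regimes.
                let smooth = (original_max_position_embeddings as f32 / wavelen - low_freq_factor)
                    / (high_freq_factor - low_freq_factor);
                (1.0 - smooth) * freq / factor + smooth * freq
            }
        })
        .collect()
}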
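The "multiple eos tokens" and "Untagged either" bullets describe replacing the `either` dependency with a serde-untagged enum, so the model config can carry either a single eos token id or a list of them. A rough sketch of the idea, with illustrative names that may not match candle's:

use serde::Deserialize;

// With `untagged`, serde tries each variant in order, so an `eos_token_id`
// field holding a single id or a list of ids both deserialize. The type and
// method names here are assumptions for illustration.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum EosTokenId {
    Single(u32),
    Multiple(Vec<u32>),
}

impl EosTokenId {
    // Check whether a generated token should stop the decoding loop.
    pub fn is_eos(&self, token: u32) -> bool {
        match self {
            EosTokenId::Single(id) => *id == token,
            EosTokenId::Multiple(ids) => ids.contains(&token),
        }
    }
}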
@@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput};
 use std::time::Instant;
 
 fn run(matmul: &QMatMul, x: &Tensor) {
-    matmul.forward(&x).unwrap();
+    matmul.forward(x).unwrap();
 }
 
 fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
@@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
 fn criterion_benchmark(c: &mut Criterion) {
     let handler = BenchDeviceHandler::new().unwrap();
    for device in handler.devices {
-        for dtype in vec![
+        for dtype in [
             GgmlDType::F32,
             GgmlDType::F16,
             GgmlDType::Q4_0,