Separate quantized phi-3 implementation. (#2157)

* Separate quantized phi-3 implementation.
* Integrate the quantized phi3 model.
* Small fixes, get the generation to work properly.
* Keep the old llama implementation around.
* Change the default.
2025-06-20 12:06:35 +00:00 · 2024-05-04 10:14:57 +02:00
parent 59b18d974e
commit b13a82a438
7 changed files with 323 additions and 12 deletions
--- a/candle-metal-kernels/src/lib.rs
+++ b/candle-metal-kernels/src/lib.rs
@@ -350,7 +350,7 @@ pub fn call_unary_contiguous_tiled(
    let pipeline = kernels.load_pipeline(device, Source::Unary, kernel_name.0)?;
    let encoder = command_buffer.new_compute_command_encoder();
    let tile_size = 2;
-    let tiles = length.div_ceil(tile_size);
+    let tiles = (length + tile_size - 1) / tile_size;

    encoder.set_compute_pipeline_state(&pipeline);