mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 12:06:35 +00:00
Separate quantized phi-3 implementation. (#2157)
* Separate quantized phi-3 implementation.
* Integrate the quantized phi3 model.
* Small fixes, get the generation to work properly.
* Keep the old llama implementation around.
* Change the default.
This commit is contained in:
@@ -350,7 +350,7 @@ pub fn call_unary_contiguous_tiled(
|
||||
let pipeline = kernels.load_pipeline(device, Source::Unary, kernel_name.0)?;
|
||||
let encoder = command_buffer.new_compute_command_encoder();
|
||||
let tile_size = 2;
|
||||
let tiles = length.div_ceil(tile_size);
|
||||
let tiles = (length + tile_size - 1) / tile_size;
|
||||
|
||||
encoder.set_compute_pipeline_state(&pipeline);
|
||||
|
||||
|
Reference in New Issue
Block a user