Separate quantized phi-3 implementation. (#2157)

* Separate quantized phi-3 implementation.

* Integrate the quantized phi3 model.

* Small fixes, get the generation to work properly.

* Keep the old llama implementation around.

* Change the default.
This commit is contained in:
Laurent Mazare
2024-05-04 10:14:57 +02:00
committed by GitHub
parent 59b18d974e
commit b13a82a438
7 changed files with 323 additions and 12 deletions

View File

@ -676,9 +676,6 @@ impl BackendStorage for MetalStorage {
}
}
if layout.is_contiguous() {
} else {
}
Ok(Self::new(buffer, device.clone(), el_count, dtype))
}

View File

@ -178,7 +178,7 @@ impl crate::CustomOp1 for ArgSort {
device.metal_device(),
&command_buffer,
kernels,
&name,
name,
nrows,
ncols,
ncols_pad,