Add a slice_set op. (#2193)

* Add a slice_set op.

* Add some testing.

* Add the dedicated kv-cache module.

* Derive debug and clone.

* Expose more kv-cache functions.

* Return the current data when appending.

* Use the new cache in the quantized phi3 model.
This commit is contained in:
Laurent Mazare
2024-05-18 15:58:18 +02:00
committed by GitHub
parent 349c3e806a
commit 01545f7303
6 changed files with 209 additions and 23 deletions

View File

@@ -213,7 +213,7 @@ fn main() -> anyhow::Result<()> {
);
match args.which {
Which::Phi2 => Model::Phi2(Phi2::from_gguf(model, &mut file, &device)?),
-Which::Phi3 => Model::Phi3(Phi3::from_gguf(model, &mut file, &device)?),
+Which::Phi3 => Model::Phi3(Phi3::from_gguf(1, model, &mut file, &device)?),
Which::Phi3b => Model::Phi3b(Phi3b::from_gguf(model, &mut file, &device)?),
}
};