Merge pull request #216 from LaurentMazare/llama_multiprocess2

TP sharding v2
This commit is contained in:
Nicolas Patry
2023-07-28 08:06:13 +01:00
committed by GitHub
10 changed files with 895 additions and 19 deletions

View File

@@ -19,8 +19,10 @@ byteorder = "1.4.3"
clap = { version = "4.2.4", features = ["derive"] }
# Re-enable this once 0.9.13 has been released, as it would include the cublas-f16 changes.
# cudarc = { version = "0.9.13", optional = true, features = ["f16"] }
# NOTE(review): `cudarc` is assigned twice in this fragment (a fork branch first, then the
# upstream git repository with the extra "nccl" feature). TOML rejects duplicate keys, so only
# one of the two entries can exist in a real manifest. This looks like the old and new sides
# of a diff shown together; confirm which entry is current before reusing this fragment.
cudarc = { git = "https://github.com/LaurentMazare/cudarc.git", branch = "cublas-bf16", features = ["f16"] }
# TODO: Switch back to the official gemm implementation if we manage to upstream the changes.
cudarc = { git = "https://github.com/coreylowman/cudarc.git", features = ["f16", "nccl"] }
# TODO: Switch back to the official gemm implementation once the following are available.
# https://github.com/sarah-ek/gemm/pull/8.
# https://github.com/sarah-ek/gemm/pull/9.
gemm = { git = "https://github.com/LaurentMazare/gemm.git" }
hf-hub = "0.2.0"
half = { version = "2.3.1", features = ["num-traits", "rand_distr"] }