mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 10:26:33 +00:00
Merge pull request #216 from LaurentMazare/llama_multiprocess2
TP sharding v2
This commit is contained in:
@ -19,8 +19,10 @@ byteorder = "1.4.3"
|
||||
clap = { version = "4.2.4", features = ["derive"] }
|
||||
# Re-enable this once 0.9.13 has been released, as it would include the cublas-f16 changes
|
||||
# cudarc = { version = "0.9.13", optional = true, features = ["f16"] }
|
||||
cudarc = { git = "https://github.com/LaurentMazare/cudarc.git", branch = "cublas-bf16", features = ["f16"] }
|
||||
# TODO: Switch back to the official gemm implementation if we manage to upstream the changes.
|
||||
cudarc = { git = "https://github.com/coreylowman/cudarc.git", features = ["f16", "nccl"] }
|
||||
# TODO: Switch back to the official gemm implementation once the following are available.
|
||||
# https://github.com/sarah-ek/gemm/pull/8.
|
||||
# https://github.com/sarah-ek/gemm/pull/9.
|
||||
gemm = { git = "https://github.com/LaurentMazare/gemm.git" }
|
||||
hf-hub = "0.2.0"
|
||||
half = { version = "2.3.1", features = ["num-traits", "rand_distr"] }
|
||||
|
Reference in New Issue
Block a user