Add a custom softmax implementation. (#744)

* Add a custom softmax implementation.

* Add softmaxlastdim to the benchmarks.

* And add a test.

* Support more dtypes.

* Polish the code.

* Use the slow implementation on cuda.

* Add a todo for the cuda kernel.
This commit is contained in:
Laurent Mazare
2023-09-05 15:20:23 +02:00
committed by GitHub
parent a8410bf35e
commit 1c9e5394a5
5 changed files with 109 additions and 18 deletions

View File

@@ -12,12 +12,16 @@ readme = "README.md"
[dependencies]
# Entries carrying `workspace = true` take their specification from the
# workspace root manifest's `[workspace.dependencies]` table.
# Keys are kept in alphabetical order.
accelerate-src = { workspace = true, optional = true }
# The `candle-core` package from the sibling directory, imported under the
# shorter name `candle`.
candle = { path = "../candle-core", version = "0.2.1", package = "candle-core" }
half.workspace = true
intel-mkl-src = { workspace = true, optional = true }
num-traits.workspace = true
rayon.workspace = true
safetensors.workspace = true
thiserror.workspace = true
[dev-dependencies]
# Used only when building tests, examples and benchmarks; both entries are
# inherited from the workspace root manifest.
anyhow.workspace = true
clap.workspace = true
[features]
default = []