Add a custom softmax implementation. (#744)

* Add a custom softmax implementation.
* Add softmaxlastdim to the benchmarks.
* And add a test.
* Support more dtypes.
* Polish the code.
* Use the slow implementation on cuda.
* Add a todo for the cuda kernel.
2025-06-16 02:38:10 +00:00 · 2023-09-05 15:20:23 +02:00
parent a8410bf35e
commit 1c9e5394a5
5 changed files with 109 additions and 18 deletions
--- a/candle-nn/Cargo.toml
+++ b/candle-nn/Cargo.toml
@@ -12,12 +12,16 @@ readme = "README.md"
 [dependencies]
 accelerate-src = { workspace = true, optional = true }
 candle = { path = "../candle-core", version = "0.2.1", package = "candle-core" }
+half = { workspace = true }
 thiserror = { workspace = true }
 intel-mkl-src = { workspace = true, optional = true }
+num-traits = { workspace = true }
+rayon = { workspace = true }
 safetensors = { workspace = true }

 [dev-dependencies]
 anyhow = { workspace = true }
+clap = { workspace = true }

 [features]
 default = []