mirror of https://github.com/huggingface/candle.git

* Again set a few extra params.
* Use the appropriate kernel sizes.
* Add all the kernel sizes.
* Parallel compiling.
* Reduce the amount of parallelism.
* Add the missing kernel.
* Fix a typo.
* Remove bf16 support for now.
21 lines
559 B
TOML
[package]
name = "candle-flash-attn"
version = "0.1.0"
edition = "2021"

description = "Flash attention layer for the candle ML framework."
repository = "https://github.com/LaurentMazare/candle"
keywords = ["blas", "tensor", "machine-learning"]
categories = ["science"]
license = "MIT/Apache-2.0"
readme = "README.md"

[dependencies]
candle = { path = "../candle-core", features = ["cuda"] }
half = { version = "2.3.1", features = ["num-traits"] }

[build-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
num_cpus = "1.15.0"
rayon = "1.7.0"
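
The commit message mentions parallel compiling with reduced parallelism, and the build-dependencies (anyhow, num_cpus, rayon) suggest a build script that compiles the CUDA kernels concurrently. The following is only a minimal sketch of how such a build.rs could use those crates; the kernel file names in KERNEL_FILES, the nvcc flags, and the use of std::process::Command are illustrative assumptions, not the crate's actual build script.

// build.rs sketch (hypothetical): compile each CUDA kernel with nvcc,
// running the invocations in parallel via rayon, capped at the number
// of physical CPUs so the machine is not oversubscribed.
use rayon::prelude::*;
use std::path::PathBuf;
use std::process::Command;

// Hypothetical kernel sources; the real crate ships one file per kernel size.
const KERNEL_FILES: &[&str] = &[
    "kernels/flash_fwd_hdim64_fp16_sm80.cu",
    "kernels/flash_fwd_hdim128_fp16_sm80.cu",
];

fn main() -> anyhow::Result<()> {
    let out_dir = PathBuf::from(std::env::var("OUT_DIR")?);

    // Limit rayon's thread pool so each nvcc process gets a core to itself.
    rayon::ThreadPoolBuilder::new()
        .num_threads(num_cpus::get_physical())
        .build_global()?;

    KERNEL_FILES
        .par_iter()
        .try_for_each(|&src| -> anyhow::Result<()> {
            // Place the object file next to the other build outputs in OUT_DIR.
            let obj = out_dir.join(PathBuf::from(src).with_extension("o").file_name().unwrap());
            let status = Command::new("nvcc")
                .args(["-O3", "--gpu-architecture=sm_80", "-c", src, "-o"])
                .arg(&obj)
                .status()?;
            anyhow::ensure!(status.success(), "nvcc failed for {src}");
            Ok(())
        })?;
    Ok(())
}

Capping the thread pool at the physical core count is one way to read "Reduce the amount of parallelism": each nvcc invocation is itself heavyweight, so spawning one per logical CPU tends to thrash memory and slow the overall build.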