mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Flash-Attn upgrade / SoftCap Candle-FlashAttn [3/n] (#2690)
* update flash-attn v1
* restore: hdim224
* add 224 flash_fwd_template
* remove whitespace
* softcap is working, including test and api.
* make softcap test case better
* unpadded lse added
This commit is contained in:
@@ -53,6 +53,7 @@ extern "C" void run_mha(
|
||||
|
||||
int is_bf16,
|
||||
int is_causal,
|
||||
int unpadded_lse,
|
||||
|
||||
int window_size_left,
|
||||
int window_size_right,
|
||||
@@ -128,6 +129,7 @@ extern "C" void run_mha(
|
||||
|
||||
params.is_seqlens_k_cumulative = true;
|
||||
params.num_splits = 1;
|
||||
params.unpadded_lse = unpadded_lse;
|
||||
|
||||
cudaStream_t stream = 0; // Use the default stream.
|
||||
run_mha_fwd(params, stream);
|
||||
|
Reference in New Issue
Block a user