mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-05 23:41:45 +00:00
metal : FA support F32 K and V and head size = 32 (#16531)
* metal : FA support F32 K and V and head size = 32
* graph : remove obsolete comment [no ci]
This commit is contained in:
parent
e38b7c6e9e
commit
e60f241eac
4 changed files with 106 additions and 52 deletions
|
|
@ -1323,7 +1323,6 @@ ggml_tensor * llm_graph_context::build_attn_mha(
|
|||
|
||||
ggml_tensor * cur;
|
||||
|
||||
// TODO: replace hardcoded padding with ggml-provided padding
|
||||
if (cparams.flash_attn && kq_b == nullptr) {
|
||||
GGML_ASSERT(kq_b == nullptr && "Flash attention does not support KQ bias yet");
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue