mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/actions/windows-setup-curl/action.yml
#	.github/workflows/build-linux-cross.yml
#	README.md
#	common/CMakeLists.txt
#	examples/parallel/README.md
#	examples/parallel/parallel.cpp
#	ggml/src/ggml-sycl/element_wise.cpp
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	tools/server/README.md
This commit is contained in:
commit
59300dbdf5
25 changed files with 694 additions and 550 deletions
|
@ -5896,10 +5896,17 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
|
|||
vk_pipeline *pipelines;
|
||||
bool small_rows = N <= get_fa_num_small_rows(path);
|
||||
|
||||
// coopmat1 does not actually support "small rows" (it needs 16 rows).
|
||||
// So use scalar instead.
|
||||
if (small_rows && path == FA_COOPMAT1) {
|
||||
path = FA_SCALAR;
|
||||
}
|
||||
|
||||
// scalar is faster than coopmat2 when N==1
|
||||
if (N == 1 && path == FA_COOPMAT2) {
|
||||
path = FA_SCALAR;
|
||||
}
|
||||
|
||||
bool f32acc = path == FA_SCALAR || dst->op_params[3] == GGML_PREC_F32;
|
||||
|
||||
switch (path) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue