mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge commit '4ccea213bc' into concedo_experimental
# Conflicts:
#	.devops/cpu.Dockerfile
#	.devops/cuda.Dockerfile
#	.devops/intel.Dockerfile
#	.devops/musa.Dockerfile
#	.devops/rocm.Dockerfile
#	.github/workflows/bench.yml.disabled
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	CMakeLists.txt
#	build-xcframework.sh
#	ci/run.sh
#	common/CMakeLists.txt
#	examples/llama.android/llama/build.gradle.kts
#	examples/perplexity/perplexity.cpp
#	examples/run/CMakeLists.txt
#	examples/server/tests/README.md
#	examples/sycl/win-build-sycl.bat
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cann/aclnn_ops.h
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-cpu/ggml-cpu.c
#	licenses/LICENSE-linenoise
#	scripts/sync-ggml.last
#	tests/CMakeLists.txt
This commit is contained in:
commit
b99ee451f8
29 changed files with 11032 additions and 12914 deletions
|
@ -1842,6 +1842,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|||
// can't use 256 for D==80.
|
||||
uint32_t wg_size = (small_rows && (D % 32) == 0) ? 256 : 128;
|
||||
auto rows_cols = fa_rows_cols(D, clamp, type, small_rows);
|
||||
// mask dim1 is padded to 64, we rely on this to avoid clamping mask loads
|
||||
GGML_ASSERT((GGML_KQ_MASK_PAD % rows_cols[0]) == 0);
|
||||
return {wg_size, rows_cols[0], rows_cols[1], (D), clamp};
|
||||
};
|
||||
|
||||
|
@ -5528,6 +5530,9 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx
|
|||
// the "aligned" shader variant will forcibly align strides, for performance
|
||||
(q_stride & 7) == 0 && (k_stride & 7) == 0 && (v_stride & 7) == 0;
|
||||
|
||||
// mask dim1 is padded to 64, we rely on this to avoid clamping mask loads
|
||||
GGML_ASSERT((nem1 % GGML_KQ_MASK_PAD) == 0);
|
||||
|
||||
vk_pipeline pipeline = pipelines[aligned];
|
||||
assert(pipeline);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue