mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 02:19:41 +00:00
merge base support for chroma, however it's not working correctly
This commit is contained in:
parent
dcf88d6e78
commit
30cf433ab4
5 changed files with 554 additions and 105 deletions
|
|
@@ -871,6 +871,18 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
|
|||
v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]
|
||||
v = ggml_cast(ctx, v, GGML_TYPE_F16);
|
||||
|
||||
if (mask != nullptr) {
|
||||
mask = ggml_transpose(ctx, mask);
|
||||
|
||||
if (mask->ne[1] < GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD)) {
|
||||
LOG_DEBUG("mask dims %ld, %ld, %ld, %ld\n", mask->ne[0], mask->ne[1], mask->ne[2], mask->ne[3]);
|
||||
LOG_DEBUG("needs padding, padding from %ld to %ld\n", mask->ne[1], GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD));
|
||||
mask = ggml_pad(ctx, mask, 0, GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) - mask->ne[1], 0, 0);
|
||||
}
|
||||
|
||||
mask = ggml_cast(ctx, mask, GGML_TYPE_F16);
|
||||
}
|
||||
|
||||
kqv = ggml_flash_attn_ext(ctx, q, k, v, mask, scale, 0, 0);
|
||||
ggml_flash_attn_ext_set_prec(kqv, GGML_PREC_F32);
|
||||
|
||||
|
|
@@ -883,7 +895,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
|
|||
auto kq = ggml_mul_mat(ctx, k, q); // [N * n_head, L_q, L_k]
|
||||
kq = ggml_scale_inplace(ctx, kq, scale);
|
||||
if (mask) {
|
||||
kq = ggml_add(ctx, kq, mask);
|
||||
kq = ggml_add_inplace(ctx, kq, mask);
|
||||
}
|
||||
if (diag_mask_inf) {
|
||||
kq = ggml_diag_mask_inf_inplace(ctx, kq, 0);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue