mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-10 04:00:53 +00:00
graph : support cacheless embeddings with FA and iSWA (#16528)
* graph : support cacheless embeddings with FA and iSWA * cont : deduplicate mask creation * cont : fix name
This commit is contained in:
parent
5016b72862
commit
e38b7c6e9e
4 changed files with 87 additions and 51 deletions
|
|
@ -11358,8 +11358,8 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
|
|||
}
|
||||
};
|
||||
|
||||
struct llm_build_gemma_embedding_iswa : public llm_graph_context {
|
||||
llm_build_gemma_embedding_iswa(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
|
||||
struct llm_build_gemma_embedding : public llm_graph_context {
|
||||
llm_build_gemma_embedding(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
|
||||
const int64_t n_embd_head = hparams.n_embd_head_k;
|
||||
|
||||
ggml_tensor * cur;
|
||||
|
|
@ -11376,8 +11376,7 @@ struct llm_build_gemma_embedding_iswa : public llm_graph_context {
|
|||
// inp_pos - contains the positions
|
||||
ggml_tensor * inp_pos = build_inp_pos();
|
||||
|
||||
// TODO: support cacheless iSWA embeddings [TAG_NO_CACHE_ISWA]
|
||||
auto * inp_attn = build_attn_inp_kv_iswa();
|
||||
auto * inp_attn = build_attn_inp_no_cache();
|
||||
|
||||
ggml_tensor * inp_out_ids = build_inp_out_ids();
|
||||
|
||||
|
|
@ -19378,7 +19377,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
|
|||
case LLM_ARCH_NOMIC_BERT_MOE:
|
||||
case LLM_ARCH_NEO_BERT:
|
||||
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||
//case LLM_ARCH_GEMMA_EMBEDDING: // TODO: disabled until the cacheless SWA logic is fixed [TAG_NO_CACHE_ISWA]
|
||||
case LLM_ARCH_GEMMA_EMBEDDING:
|
||||
case LLM_ARCH_DREAM:
|
||||
case LLM_ARCH_LLADA:
|
||||
case LLM_ARCH_LLADA_MOE:
|
||||
|
|
@ -19671,7 +19670,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
|
|||
} break;
|
||||
case LLM_ARCH_GEMMA_EMBEDDING:
|
||||
{
|
||||
llm = std::make_unique<llm_build_gemma_embedding_iswa>(*this, params);
|
||||
llm = std::make_unique<llm_build_gemma_embedding>(*this, params);
|
||||
} break;
|
||||
case LLM_ARCH_STARCODER2:
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue