diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index f4754d59a..ccb4e7100 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1804,70 +1804,6 @@ static bool kcpp_eval_image(llama_context * ctx_llama, float * img_embd, int num } return true; } -static bool qwen2vl_eval_image_embed(llama_context * ctx_llama, float * image_embd, int num_img_tokens, - int n_batch, int * n_past) { - auto image_size = clip_get_load_image_size(clp_ctx); - int n_embd = llama_n_embd(llama_get_model(ctx_llama)); - const int patch_size = 14 * 2; - const int ph = image_size->height / patch_size + (image_size->height % patch_size > 0); - const int pw = image_size->width / patch_size + (image_size->width % patch_size > 0); - auto img_tokens = num_img_tokens; - // llama_pos mrope_pos[img_tokens * 4]; - std::vector mrope_pos; - mrope_pos.resize(img_tokens * 4); - - int st_pos_id = *n_past; - - for (int y = 0; y < ph; y++) - { - for (int x = 0; x < pw; x++) - { - int i = y * pw + x; - mrope_pos[i] = st_pos_id; - mrope_pos[i + img_tokens] = st_pos_id + y; - mrope_pos[i + img_tokens * 2] = st_pos_id + x; - mrope_pos[i + img_tokens * 3] = 0; - } - } - st_pos_id += std::max(pw, ph); - - int processed = 0; - std::vector batch_mrope_pos; - batch_mrope_pos.resize(img_tokens * 4); - - for (int i = 0; i < img_tokens; i += n_batch) { - int n_eval = img_tokens - i; - if (n_eval > n_batch) { - n_eval = n_batch; - } - - // llama_pos batch_mrope_pos[n_eval * 4]; - std::fill(batch_mrope_pos.begin(), batch_mrope_pos.end(), 0); - memcpy(batch_mrope_pos.data(), &mrope_pos[processed], n_eval * sizeof(llama_pos)); - memcpy(&batch_mrope_pos[n_eval * 1], &mrope_pos[img_tokens * 1 + processed], n_eval * sizeof(llama_pos)); - memcpy(&batch_mrope_pos[n_eval * 2], &mrope_pos[img_tokens * 2 + processed], n_eval * sizeof(llama_pos)); - memcpy(&batch_mrope_pos[n_eval * 3], &mrope_pos[img_tokens * 3 + processed], n_eval * sizeof(llama_pos)); - - llama_batch batch = { - int32_t(n_eval), // n_tokens - nullptr, // token - (image_embd+i*n_embd), // embed - batch_mrope_pos.data(), // pos - nullptr, // n_seq_id - nullptr, // seq_id - nullptr, // logits - }; - - if (llama_decode(ctx_llama, batch)) { - fprintf(stderr, "\n%s : failed to eval image\n", __func__); - return false; - } - *n_past += n_eval; - processed += n_eval; - } - return true; -} - //given an old GGUF context and a new context that has some middle portion removed, //find and remove the middle portion from the old context from the KV. Does not fast forward after this destructive action