mirror of https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
voxstral mini is really bad

parent abf527a207
commit 3284757b56

3 changed files with 29 additions and 5 deletions
@@ -112,6 +112,7 @@ static std::vector<int> last_media_mem; //for storing dummy tokens that will be
 static std::string media_composite_image_signature = ""; //for identifying when the llava images change, we need to invalidate the cache
 static int current_media_identifier = MEDIA_TOKEN_IDENTIFIER_A;
 static int vision_max_res = 2048;
+static bool use_mrope = false;

 static kcpp_params * kcpp_data = nullptr;
 static int max_context_limit_at_load = 0;
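Review note: this hunk hoists use_mrope to file scope; the hunks below delete the identical per-call locals, reset the flag at the top of gpttype_load_model, and set it once when an mrope model is detected. A minimal self-contained sketch of that pattern follows (the enums are stand-ins for koboldcpp's real FileFormat/GGUFArch types, and detect_mrope is a hypothetical wrapper, not an actual function in the repo):

    #include <cstdio>

    // Stand-in enums so the sketch compiles on its own; the real definitions
    // live in koboldcpp's model adapter headers.
    enum class FileFormat { GGUF_GENERIC, OTHER };
    enum class GGUFArch { ARCH_QWEN2VL, ARCH_DEFAULT };

    static bool use_mrope = false; // one file-scope flag instead of per-call locals

    // Hypothetical load-time helper: compute the predicate once, where the old
    // code re-derived it in speculative decoding, image eval, and generation.
    static void detect_mrope(FileFormat fmt, GGUFArch arch) {
        use_mrope = (fmt == FileFormat::GGUF_GENERIC && arch == GGUFArch::ARCH_QWEN2VL);
    }

    int main() {
        detect_mrope(FileFormat::GGUF_GENERIC, GGUFArch::ARCH_QWEN2VL);
        printf("use_mrope=%d\n", (int)use_mrope);
    }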
@@ -693,7 +694,7 @@ static speculative_draft_result speculative_decoding_eval_chunk(llama_context *

     std::vector<int> real_embd = drafted_ids;
     real_embd.pop_back();
-    bool use_mrope = (file_format==FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
+
     kcpp_embd_batch batch2 = kcpp_embd_batch(real_embd, actual_npast, use_mrope, true);
     auto draftok = (llama_decode(main_ctx, batch2.batch)==0); //actual eval for big model
     if(!draftok)
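Review note: this call site is the verification half of speculative decoding: every drafted id except the newest is re-scored by the big model in one batch, and only the prefix the big model agrees with is kept. A rough, self-contained sketch of the accept-longest-prefix idea (hypothetical helper, not koboldcpp's actual API):

    #include <cstdio>
    #include <vector>

    // The main model re-scores the drafted tokens in one batch; keep the
    // longest prefix where its greedy choice agrees with the draft model.
    static size_t accept_prefix(const std::vector<int> & drafted_ids,
                                const std::vector<int> & main_model_choices) {
        size_t n = 0;
        while (n < drafted_ids.size() && n < main_model_choices.size() &&
               drafted_ids[n] == main_model_choices[n]) {
            ++n; // token n is confirmed by the big model
        }
        return n; // tokens [0, n) are accepted; the rest are discarded
    }

    int main() {
        std::vector<int> draft = {5, 9, 12, 40};
        std::vector<int> main_choice = {5, 9, 7, 40};
        printf("accepted %zu of %zu drafted tokens\n",
               accept_prefix(draft, main_choice), draft.size());
    }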
@@ -1801,7 +1802,6 @@ static void load_grammar(const std::string & gammarstr)

 static bool kcpp_eval_image(llama_context * ctx_llama, float * img_embd, int num_img_tokens, int n_batch, int * n_past) {
     int n_embd = llama_n_embd(llama_get_model(ctx_llama));
-    bool use_mrope = (file_format==FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);

     for (int i = 0; i < num_img_tokens; i += n_batch) {
         int n_eval = num_img_tokens - i;
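Review note: kcpp_eval_image streams the image embedding matrix into the model at most n_batch rows at a time, advancing n_past as it goes; with the local use_mrope gone it now reads the file-scope flag. A self-contained sketch of just the chunking arithmetic (eval_chunk stubs the real llama_decode-based eval; n_embd floats per token row, as in the hunk):

    #include <cstdio>
    #include <vector>

    // Stub standing in for the real decode of one embedding chunk.
    static bool eval_chunk(const float * /*rows*/, int n_eval, int * n_past) {
        *n_past += n_eval; // image tokens occupy consecutive positions
        return true;
    }

    int main() {
        const int n_embd = 4, num_img_tokens = 10, n_batch = 4;
        std::vector<float> img_embd(size_t(num_img_tokens) * n_embd, 0.0f);
        int n_past = 0;
        for (int i = 0; i < num_img_tokens; i += n_batch) {
            int n_eval = num_img_tokens - i;
            if (n_eval > n_batch) { n_eval = n_batch; } // clamp the last chunk
            eval_chunk(img_embd.data() + size_t(i) * n_embd, n_eval, &n_past);
            printf("chunk at %d: %d tokens, n_past now %d\n", i, n_eval, n_past);
        }
    }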
@@ -1987,6 +1987,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     guidance_ctx = nullptr;
     audio_multimodal_supported = false;
     vision_multimodal_supported = false;
+    use_mrope = false;

     auto clamped_max_context_length = inputs.max_context_length;

@@ -2338,6 +2339,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     {
         printf("\nMRope is used, context shift will be disabled!\n");
         kcpp_data->use_contextshift = false;
+        use_mrope = true;
     }

     if(overwriteRope)
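Review note: the printf states the policy (mrope disables context shift); the likely reason, shown below as a toy illustration and not llama.cpp's actual cache layout, is that mrope positions are multi-component (temporal/height/width), so the scalar rewrite p -= n_discard that context shift performs on cached positions no longer describes them:

    #include <cstdio>
    #include <vector>

    // Toy mrope-style position: one scalar per axis instead of a single p.
    struct MRopePos { int t, h, w; };

    int main() {
        // Classic context shift: subtract n_discard from every scalar position.
        std::vector<int> scalar_pos = {4, 5, 6, 7};
        int n_discard = 2;
        for (int & p : scalar_pos) { p -= n_discard; } // fine for scalar RoPE
        printf("shifted scalar positions start at %d\n", scalar_pos[0]);

        // An mrope position has no single scalar to subtract from; a blanket
        // shift would corrupt the (t, h, w) components, hence use_contextshift = false.
        MRopePos vp = {4, 2, 3};
        printf("mrope position (t=%d, h=%d, w=%d)\n", vp.t, vp.h, vp.w);
    }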
@@ -3321,8 +3323,25 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
 media_object lv;
 lv.b64data = item;
 lv.is_audio = true;
-TokenizeString("<audio>", lv.chunk_start_seq, file_format, false);
-TokenizeString("</audio>\n\n", lv.chunk_end_seq, file_format, false);
+std::string aud_start = "<audio>";
+std::string aud_end = "</audio>\n\n";
+if(clp_ctx_a)
+{
+    int ptype = clip_get_projector_type_ext(clp_ctx_a);
+    if(ptype==14) //qwen omni
+    {
+        aud_start = "<|audio_bos|>";
+        aud_end = "<|audio_eos|>\n";
+    }
+    else if(ptype==16) //voxtral
+    {
+        aud_start = "[INST][BEGIN_AUDIO]";
+        aud_end = "[/INST]\n";
+    }
+}
+
+TokenizeString(aud_start, lv.chunk_start_seq, file_format, false);
+TokenizeString(aud_end, lv.chunk_end_seq, file_format, false);
 media_objects.push_back(lv);
 new_media_composite += item;
 }
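Review note: this is the substantive change of the commit: audio chunk delimiters are chosen per projector type instead of hard-coding <audio>…</audio>, so Qwen-Omni gets its special tokens and Voxtral gets an instruct-style wrapper. A self-contained sketch of the selection, with clip_get_projector_type_ext stubbed out (projector ids 14 and 16 as in the hunk):

    #include <cstdio>
    #include <string>

    // Stub for clip_get_projector_type_ext; the real one inspects the clip context.
    static int get_projector_type_stub() { return 16; /* pretend voxtral */ }

    int main() {
        std::string aud_start = "<audio>";      // default delimiters
        std::string aud_end   = "</audio>\n\n";
        int ptype = get_projector_type_stub();
        if (ptype == 14) {        // qwen omni
            aud_start = "<|audio_bos|>";
            aud_end   = "<|audio_eos|>\n";
        } else if (ptype == 16) { // voxtral
            aud_start = "[INST][BEGIN_AUDIO]";
            aud_end   = "[/INST]\n";
        }
        printf("wrapping audio as: %s ... %s", aud_start.c_str(), aud_end.c_str());
    }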
@@ -3502,7 +3521,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     int32_t nctx = kcpp_data->n_ctx;

     TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
-    bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
     TokenizeString("\nAttached Media:\n", media_intro, file_format, false);

     if(media_composite_image_signature=="")