mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Pixtral works only on CPU; however, the images are distorted
This commit is contained in:
parent
f1eb6c4e36
commit
2f645bb1b4
3 changed files with 15 additions and 0 deletions
|
@ -3560,6 +3560,10 @@ bool clip_is_gemma3(const struct clip_ctx * ctx) {
|
|||
return ctx->proj_type == PROJECTOR_TYPE_GEMMA3;
|
||||
}
|
||||
|
||||
// Returns true when the context's projector type is PROJECTOR_TYPE_PIXTRAL.
bool clip_is_pixtral(const struct clip_ctx * ctx) {
|
||||
return ctx->proj_type == PROJECTOR_TYPE_PIXTRAL;
|
||||
}
|
||||
|
||||
// Determine the number of encoder layers to iterate over
|
||||
int get_deepest_feature_layer(const struct clip_ctx * ctx) {
|
||||
// Get the index of the second to last layer; this is the
|
||||
|
|
|
@ -113,6 +113,7 @@ CLIP_API bool clip_is_glm(const struct clip_ctx * ctx);
|
|||
CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
|
||||
CLIP_API bool clip_is_llava(const struct clip_ctx * ctx);
|
||||
CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx);
|
||||
CLIP_API bool clip_is_pixtral(const struct clip_ctx * ctx);
|
||||
|
||||
CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);
|
||||
|
||||
|
|
|
@ -340,6 +340,16 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||
return false;
|
||||
}
|
||||
}
|
||||
else if (clip_is_pixtral(ctx_clip)){
|
||||
clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0);
|
||||
*n_img_pos = clip_n_patches_by_img(ctx_clip, img_res);
|
||||
bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); // image_embd shape is 576 x 4096
|
||||
if (!encoded) {
|
||||
LOG_ERR("Unable to encode image\n");
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
|
||||
// flat / default llava-1.5 type embedding
|
||||
*n_img_pos = clip_n_patches(ctx_clip);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue