mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Pixtral currently works only on CPU; however, the images are distorted
This commit is contained in:
parent
f1eb6c4e36
commit
2f645bb1b4
3 changed files with 15 additions and 0 deletions
|
@ -3560,6 +3560,10 @@ bool clip_is_gemma3(const struct clip_ctx * ctx) {
|
||||||
return ctx->proj_type == PROJECTOR_TYPE_GEMMA3;
|
return ctx->proj_type == PROJECTOR_TYPE_GEMMA3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the given CLIP context uses the Pixtral projector, i.e.
// whether ctx->proj_type equals PROJECTOR_TYPE_PIXTRAL.
// ctx is dereferenced without a null check, so it must be a valid pointer.
bool clip_is_pixtral(const struct clip_ctx * ctx) {
|
||||||
|
return ctx->proj_type == PROJECTOR_TYPE_PIXTRAL;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine the number of encoder layers to iterate over
|
// Determine the number of encoder layers to iterate over
|
||||||
int get_deepest_feature_layer(const struct clip_ctx * ctx) {
|
int get_deepest_feature_layer(const struct clip_ctx * ctx) {
|
||||||
// Get the index of the second to last layer; this is the
|
// Get the index of the second to last layer; this is the
|
||||||
|
|
|
@ -113,6 +113,7 @@ CLIP_API bool clip_is_glm(const struct clip_ctx * ctx);
|
||||||
CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
|
CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
|
||||||
CLIP_API bool clip_is_llava(const struct clip_ctx * ctx);
|
CLIP_API bool clip_is_llava(const struct clip_ctx * ctx);
|
||||||
CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx);
|
CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx);
|
||||||
|
CLIP_API bool clip_is_pixtral(const struct clip_ctx * ctx);
|
||||||
|
|
||||||
CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);
|
CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);
|
||||||
|
|
||||||
|
|
|
@ -340,6 +340,16 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (clip_is_pixtral(ctx_clip)){
|
||||||
|
clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0);
|
||||||
|
*n_img_pos = clip_n_patches_by_img(ctx_clip, img_res);
|
||||||
|
bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); // image_embd shape is 576 x 4096
|
||||||
|
if (!encoded) {
|
||||||
|
LOG_ERR("Unable to encode image\n");
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
|
else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
|
||||||
// flat / default llava-1.5 type embedding
|
// flat / default llava-1.5 type embedding
|
||||||
*n_img_pos = clip_n_patches(ctx_clip);
|
*n_img_pos = clip_n_patches(ctx_clip);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue