pixtral is working only on cpu, however the images are distorted

This commit is contained in:
Concedo 2025-04-24 17:59:47 +08:00
parent f1eb6c4e36
commit 2f645bb1b4
3 changed files with 15 additions and 0 deletions

View file

@ -3560,6 +3560,10 @@ bool clip_is_gemma3(const struct clip_ctx * ctx) {
return ctx->proj_type == PROJECTOR_TYPE_GEMMA3;
}
bool clip_is_pixtral(const struct clip_ctx * ctx) {
return ctx->proj_type == PROJECTOR_TYPE_PIXTRAL;
}
// Determine the number of encoder layers to iterate over
int get_deepest_feature_layer(const struct clip_ctx * ctx) {
// Get the index of the second to last layer; this is the

View file

@ -113,6 +113,7 @@ CLIP_API bool clip_is_glm(const struct clip_ctx * ctx);
CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
CLIP_API bool clip_is_llava(const struct clip_ctx * ctx);
CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx);
CLIP_API bool clip_is_pixtral(const struct clip_ctx * ctx);
CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);

View file

@ -340,6 +340,16 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
return false;
}
}
else if (clip_is_pixtral(ctx_clip)){
clip_image_f32 * img_res = clip_image_f32_get_img(img_res_v.get(), 0);
*n_img_pos = clip_n_patches_by_img(ctx_clip, img_res);
bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd); // image_embd shape is 576 x 4096
if (!encoded) {
LOG_ERR("Unable to encode image\n");
return false;
}
}
else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
// flat / default llava-1.5 type embedding
*n_img_pos = clip_n_patches(ctx_clip);