From c61fa9155da48049fc662a6c33bd4360dff57125 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 26 Aug 2024 13:58:18 +0800
Subject: [PATCH] handle oversized images by downscaling

Images wider or taller than 2048 px are shrunk with stb_image_resize
before the existing aspect-ratio letterboxing step runs.

scale_down_image() keeps the aspect ratio, fits the result inside both
limits, never produces a zero-sized side, and returns nullptr when its
arguments are invalid, the buffer cannot be allocated or the resize
fails. In that case the loader keeps working on the original image.

STB_IMAGE_RESIZE_IMPLEMENTATION is now defined in clip.cpp instead of
sdtype_adapter.cpp.
---
 examples/llava/clip.cpp            | 57 +++++++++++++++++++++++++++++-
 otherarch/sdcpp/sdtype_adapter.cpp |  2 +-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index fbd80134c..345ffb0d5 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -27,6 +27,9 @@
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
+#define STB_IMAGE_RESIZE_IMPLEMENTATION
+#include "stb_image_resize.h"
+
 #include <cassert>
 #include <cmath>
 #include <cstdlib>
@@ -1575,6 +1578,43 @@ uint8_t* make_new_letterbox_img(uint8_t* input_image, int nx, int ny, int nc, in
     }
     return letterboxed_image;
 }
+// Shrinks an image so it fits inside max_width x max_height, keeping its aspect ratio.
+// Returns a new malloc()'d buffer and writes the new size to nx/ny; input_image is not freed.
+// Returns nullptr and leaves nx/ny unchanged on bad arguments, allocation or resize failure.
+uint8_t* scale_down_image(uint8_t* input_image, int& nx, int& ny, int nc, int max_width, int max_height) {
+    if (input_image == nullptr || nx <= 0 || ny <= 0 || nc <= 0 || max_width <= 0 || max_height <= 0) {
+        return nullptr;
+    }
+    int new_width = nx;
+    int new_height = ny;
+    if (nx > max_width || ny > max_height) {
+        // 64-bit cross-multiplication picks the tighter limit, so the result fits both bounds
+        if ((long long)max_width * ny < (long long)max_height * nx) { // width is the binding limit
+            new_width = max_width;
+            new_height = (int)((long long)ny * max_width / nx);
+        } else { // height is the binding limit
+            new_height = max_height;
+            new_width = (int)((long long)nx * max_height / ny);
+        }
+        // extreme aspect ratios would otherwise round the short side down to 0 pixels
+        if (new_width < 1) { new_width = 1; }
+        if (new_height < 1) { new_height = 1; }
+    }
+    uint8_t* resized_image = (uint8_t*)malloc((size_t)new_width * new_height * nc);
+    if (resized_image == nullptr) {
+        printf("\nKCPP SD: clip resize buffer allocation failed!\n");
+        return nullptr;
+    }
+    int resok = stbir_resize_uint8(input_image, nx, ny, 0, resized_image, new_width, new_height, 0, nc);
+    if (!resok) {
+        printf("\nKCPP SD: clip resize image failed!\n");
+        free(resized_image);
+        return nullptr;
+    }
+    nx = new_width;
+    ny = new_height;
+    return resized_image;
+}
 
 bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img) {
     int nx, ny, nc;
@@ -1584,12 +1624,26 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length
         return false;
     }
 
+    float maxaspect = 4.0f;
+    int maxdims = 2048;
+
+    //check if image needs downscaling
+    if (nx > maxdims || ny > maxdims) {
+        LOG_TEE("\nImage requires resizing: original size %d x %d scaling to max %d px\n",nx,ny,maxdims);
+        uint8_t* resized_image = scale_down_image(data, nx, ny, nc, maxdims, maxdims);
+        if(resized_image!=nullptr)
+        {
+            stbi_image_free(data); // Free the original image buffer and assign the new one
+            data = resized_image;
+            LOG_TEE("Resized to clamped to %d x %d\n",nx,ny);
+        }
+    }
+
     float aspect_ratio = static_cast<float>(nx) / ny;
     int new_width = nx;
     int new_height = ny;
     bool need_letterbox = false;
     // Check if the image exceeds the aspect ratio limits
-    float maxaspect = 4.0f;
     if (aspect_ratio > maxaspect) {
         new_height = (int)(nx / maxaspect);
         need_letterbox = true;
@@ -1597,6 +1651,7 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length
         new_width = (int)(ny / maxaspect);
         need_letterbox = true;
     }
+
     if (need_letterbox) {
         LOG_TEE("\nImage requires letterboxing: %d x %d changed to %d x %d\n",nx,ny,new_width, new_height);
         uint8_t* letterboxed_image = make_new_letterbox_img(data, nx, ny, nc, new_width, new_height);
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
index bba8b3b63..237ed93e5 100644
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -30,7 +30,7 @@
 #define STB_IMAGE_WRITE_STATIC
 #include "stb_image_write.h"
 
-#define STB_IMAGE_RESIZE_IMPLEMENTATION
+// #define STB_IMAGE_RESIZE_IMPLEMENTATION //already defined in llava
 #include "stb_image_resize.h"
 
 const char* rng_type_to_str[] = {