mtmd-debug: add color and rainbow mode (#23829)

* mtmd-debug: add color and rainbow mode

* fix M_PI

* max_dist
This commit is contained in:
Xuan-Son Nguyen 2026-05-28 20:59:14 +02:00 committed by GitHub
parent c8914ad4f4
commit 751ebd17a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 98 additions and 0 deletions

View file

@ -30,7 +30,9 @@ static void show_additional_info(int /*argc*/, char ** argv) {
" -p \"encode\" (debugging encode pass, default case):\n"
" --image can be:\n"
" \"white\", \"black\", \"gray\": filled 1.0f, 0.0f and 0.5f respectively\n"
" \"red\", \"green\", \"blue\": filled with respective colors\n"
" \"cb\": checkerboard pattern, alternate 1.0f and 0.0f\n"
" \"rainbow\": raspberry-pi-like rainbow pattern\n"
" --audio can be:\n"
" \"one\", \"zero\", \"half\": filled 1.0f, 0.0f and 0.5f respectively\n"
" \"1010\": checkerboard pattern, alternate 1.0f and 0.0f\n"
@ -144,6 +146,65 @@ int main(int argc, char ** argv) {
image[y][x * 3 + 2] = v;
}
}
} else if (input == "red") {
for (int i = 0; i < inp_size; ++i) {
auto row = std::vector<float>(inp_size * 3, 0.0f);
for (int j = 0; j < inp_size; ++j) {
row[j * 3 + 0] = 1.0f; // R channel
}
image.push_back(row);
}
} else if (input == "green") {
for (int i = 0; i < inp_size; ++i) {
auto row = std::vector<float>(inp_size * 3, 0.0f);
for (int j = 0; j < inp_size; ++j) {
row[j * 3 + 1] = 1.0f; // G channel
}
image.push_back(row);
}
} else if (input == "blue") {
for (int i = 0; i < inp_size; ++i) {
auto row = std::vector<float>(inp_size * 3, 0.0f);
for (int j = 0; j < inp_size; ++j) {
row[j * 3 + 2] = 1.0f; // B channel
}
image.push_back(row);
}
} else if (input == "rainbow") {
for (int i = 0; i < inp_size; ++i) {
image.push_back(std::vector<float>(inp_size * 3, 0.0f));
}
float cx = inp_size / 2.0f;
float cy = inp_size / 2.0f;
float max_dist = std::sqrt(cx * cx + cy * cy);
for (int y = 0; y < inp_size; ++y) {
for (int x = 0; x < inp_size; ++x) {
float dx = x - cx;
float dy = y - cy;
float hue = std::atan2(dy, dx) / (2.0f * 3.14159265f);
if (hue < 0) hue += 1.0f;
float sat = std::sqrt(dx * dx + dy * dy) / max_dist;
if (sat > 1.0f) sat = 1.0f;
float h6 = hue * 6.0f;
int i6 = (int)h6;
float f = h6 - i6;
float p = 1.0f - sat;
float q = 1.0f - sat * f;
float t = 1.0f - sat * (1.0f - f);
float r, g, b;
switch (i6 % 6) {
case 0: r=1; g=t; b=p; break;
case 1: r=q; g=1; b=p; break;
case 2: r=p; g=1; b=t; break;
case 3: r=p; g=q; b=1; break;
case 4: r=t; g=p; b=1; break;
default: r=1; g=p; b=q; break;
}
image[y][x * 3 + 0] = r;
image[y][x * 3 + 1] = g;
image[y][x * 3 + 2] = b;
}
}
} else if (input == "one") {
samples = std::vector<float>(inp_size, 1.0f);
} else if (input == "zero") {

View file

@ -20,6 +20,43 @@ def test_vision():
test_vision()
```
Example of debugging with a rainbow image (the same pattern that `--image rainbow` selects in `mtmd-debug`):
```py
import torch
import math
def make_rainbow(img_size):
    """Render an HSV colour wheel into a channel-first image tensor.

    For every pixel, hue is the angle of the pixel around the point
    (img_size / 2, img_size / 2) mapped onto [0, 1], saturation is the
    distance to that point divided by the centre-to-corner distance
    (clamped to 1), and value is fixed at 1.

    Args:
        img_size: Side length of the square image, in pixels.

    Returns:
        A tensor created with torch.zeros(1, 3, img_size, img_size) whose
        three channels hold R, G and B respectively.
    """
    half = img_size / 2.0
    corner_dist = math.sqrt(half * half + half * half)
    canvas = torch.zeros(1, 3, img_size, img_size)
    for row in range(img_size):
        for col in range(img_size):
            off_x = col - half
            off_y = row - half

            # atan2 yields (-pi, pi]; shift negative results up by one turn.
            turn = math.atan2(off_y, off_x) / (2 * math.pi)
            hue = turn + 1 if turn < 0 else turn

            radius = math.sqrt(off_x * off_x + off_y * off_y) / corner_dist
            sat = min(radius, 1.0)

            # Split the hue circle into six sectors; frac is the position
            # inside the current sector.
            scaled = hue * 6
            sector = int(scaled)
            frac = scaled - sector

            floor_c = 1 - sat
            falling = 1 - sat * frac
            rising = 1 - sat * (1 - frac)

            # The modulo folds a hue of exactly 1.0 back into sector 0.
            which = sector % 6
            if which == 0:
                red, green, blue = 1, rising, floor_c
            elif which == 1:
                red, green, blue = falling, 1, floor_c
            elif which == 2:
                red, green, blue = floor_c, 1, rising
            elif which == 3:
                red, green, blue = floor_c, falling, 1
            elif which == 4:
                red, green, blue = rising, floor_c, 1
            else:
                red, green, blue = 1, floor_c, falling

            for channel, level in enumerate((red, green, blue)):
                canvas[0, channel, row, col] = level
    return canvas
# Side length, in pixels, of the square test image passed to make_rainbow().
img_size = 896
# Synthetic rainbow input produced by make_rainbow() defined above.
pixel_values = make_rainbow(img_size)
# Call get_image_features with gradient tracking disabled.
# NOTE(review): `model` is not defined in this snippet; presumably it is
# loaded earlier in this document — confirm.
with torch.no_grad():
outputs = model.model.get_image_features(pixel_values=pixel_values)
# Print the last_hidden_state attribute of the returned outputs.
print("last_hidden_state:", outputs.last_hidden_state)
```
## Debugging preprocess pass
(TODO)