koboldcpp/otherarch/sdcpp/upscaler.cpp
2026-05-18 21:19:12 +08:00

191 lines
7.3 KiB
C++

#include "upscaler.h"
#include "ggml_extend.hpp"
#include "model.h"
#include "stable-diffusion.h"
#include "util.h"
#include <utility>
/**
 * Store the upscaler configuration. Nothing is loaded here; backends and
 * weights are set up later by load_from_file().
 *
 * @param n_threads            Thread count kept for later compute calls.
 * @param direct               When true, load_from_file() enables direct conv2d on the ESRGAN runner.
 * @param tile_size            Tile edge length used when upscaling; values <= 0 disable tiling.
 * @param backend_spec         Backend specification string (taken by value and moved into the member).
 * @param params_backend_spec  Params-backend specification string (taken by value and moved into the member).
 */
UpscalerGGML::UpscalerGGML(int n_threads,
                           bool direct,
                           int tile_size,
                           std::string backend_spec,
                           std::string params_backend_spec)
    : n_threads(n_threads),
      direct(direct),
      tile_size(tile_size),
      backend_spec(std::move(backend_spec)),
      params_backend_spec(std::move(params_backend_spec)) {
}
/**
 * Record the graph VRAM limit and, if an ESRGAN runner already exists,
 * forward the new limit to it immediately.
 *
 * @param max_vram_bytes  Limit in bytes, stored in max_graph_vram_bytes.
 */
void UpscalerGGML::set_max_graph_vram_bytes(size_t max_vram_bytes) {
    // Always remember the value: load_from_file() applies the stored limit
    // to the runner it creates.
    max_graph_vram_bytes = max_vram_bytes;

    if (!esrgan_upscaler) {
        return;
    }
    esrgan_upscaler->set_max_graph_vram_bytes(max_vram_bytes);
}
/**
 * Initialise the backends and load the ESRGAN model found at `esrgan_path`.
 *
 * @param esrgan_path            Path of the ESRGAN model file.
 * @param offload_params_to_cpu  Forwarded to backend_manager.init().
 * @param n_threads              Thread count forwarded to the ESRGAN runner's
 *                               load_from_file() (shadows the member of the same name).
 * @return true when the backends are available and the model was loaded;
 *         false on any failure (an error is logged for the failures detected here).
 */
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
                                  bool offload_params_to_cpu,
                                  int n_threads) {
    ggml_log_set(ggml_log_callback_default, nullptr);

    std::string error;
    if (!backend_manager.init(backend_spec.c_str(),
                              params_backend_spec.c_str(),
                              offload_params_to_cpu,
                              false,
                              false,
                              false,
                              &error)) {
        LOG_ERROR("upscaler backend config failed: %s", error.c_str());
        return false;
    }

    // Fetch the runtime backend for a module, logging when it is unavailable.
    auto backend_for = [&](SDBackendModule module) {
        ggml_backend_t module_backend = backend_manager.runtime_backend(module);
        if (module_backend == nullptr) {
            LOG_ERROR("failed to initialize %s backend", sd_backend_module_name(module));
        }
        return module_backend;
    };

    // Fetch the params backend for a module, logging when it is unavailable.
    auto params_backend_for = [&](SDBackendModule module) {
        ggml_backend_t module_backend = backend_manager.params_backend(module);
        if (module_backend == nullptr) {
            LOG_ERROR("failed to initialize %s params backend", sd_backend_module_name(module));
        }
        return module_backend;
    };

    // True only when both the runtime and the params backend exist.
    auto ensure_backend_pair = [&](SDBackendModule module) {
        if (backend_for(module) == nullptr) {
            return false;
        }
        return params_backend_for(module) != nullptr;
    };

    if (!ensure_backend_pair(SDBackendModule::UPSCALER)) {
        return false;
    }

    ModelLoader model_loader;
    if (!model_loader.init_from_file_and_convert_name(esrgan_path)) {
        LOG_ERROR("init model loader from file failed: '%s'", esrgan_path.c_str());
        // Stop here: continuing would build the ESRGAN runner from a loader
        // that failed to initialise.
        return false;
    }
    model_loader.set_wtype_override(model_data_type);
    LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));

    esrgan_upscaler = std::make_shared<ESRGAN>(backend_for(SDBackendModule::UPSCALER),
                                               params_backend_for(SDBackendModule::UPSCALER),
                                               tile_size,
                                               model_loader.get_tensor_storage_map());
    // Apply any limit stored earlier via set_max_graph_vram_bytes().
    esrgan_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
    if (direct) {
        esrgan_upscaler->set_conv2d_direct_enabled(true);
    }

    if (!esrgan_upscaler->load_from_file(esrgan_path, n_threads)) {
        return false;
    }
    return true;
}
/**
 * Upscale a tensor with the loaded ESRGAN runner.
 *
 * The input is processed in a single compute call when tiling is disabled
 * (tile_size <= 0) or when its first two dimensions both fit within
 * tile_size; otherwise it is handed to process_tiles_2d(), which invokes the
 * runner per tile. The compute buffer is released afterwards in either case.
 *
 * @param input_tensor  Tensor to upscale.
 * @return The upscaled tensor, or an empty tensor when the computation failed.
 */
sd::Tensor<float> UpscalerGGML::upscale_tensor(const sd::Tensor<float>& input_tensor) {
    // Short-circuit keeps shape() untouched when tiling is disabled.
    const bool single_pass = tile_size <= 0 ||
                             (input_tensor.shape()[0] <= tile_size && input_tensor.shape()[1] <= tile_size);

    sd::Tensor<float> result;
    if (single_pass) {
        result = esrgan_upscaler->compute(n_threads, input_tensor);
    } else {
        // Per-tile callback: run the model on one tile and report failures.
        auto run_tile = [&](const sd::Tensor<float>& tile_in) -> sd::Tensor<float> {
            auto tile_out = esrgan_upscaler->compute(n_threads, tile_in);
            if (!tile_out.empty()) {
                return tile_out;
            }
            LOG_ERROR("esrgan compute failed while processing a tile");
            return {};
        };

        result = process_tiles_2d(input_tensor,
                                  static_cast<int>(input_tensor.shape()[0] * esrgan_upscaler->scale),
                                  static_cast<int>(input_tensor.shape()[1] * esrgan_upscaler->scale),
                                  esrgan_upscaler->scale,
                                  tile_size,
                                  tile_size,
                                  0.25f,
                                  false,
                                  false,
                                  run_tile);
    }

    esrgan_upscaler->free_compute_buffer();

    if (!result.empty()) {
        return result;
    }
    LOG_ERROR("esrgan compute failed");
    return {};
}
/**
 * Upscale an image with the loaded ESRGAN runner.
 *
 * @param input_image     Image to upscale.
 * @param upscale_factor  Not used by this function (original note: unused for
 *                        RealESRGAN_x4plus_anime_6B.pth); the logged output
 *                        size is derived from the runner's own scale.
 * @return The upscaled image, or {0, 0, 0, nullptr} when upscaling failed.
 */
sd_image_t UpscalerGGML::upscale(sd_image_t input_image, uint32_t upscale_factor) {
    const int target_width  = static_cast<int>(input_image.width) * esrgan_upscaler->scale;
    const int target_height = static_cast<int>(input_image.height) * esrgan_upscaler->scale;
    LOG_INFO("upscaling from (%i x %i) to (%i x %i)",
             input_image.width, input_image.height, target_width, target_height);

    sd::Tensor<float> source = sd_image_to_tensor(input_image);

    const int64_t started_ms = ggml_time_ms();
    sd::Tensor<float> result = upscale_tensor(source);
    if (result.empty()) {
        // Failure is reported to the caller as an image without data.
        sd_image_t empty_image = {0, 0, 0, nullptr};
        return empty_image;
    }

    sd_image_t output_image   = tensor_to_sd_image(result);
    const int64_t finished_ms = ggml_time_ms();
    LOG_INFO("input_image_tensor upscaled, taking %.2fs", (finished_ms - started_ms) / 1000.0f);
    return output_image;
}
// Handle type used by the free-function upscaler API in this file.
struct upscaler_ctx_t {
    // Created with `new` in new_upscaler_ctx() and deleted in free_upscaler_ctx().
    // Note: new_upscaler_ctx() obtains the struct via malloc, so this default
    // initializer is not what sets the field there; it is assigned explicitly.
    UpscalerGGML* upscaler{nullptr};
};
/**
 * Create an upscaler context and load the ESRGAN model into it.
 *
 * @param esrgan_path_c_str      Path of the ESRGAN model file; must not be null.
 * @param offload_params_to_cpu  Forwarded to UpscalerGGML::load_from_file().
 * @param direct                 Forwarded to the UpscalerGGML constructor.
 * @param n_threads              Thread count for both construction and loading.
 * @param tile_size              Forwarded to the UpscalerGGML constructor.
 * @param backend                Backend spec; passed through SAFE_STR before use.
 * @param params_backend         Params-backend spec; passed through SAFE_STR before use.
 * @return A context to be released with free_upscaler_ctx(), or nullptr when
 *         the path is null, allocation fails, or the model cannot be loaded.
 */
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
                                 bool offload_params_to_cpu,
                                 bool direct,
                                 int n_threads,
                                 int tile_size,
                                 const char* backend,
                                 const char* params_backend) {
    // Constructing std::string from a null pointer is undefined behaviour,
    // so reject it before touching anything else.
    if (esrgan_path_c_str == nullptr) {
        LOG_ERROR("upscaler model path is null");
        return nullptr;
    }

    upscaler_ctx_t* upscaler_ctx = static_cast<upscaler_ctx_t*>(malloc(sizeof(upscaler_ctx_t)));
    if (upscaler_ctx == nullptr) {
        return nullptr;
    }

    std::string esrgan_path(esrgan_path_c_str);
    upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct, tile_size, SAFE_STR(backend), SAFE_STR(params_backend));
    if (upscaler_ctx->upscaler == nullptr) {
        // Release the context allocated above instead of leaking it.
        free(upscaler_ctx);
        return nullptr;
    }

    if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads)) {
        delete upscaler_ctx->upscaler;
        upscaler_ctx->upscaler = nullptr;
        free(upscaler_ctx);
        return nullptr;
    }
    return upscaler_ctx;
}
/**
 * Upscale an image through an upscaler context.
 *
 * @param upscaler_ctx    Context created by new_upscaler_ctx().
 * @param input_image     Image to upscale.
 * @param upscale_factor  Forwarded unchanged to UpscalerGGML::upscale().
 * @return The upscaled image, or {0, 0, 0, nullptr} when the context is
 *         null/empty or the upscaler reports failure.
 */
sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor) {
    // Same validity checks as get_upscale_factor(): never dereference a
    // missing context or a context without an upscaler.
    if (upscaler_ctx == nullptr || upscaler_ctx->upscaler == nullptr) {
        LOG_ERROR("upscale called with an invalid upscaler context");
        sd_image_t empty_image = {0, 0, 0, nullptr};
        return empty_image;
    }
    return upscaler_ctx->upscaler->upscale(input_image, upscale_factor);
}
/**
 * Report the scale of the model held by an upscaler context.
 *
 * @param upscaler_ctx  Context to query; may be null.
 * @return The ESRGAN runner's scale, or 1 when the context, its upscaler, or
 *         the runner is missing.
 */
int get_upscale_factor(upscaler_ctx_t* upscaler_ctx) {
    if (upscaler_ctx == nullptr) {
        return 1;
    }

    const UpscalerGGML* upscaler = upscaler_ctx->upscaler;
    if (upscaler == nullptr) {
        return 1;
    }
    if (upscaler->esrgan_upscaler == nullptr) {
        return 1;
    }
    return upscaler->esrgan_upscaler->scale;
}
/**
 * Destroy an upscaler context created by new_upscaler_ctx().
 *
 * @param upscaler_ctx  Context to release; passing nullptr is a no-op.
 */
void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx) {
    // Tolerate a null context (as free() does) instead of dereferencing it.
    if (upscaler_ctx == nullptr) {
        return;
    }
    if (upscaler_ctx->upscaler != nullptr) {
        delete upscaler_ctx->upscaler;
        upscaler_ctx->upscaler = nullptr;
    }
    free(upscaler_ctx);
}