From fab2ff0687ffb6e2f57e256900a55c616a52cf58 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 20 Oct 2025 10:45:34 +0800 Subject: [PATCH] sync sd.cpp to e370258 --- otherarch/sdcpp/main.cpp | 197 +++++++++++++++---------- otherarch/sdcpp/stable-diffusion.cpp | 210 ++++++++++++++++++--------- otherarch/sdcpp/stable-diffusion.h | 15 ++ 3 files changed, 276 insertions(+), 146 deletions(-) diff --git a/otherarch/sdcpp/main.cpp b/otherarch/sdcpp/main.cpp index ee16763a3..538e1e64f 100644 --- a/otherarch/sdcpp/main.cpp +++ b/otherarch/sdcpp/main.cpp @@ -41,13 +41,15 @@ const char* modes_str[] = { "img_gen", "vid_gen", "convert", + "upscale", }; -#define SD_ALL_MODES_STR "img_gen, vid_gen, convert" +#define SD_ALL_MODES_STR "img_gen, vid_gen, convert, upscale" enum SDMode { IMG_GEN, VID_GEN, CONVERT, + UPSCALE, MODE_COUNT }; @@ -82,6 +84,7 @@ struct SDParams { std::string prompt; std::string negative_prompt; + int clip_skip = -1; // <= 0 represents unspecified int width = 512; int height = 512; @@ -125,6 +128,8 @@ struct SDParams { int chroma_t5_mask_pad = 1; float flow_shift = INFINITY; + prediction_t prediction = DEFAULT_PRED; + sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f}; SDParams() { @@ -186,6 +191,7 @@ void print_params(SDParams params) { printf(" sample_params: %s\n", SAFE_STR(sample_params_str)); printf(" high_noise_sample_params: %s\n", SAFE_STR(high_noise_sample_params_str)); printf(" moe_boundary: %.3f\n", params.moe_boundary); + printf(" prediction: %s\n", sd_prediction_name(params.prediction)); printf(" flow_shift: %.2f\n", params.flow_shift); printf(" strength(img2img): %.2f\n", params.strength); printf(" rng: %s\n", sd_rng_type_name(params.rng_type)); @@ -208,7 +214,7 @@ void print_usage(int argc, const char* argv[]) { printf("\n"); printf("arguments:\n"); printf(" -h, --help show this help message and exit\n"); - printf(" -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, convert], default: img_gen\n"); + printf(" -M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen\n"); printf(" -t, --threads N number of threads to use during computation (default: -1)\n"); printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n"); printf(" --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed\n"); @@ -225,7 +231,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n"); printf(" --control-net [CONTROL_PATH] path to control net model\n"); printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n"); - printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n"); + printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n"); printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n"); printf(" --type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)\n"); printf(" If not specified, the default is the type of the weight file\n"); @@ -279,6 +285,7 @@ void print_usage(int argc, const char* argv[]) { printf(" --rng {std_default, cuda} RNG (default: cuda)\n"); printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n"); printf(" -b, --batch-count COUNT number of images to generate\n"); + printf(" --prediction {eps, v, edm_v, sd3_flow, flux_flow} Prediction type override.\n"); printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n"); printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n"); printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); @@ -649,6 +656,20 @@ void parse_args(int argc, const char** argv, SDParams& params) { return 1; }; + auto on_prediction_arg = [&](int argc, const char** argv, int index) { + if (++index >= argc) { + return -1; + } + const char* arg = argv[index]; + params.prediction = str_to_prediction(arg); + if (params.prediction == PREDICTION_COUNT) { + fprintf(stderr, "error: invalid prediction type %s\n", + arg); + return -1; + } + return 1; + }; + auto on_sample_method_arg = [&](int argc, const char** argv, int index) { if (++index >= argc) { return -1; @@ -805,6 +826,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { {"", "--rng", "", on_rng_arg}, {"-s", "--seed", "", on_seed_arg}, {"", "--sampling-method", "", on_sample_method_arg}, + {"", "--prediction", "", on_prediction_arg}, {"", "--scheduler", "", on_schedule_arg}, {"", "--skip-layers", "", on_skip_layers_arg}, {"", "--high-noise-sampling-method", "", on_high_noise_sample_method_arg}, @@ -825,13 +847,13 @@ void parse_args(int argc, const char** argv, SDParams& params) { params.n_threads = sd_get_num_physical_cores(); } - if (params.mode != CONVERT && params.mode != VID_GEN && params.prompt.length() == 0) { + if ((params.mode == IMG_GEN || params.mode == VID_GEN) && params.prompt.length() == 0) { fprintf(stderr, "error: the following arguments are required: prompt\n"); print_usage(argc, argv); exit(1); } - if (params.model_path.length() == 0 && params.diffusion_model_path.length() == 0) { + if (params.mode != UPSCALE && params.model_path.length() == 0 && params.diffusion_model_path.length() == 0) { fprintf(stderr, "error: the following arguments are required: model_path/diffusion_model\n"); print_usage(argc, argv); exit(1); @@ -891,6 +913,17 @@ void parse_args(int argc, const char** argv, SDParams& params) { exit(1); } + if (params.mode == UPSCALE) { + if (params.esrgan_path.length() == 0) { + fprintf(stderr, "error: upscale mode needs an upscaler model (--upscale-model)\n"); + exit(1); + } + if (params.init_image_path.length() == 0) { + fprintf(stderr, "error: upscale mode needs an init image (--init-img)\n"); + exit(1); + } + } + if (params.seed < 0) { srand((int)time(NULL)); params.seed = rand(); @@ -901,14 +934,6 @@ void parse_args(int argc, const char** argv, SDParams& params) { params.output_path = "output.gguf"; } } - - if (!isfinite(params.sample_params.guidance.img_cfg)) { - params.sample_params.guidance.img_cfg = params.sample_params.guidance.txt_cfg; - } - - if (!isfinite(params.high_noise_sample_params.guidance.img_cfg)) { - params.high_noise_sample_params.guidance.img_cfg = params.high_noise_sample_params.guidance.txt_cfg; - } } static std::string sd_basename(const std::string& path) { @@ -1349,6 +1374,7 @@ int main(int argc, const char* argv[]) { params.n_threads, params.wtype, params.rng_type, + params.prediction, params.offload_params_to_cpu, params.clip_on_cpu, params.control_net_cpu, @@ -1362,76 +1388,92 @@ int main(int argc, const char* argv[]) { params.flow_shift, }; - sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); + sd_image_t* results = nullptr; + int num_results = 0; - if (sd_ctx == NULL) { - printf("new_sd_ctx_t failed\n"); - release_all_resources(); - return 1; - } + if (params.mode == UPSCALE) { + num_results = 1; + results = (sd_image_t*)calloc(num_results, sizeof(sd_image_t)); + if (results == NULL) { + printf("failed to allocate results array\n"); + release_all_resources(); + return 1; + } - if (params.sample_params.sample_method == SAMPLE_METHOD_DEFAULT) { - params.sample_params.sample_method = sd_get_default_sample_method(sd_ctx); - } + results[0] = init_image; + init_image.data = NULL; + } else { + sd_ctx_t* sd_ctx = new_sd_ctx(&sd_ctx_params); - sd_image_t* results; - int num_results = 1; - if (params.mode == IMG_GEN) { - sd_img_gen_params_t img_gen_params = { - params.prompt.c_str(), - params.negative_prompt.c_str(), - params.clip_skip, - init_image, - ref_images.data(), - (int)ref_images.size(), - params.increase_ref_index, - mask_image, - params.width, - params.height, - params.sample_params, - params.strength, - params.seed, - params.batch_count, - control_image, - params.control_strength, - { - pmid_images.data(), - (int)pmid_images.size(), - params.pm_id_embed_path.c_str(), - params.pm_style_strength, - }, // pm_params - params.vae_tiling_params, - }; + if (sd_ctx == NULL) { + printf("new_sd_ctx_t failed\n"); + release_all_resources(); + return 1; + } - results = generate_image(sd_ctx, &img_gen_params); - num_results = params.batch_count; - } else if (params.mode == VID_GEN) { - sd_vid_gen_params_t vid_gen_params = { - params.prompt.c_str(), - params.negative_prompt.c_str(), - params.clip_skip, - init_image, - end_image, - control_frames.data(), - (int)control_frames.size(), - params.width, - params.height, - params.sample_params, - params.high_noise_sample_params, - params.moe_boundary, - params.strength, - params.seed, - params.video_frames, - params.vace_strength, - }; + if (params.sample_params.sample_method == SAMPLE_METHOD_DEFAULT) { + params.sample_params.sample_method = sd_get_default_sample_method(sd_ctx); + } - results = generate_video(sd_ctx, &vid_gen_params, &num_results); - } + if (params.mode == IMG_GEN) { + sd_img_gen_params_t img_gen_params = { + params.prompt.c_str(), + params.negative_prompt.c_str(), + params.clip_skip, + init_image, + ref_images.data(), + (int)ref_images.size(), + params.increase_ref_index, + mask_image, + params.width, + params.height, + params.sample_params, + params.strength, + params.seed, + params.batch_count, + control_image, + params.control_strength, + { + pmid_images.data(), + (int)pmid_images.size(), + params.pm_id_embed_path.c_str(), + params.pm_style_strength, + }, // pm_params + params.vae_tiling_params, + }; + + results = generate_image(sd_ctx, &img_gen_params); + num_results = params.batch_count; + } else if (params.mode == VID_GEN) { + sd_vid_gen_params_t vid_gen_params = { + params.prompt.c_str(), + params.negative_prompt.c_str(), + params.clip_skip, + init_image, + end_image, + control_frames.data(), + (int)control_frames.size(), + params.width, + params.height, + params.sample_params, + params.high_noise_sample_params, + params.moe_boundary, + params.strength, + params.seed, + params.video_frames, + params.vace_strength, + }; + + results = generate_video(sd_ctx, &vid_gen_params, &num_results); + } + + if (results == NULL) { + printf("generate failed\n"); + free_sd_ctx(sd_ctx); + return 1; + } - if (results == NULL) { - printf("generate failed\n"); free_sd_ctx(sd_ctx); - return 1; } int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth @@ -1444,7 +1486,7 @@ int main(int argc, const char* argv[]) { if (upscaler_ctx == NULL) { printf("new_upscaler_ctx failed\n"); } else { - for (int i = 0; i < params.batch_count; i++) { + for (int i = 0; i < num_results; i++) { if (results[i].data == NULL) { continue; } @@ -1530,7 +1572,6 @@ int main(int argc, const char* argv[]) { results[i].data = NULL; } free(results); - free_sd_ctx(sd_ctx); release_all_resources(); diff --git a/otherarch/sdcpp/stable-diffusion.cpp b/otherarch/sdcpp/stable-diffusion.cpp index db788c99b..52b8ed28c 100644 --- a/otherarch/sdcpp/stable-diffusion.cpp +++ b/otherarch/sdcpp/stable-diffusion.cpp @@ -836,64 +836,102 @@ public: ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM"); } - // check is_using_v_parameterization_for_sd2 - if (sd_version_is_sd2(version)) { - if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) { - is_using_v_parameterization = true; - } - } else if (sd_version_is_sdxl(version)) { - if (model_loader.tensor_storages_types.find("edm_vpred.sigma_max") != model_loader.tensor_storages_types.end()) { - // CosXL models - // TODO: get sigma_min and sigma_max values from file - is_using_edm_v_parameterization = true; - } - if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) { - is_using_v_parameterization = true; - } - } else if (version == VERSION_SVD) { - // TODO: V_PREDICTION_EDM - is_using_v_parameterization = true; - } - - if (sd_version_is_sd3(version)) { - LOG_INFO("running in FLOW mode"); - float shift = sd_ctx_params->flow_shift; - if (shift == INFINITY) { - shift = 3.0; - } - denoiser = std::make_shared(shift); - } else if (sd_version_is_flux(version)) { - LOG_INFO("running in Flux FLOW mode"); - float shift = 1.0f; // TODO: validate - for (auto pair : model_loader.tensor_storages_types) { - if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) { - shift = 1.15f; + if (sd_ctx_params->prediction != DEFAULT_PRED) { + switch (sd_ctx_params->prediction) { + case EPS_PRED: + LOG_INFO("running in eps-prediction mode"); + break; + case V_PRED: + LOG_INFO("running in v-prediction mode"); + denoiser = std::make_shared(); + break; + case EDM_V_PRED: + LOG_INFO("running in v-prediction EDM mode"); + denoiser = std::make_shared(); + break; + case SD3_FLOW_PRED: { + LOG_INFO("running in FLOW mode"); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 3.0; + } + denoiser = std::make_shared(shift); break; } + case FLUX_FLOW_PRED: { + LOG_INFO("running in Flux FLOW mode"); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 3.0; + } + denoiser = std::make_shared(shift); + break; + } + default: { + LOG_ERROR("Unknown parametrization %i", sd_ctx_params->prediction); + return false; + } } - denoiser = std::make_shared(shift); - } else if (sd_version_is_wan(version)) { - LOG_INFO("running in FLOW mode"); - float shift = sd_ctx_params->flow_shift; - if (shift == INFINITY) { - shift = 5.0; - } - denoiser = std::make_shared(shift); - } else if (sd_version_is_qwen_image(version)) { - LOG_INFO("running in FLOW mode"); - float shift = sd_ctx_params->flow_shift; - if (shift == INFINITY) { - shift = 3.0; - } - denoiser = std::make_shared(shift); - } else if (is_using_v_parameterization) { - LOG_INFO("running in v-prediction mode"); - denoiser = std::make_shared(); - } else if (is_using_edm_v_parameterization) { - LOG_INFO("running in v-prediction EDM mode"); - denoiser = std::make_shared(); } else { - LOG_INFO("running in eps-prediction mode"); + if (sd_version_is_sd2(version)) { + // check is_using_v_parameterization_for_sd2 + if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) { + is_using_v_parameterization = true; + } + } else if (sd_version_is_sdxl(version)) { + if (model_loader.tensor_storages_types.find("edm_vpred.sigma_max") != model_loader.tensor_storages_types.end()) { + // CosXL models + // TODO: get sigma_min and sigma_max values from file + is_using_edm_v_parameterization = true; + } + if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) { + is_using_v_parameterization = true; + } + } else if (version == VERSION_SVD) { + // TODO: V_PREDICTION_EDM + is_using_v_parameterization = true; + } + + if (sd_version_is_sd3(version)) { + LOG_INFO("running in FLOW mode"); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 3.0; + } + denoiser = std::make_shared(shift); + } else if (sd_version_is_flux(version)) { + LOG_INFO("running in Flux FLOW mode"); + float shift = 1.0f; // TODO: validate + for (auto pair : model_loader.tensor_storages_types) { + if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) { + shift = 1.15f; + break; + } + } + denoiser = std::make_shared(shift); + } else if (sd_version_is_wan(version)) { + LOG_INFO("running in FLOW mode"); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 5.0; + } + denoiser = std::make_shared(shift); + } else if (sd_version_is_qwen_image(version)) { + LOG_INFO("running in FLOW mode"); + float shift = sd_ctx_params->flow_shift; + if (shift == INFINITY) { + shift = 3.0; + } + denoiser = std::make_shared(shift); + } else if (is_using_v_parameterization) { + LOG_INFO("running in v-prediction mode"); + denoiser = std::make_shared(); + } else if (is_using_edm_v_parameterization) { + LOG_INFO("running in v-prediction EDM mode"); + denoiser = std::make_shared(); + } else { + LOG_INFO("running in eps-prediction mode"); + } } auto comp_vis_denoiser = std::dynamic_pointer_cast(denoiser); @@ -1281,7 +1319,7 @@ public: std::vector skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count); float cfg_scale = guidance.txt_cfg; - float img_cfg_scale = guidance.img_cfg; + float img_cfg_scale = isfinite(guidance.img_cfg) ? guidance.img_cfg : guidance.txt_cfg; float slg_scale = guidance.slg.scale; if (img_cfg_scale != cfg_scale && !sd_version_is_inpaint_or_unet_edit(version)) { @@ -1325,11 +1363,12 @@ public: } struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x); + int64_t t0 = ggml_time_us(); + auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* { - if (step == 1) { + if (step == 1 || step == -1) { pretty_progress(0, (int)steps, 0); } - int64_t t0 = ggml_time_us(); std::vector scaling = denoiser->get_scalings(sigma); GGML_ASSERT(scaling.size() == 3); @@ -1483,8 +1522,9 @@ public: } int64_t t1 = ggml_time_us(); - if (step > 0) { - pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f); + if (step > 0 || step == -(int)steps) { + int showstep = std::abs(step); + pretty_progress(showstep, (int)steps, (t1 - t0) / 1000000.f / showstep); // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000); } if (denoise_mask != nullptr) { @@ -1625,19 +1665,19 @@ public: if (vae_tiling_params.enabled && !encode_video) { // TODO wan2.2 vae support? int C = sd_version_is_dit(version) ? 16 : 4; - int NE2, NE3; + int ne2; + int ne3; if (sd_version_is_qwen_image(version)) { - NE2 = x->ne[3]; - NE3 = C; - } - else { + ne2 = 1; + ne3 = C * x->ne[3]; + } else { if (!use_tiny_autoencoder) { C *= 2; } - NE2 = C; - NE3 = x->ne[3]; + ne2 = C; + ne3 = x->ne[3]; } - result = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, NE2, NE3); + result = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, ne2, ne3); } if (sd_version_is_qwen_image(version)) { @@ -1911,6 +1951,31 @@ enum scheduler_t str_to_schedule(const char* str) { return SCHEDULE_COUNT; } +const char* prediction_to_str[] = { + "default", + "eps", + "v", + "edm_v", + "sd3_flow", + "flux_flow", +}; + +const char* sd_prediction_name(enum prediction_t prediction) { + if (prediction < PREDICTION_COUNT) { + return prediction_to_str[prediction]; + } + return NONE_STR; +} + +enum prediction_t str_to_prediction(const char* str) { + for (int i = 0; i < PREDICTION_COUNT; i++) { + if (!strcmp(str, prediction_to_str[i])) { + return (enum prediction_t)i; + } + } + return PREDICTION_COUNT; +} + void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { *sd_ctx_params = {}; sd_ctx_params->vae_decode_only = true; @@ -1918,6 +1983,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { sd_ctx_params->n_threads = sd_get_num_physical_cores(); sd_ctx_params->wtype = SD_TYPE_COUNT; sd_ctx_params->rng_type = CUDA_RNG; + sd_ctx_params->prediction = DEFAULT_PRED; sd_ctx_params->offload_params_to_cpu = false; sd_ctx_params->keep_clip_on_cpu = false; sd_ctx_params->keep_control_net_on_cpu = false; @@ -1957,6 +2023,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { "n_threads: %d\n" "wtype: %s\n" "rng_type: %s\n" + "prediction: %s\n" "offload_params_to_cpu: %s\n" "keep_clip_on_cpu: %s\n" "keep_control_net_on_cpu: %s\n" @@ -1985,6 +2052,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { sd_ctx_params->n_threads, sd_type_name(sd_ctx_params->wtype), sd_rng_type_name(sd_ctx_params->rng_type), + sd_prediction_name(sd_ctx_params->prediction), BOOL_STR(sd_ctx_params->offload_params_to_cpu), BOOL_STR(sd_ctx_params->keep_clip_on_cpu), BOOL_STR(sd_ctx_params->keep_control_net_on_cpu), @@ -2031,7 +2099,9 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) { "eta: %.2f, " "shifted_timestep: %d)", sample_params->guidance.txt_cfg, - sample_params->guidance.img_cfg, + isfinite(sample_params->guidance.img_cfg) + ? sample_params->guidance.img_cfg + : sample_params->guidance.txt_cfg, sample_params->guidance.distilled_guidance, sample_params->guidance.slg.layer_count, sample_params->guidance.slg.layer_start, @@ -2193,6 +2263,10 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, seed = rand(); } + if (!isfinite(guidance.img_cfg)) { + guidance.img_cfg = guidance.txt_cfg; + } + // for (auto v : sigmas) { // std::cout << v << " "; // } diff --git a/otherarch/sdcpp/stable-diffusion.h b/otherarch/sdcpp/stable-diffusion.h index bc5d9a0d2..7c638469e 100644 --- a/otherarch/sdcpp/stable-diffusion.h +++ b/otherarch/sdcpp/stable-diffusion.h @@ -64,6 +64,16 @@ enum scheduler_t { SCHEDULE_COUNT }; +enum prediction_t { + DEFAULT_PRED, + EPS_PRED, + V_PRED, + EDM_V_PRED, + SD3_FLOW_PRED, + FLUX_FLOW_PRED, + PREDICTION_COUNT +}; + // same as enum ggml_type enum sd_type_t { SD_TYPE_F32 = 0, @@ -146,6 +156,7 @@ typedef struct { int n_threads; enum sd_type_t wtype; enum rng_type_t rng_type; + enum prediction_t prediction; bool offload_params_to_cpu; bool keep_clip_on_cpu; bool keep_control_net_on_cpu; @@ -255,6 +266,8 @@ SD_API const char* sd_sample_method_name(enum sample_method_t sample_method); SD_API enum sample_method_t str_to_sample_method(const char* str); SD_API const char* sd_schedule_name(enum scheduler_t scheduler); SD_API enum scheduler_t str_to_schedule(const char* str); +SD_API const char* sd_prediction_name(enum prediction_t prediction); +SD_API enum prediction_t str_to_prediction(const char* str); SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params); SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params); @@ -285,6 +298,8 @@ SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor); +SD_API int get_upscale_factor(upscaler_ctx_t* upscaler_ctx); + SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path,