WIP on sdcpp integration

2025-09-10 17:14:36 +00:00 · 2024-02-29 00:40:07 +08:00 · 2024-02-29 00:40:07 +08:00 · f75e479db0
commit f75e479db0
parent 1e3ac7d803
12 changed files with 154 additions and 664 deletions
--- a/3
+++ b/3
@ -479,7 +479,7 @@ expose.o: expose.cpp expose.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 # sd.cpp objects
-sdcpp_default.o: otherarch/sdcpp/util.cpp otherarch/sdcpp/sd_adapter.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
+sdcpp_default.o: otherarch/sdcpp/sd_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 # idiotic "for easier compilation"
@ -608,6 +608,5 @@ quantize_mpt: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o otherarch
 simpleclinfo: simpleclinfo.cpp
 	$(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -o $@ $(LDFLAGS)

-
 build-info.h:
 	$(DONOTHING)
--- a/expose.cpp
+++ b/expose.cpp
@ -211,6 +211,15 @@ extern "C"
        return gpttype_generate(inputs, output);
    }

+    bool load_model_sd(const load_sd_model_inputs inputs) // exported wrapper for loading a stable-diffusion model
+    {
+        return sdtype_load_model(inputs); // forwards inputs unchanged and returns sdtype_load_model's bool result as-is
+    }
+    sd_generation_outputs generate_sd(const sd_generation_inputs inputs, sd_generation_outputs &output) // exported wrapper for one stable-diffusion generation request
+    {
+        return sdtype_generate(inputs, output); // forwards both arguments and returns whatever sdtype_generate returns
+    }
+
    const char * new_token(int idx) {
        if (generated_tokens.size() <= idx || idx < 0) return nullptr;

@ -263,4 +272,6 @@ extern "C"
        output.ids = toks.data(); //this may be slightly unsafe
        return output;
    }
+
+
 }
--- a/expose.h
+++ b/expose.h
@ -20,7 +20,7 @@ enum stop_reason
 {
    INVALID=-1,
    OUT_OF_TOKENS=0,
-    EOS_TOKEN=1,
+    EOS_TOKEN_HIT=1,
    CUSTOM_STOPPER=2,
 };
 struct logit_bias {
@ -92,13 +92,31 @@ struct generation_inputs
 struct generation_outputs
 {
    int status = -1;
-    char text[32768]; //32kb should be enough for any response
+    char text[24576]; //24kb should be enough for any response
 };
 struct token_count_outputs
 {
    int count = 0;
    int * ids; //we'll just use shared memory for this one, bit of a hack
 };
+struct load_sd_model_inputs // arguments consumed by sdtype_load_model (via load_model_sd)
+{
+    const char * model_filename; // assigned to SDParams::model_path in sdtype_load_model; NOTE(review): that target is used as a std::string, so this presumably must be non-null - confirm against the caller
+};
+struct sd_generation_inputs // per-request arguments consumed by sdtype_generate (via generate_sd)
+{
+    const char * prompt; // copied into SDParams::prompt by sdtype_generate
+    const char * negative_prompt; // copied into SDParams::negative_prompt by sdtype_generate
+    const float cfg_scale; // copied into SDParams::cfg_scale by sdtype_generate
+    const int sample_steps; // copied into SDParams::sample_steps by sdtype_generate
+    const int seed; // copied into SDParams::seed by sdtype_generate
+    const char * sample_method; // NOTE(review): sdtype_generate tests this with == against the literal "euler a", which compares pointer addresses rather than text; both branches currently select EULER_A
+};
+struct sd_generation_outputs // result record filled in by sdtype_generate
+{
+    int status = -1; // stays -1 until the generator sets it; the "SD is not initialized" path in sdtype_generate sets it to 0
+    char data[24576]; // fixed-size output buffer, same size as generation_outputs::text; written as an empty string when SD is not initialized
+};

 extern std::string executable_path;
 extern std::string lora_filename;
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@ -949,7 +949,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
        #if defined(GGML_USE_CLBLAST)
        if(file_format==FileFormat::GGUF_GENERIC && model_params.n_gpu_layers>0)
        {
-            if(file_format_meta.model_architecture == GGUFArch::FALCON)
+            if(file_format_meta.model_architecture == GGUFArch::ARCH_FALCON)
            {
                printf("\nOpenCL does not support GPU Layer offloading for this model architecture! GPU Offload has been disabled.\n");
                model_params.n_gpu_layers = 0;
@ -2032,7 +2032,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                    printf("\n(EOS token triggered!)");
                }
                remaining_tokens = 0;
-                last_stop_reason = stop_reason::EOS_TOKEN;
+                last_stop_reason = stop_reason::EOS_TOKEN_HIT;
            }

            for (const auto &matched : stop_sequence)
--- a/kcpp_docs.embd
+++ b/kcpp_docs.embd
@ -316,7 +316,7 @@
                         },
                         "stop_reason": {
                            "type": "integer",
-                            "description": "Reason the generation stopped. INVALID=-1, OUT_OF_TOKENS=0, EOS_TOKEN=1, CUSTOM_STOPPER=2"
+                            "description": "Reason the generation stopped. INVALID=-1, OUT_OF_TOKENS=0, EOS_TOKEN_HIT=1, CUSTOM_STOPPER=2"
                         },
                         "queue": {
                            "type": "integer",
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -87,7 +87,7 @@ class generation_inputs(ctypes.Structure):

 class generation_outputs(ctypes.Structure):
    _fields_ = [("status", ctypes.c_int),
-                ("text", ctypes.c_char * 32768)]
+                ("text", ctypes.c_char * 24576)]

 class token_count_outputs(ctypes.Structure):
    _fields_ = [("count", ctypes.c_int),
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@ -284,14 +284,14 @@ void print_tok_vec(std::vector<float> &embd)

            int filever = gguf_get_version(ctx);
            fileformatmeta->fileversion = filever;
-            fileformatmeta->model_architecture = GGUFArch::DEFAULT;
+            fileformatmeta->model_architecture = GGUFArch::ARCH_DEFAULT;
            if(modelarch=="phi2")
            {
-                fileformatmeta->model_architecture = GGUFArch::PHI;
+                fileformatmeta->model_architecture = GGUFArch::ARCH_PHI;
            }
            else if(modelarch=="falcon")
            {
-                fileformatmeta->model_architecture = GGUFArch::FALCON;
+                fileformatmeta->model_architecture = GGUFArch::ARCH_FALCON;
            }
        }
        gguf_free(ctx);
--- a/model_adapter.h
+++ b/model_adapter.h
@ -52,16 +52,16 @@ enum FileFormat

 enum GGUFArch
 {
-    DEFAULT = 0, //used for llama and other generic gguf
-    FALCON = 1,
-    PHI = 2,
+    ARCH_DEFAULT = 0, //used for llama and other generic gguf
+    ARCH_FALCON = 1,
+    ARCH_PHI = 2,
 };

 struct FileFormatExtraMeta
 {
    int n_ctx_train = 2048;
    int fileversion = 0;
-    GGUFArch model_architecture = GGUFArch::DEFAULT;
+    GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
    int n_expert_count = 0;
 };

@ -78,6 +78,9 @@ bool gpttype_generate_abort();
 const std::string & gpttype_get_pending_output();
 std::vector<int> gpttype_get_token_arr(const std::string & input);

+bool sdtype_load_model(const load_sd_model_inputs inputs);
+sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs, sd_generation_outputs &output);
+
 void timer_start();
 double timer_check();
 void print_tok_vec(std::vector<int> &embd);
--- a/otherarch/sdcpp/main.cpp
+++ b/otherarch/sdcpp/main.cpp
@ -440,7 +440,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
        exit(1);
    }
    if (params.n_threads <= 0) {
-        params.n_threads = get_num_physical_cores();
+        params.n_threads = sd_get_num_physical_cores();
    }

    if (params.mode != CONVERT && params.mode != IMG2VID && params.prompt.length() == 0) {
--- a/otherarch/sdcpp/sd_adapter.cpp
+++ b/otherarch/sdcpp/sd_adapter.cpp
@ -6,6 +6,14 @@
 #include <string>
 #include <vector>

+#include "model_adapter.h"
+
+#include "stable-diffusion.cpp"
+#include "util.cpp"
+#include "upscaler.cpp"
+#include "model.cpp"
+#include "zip.c"
+
 // #include "preprocessing.hpp"
 #include "stable-diffusion.h"

@ -99,679 +107,130 @@ struct SDParams {
    int upscale_repeats           = 1;
 };

-void print_params(SDParams params) {
-    printf("Option: \n");
-    printf("    n_threads:         %d\n", params.n_threads);
-    printf("    mode:              %s\n", modes_str[params.mode]);
-    printf("    model_path:        %s\n", params.model_path.c_str());
-    printf("    wtype:             %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
-    printf("    vae_path:          %s\n", params.vae_path.c_str());
-    printf("    taesd_path:        %s\n", params.taesd_path.c_str());
-    printf("    esrgan_path:       %s\n", params.esrgan_path.c_str());
-    printf("    controlnet_path:   %s\n", params.controlnet_path.c_str());
-    printf("    embeddings_path:   %s\n", params.embeddings_path.c_str());
-    printf("    output_path:       %s\n", params.output_path.c_str());
-    printf("    init_img:          %s\n", params.input_path.c_str());
-    printf("    control_image:     %s\n", params.control_image_path.c_str());
-    printf("    controlnet cpu:    %s\n", params.control_net_cpu ? "true" : "false");
-    printf("    strength(control): %.2f\n", params.control_strength);
-    printf("    prompt:            %s\n", params.prompt.c_str());
-    printf("    negative_prompt:   %s\n", params.negative_prompt.c_str());
-    printf("    min_cfg:           %.2f\n", params.min_cfg);
-    printf("    cfg_scale:         %.2f\n", params.cfg_scale);
-    printf("    clip_skip:         %d\n", params.clip_skip);
-    printf("    width:             %d\n", params.width);
-    printf("    height:            %d\n", params.height);
-    printf("    sample_method:     %s\n", sample_method_str[params.sample_method]);
-    printf("    schedule:          %s\n", schedule_str[params.schedule]);
-    printf("    sample_steps:      %d\n", params.sample_steps);
-    printf("    strength(img2img): %.2f\n", params.strength);
-    printf("    rng:               %s\n", rng_type_to_str[params.rng_type]);
-    printf("    seed:              %ld\n", params.seed);
-    printf("    batch_count:       %d\n", params.batch_count);
-    printf("    vae_tiling:        %s\n", params.vae_tiling ? "true" : "false");
-    printf("    upscale_repeats:   %d\n", params.upscale_repeats);
-}
+//global static vars for SD
+static SDParams * sd_params = nullptr;
+static sd_ctx_t * sd_ctx = nullptr;

-void print_usage(int argc, const char* argv[]) {
-    printf("usage: %s [arguments]\n", argv[0]);
-    printf("\n");
-    printf("arguments:\n");
-    printf("  -h, --help                         show this help message and exit\n");
-    printf("  -M, --mode [MODEL]                 run mode (txt2img or img2img or convert, default: txt2img)\n");
-    printf("  -t, --threads N                    number of threads to use during computation (default: -1).\n");
-    printf("                                     If threads <= 0, then threads will be set to the number of CPU physical cores\n");
-    printf("  -m, --model [MODEL]                path to model\n");
-    printf("  --vae [VAE]                        path to vae\n");
-    printf("  --taesd [TAESD_PATH]               path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
-    printf("  --control-net [CONTROL_PATH]       path to control net model\n");
-    printf("  --embd-dir [EMBEDDING_PATH]        path to embeddings.\n");
-    printf("  --upscale-model [ESRGAN_PATH]      path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.\n");
-    printf("  --upscale-repeats                  Run the ESRGAN upscaler this many times (default 1)\n");
-    printf("  --type [TYPE]                      weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)\n");
-    printf("                                     If not specified, the default is the type of the weight file.\n");
-    printf("  --lora-model-dir [DIR]             lora model directory\n");
-    printf("  -i, --init-img [IMAGE]             path to the input image, required by img2img\n");
-    printf("  --control-image [IMAGE]            path to image condition, control net\n");
-    printf("  -o, --output OUTPUT                path to write result image to (default: ./output.png)\n");
-    printf("  -p, --prompt [PROMPT]              the prompt to render\n");
-    printf("  -n, --negative-prompt PROMPT       the negative prompt (default: \"\")\n");
-    printf("  --cfg-scale SCALE                  unconditional guidance scale: (default: 7.0)\n");
-    printf("  --strength STRENGTH                strength for noising/unnoising (default: 0.75)\n");
-    printf("  --control-strength STRENGTH        strength to apply Control Net (default: 0.9)\n");
-    printf("                                     1.0 corresponds to full destruction of information in init image\n");
-    printf("  -H, --height H                     image height, in pixel space (default: 512)\n");
-    printf("  -W, --width W                      image width, in pixel space (default: 512)\n");
-    printf("  --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, lcm}\n");
-    printf("                                     sampling method (default: \"euler_a\")\n");
-    printf("  --steps  STEPS                     number of sample steps (default: 20)\n");
-    printf("  --rng {std_default, cuda}          RNG (default: cuda)\n");
-    printf("  -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)\n");
-    printf("  -b, --batch-count COUNT            number of images to generate.\n");
-    printf("  --schedule {discrete, karras}      Denoiser sigma schedule (default: discrete)\n");
-    printf("  --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
-    printf("                                     <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
-    printf("  --vae-tiling                       process vae in tiles to reduce memory usage\n");
-    printf("  --control-net-cpu                  keep controlnet in cpu (for low vram)\n");
-    printf("  --canny                            apply canny preprocessor (edge detection)\n");
-    printf("  -v, --verbose                      print extra info\n");
-}
+bool sdtype_load_model(const load_sd_model_inputs inputs) {
+    sd_params = new SDParams();
+    sd_params->model_path = inputs.model_filename;
+    sd_params->wtype = SD_TYPE_F16;
+    sd_params->n_threads = -1; //use physical cores
+    sd_params->input_path = ""; //unused

-void parse_args(int argc, const char** argv, SDParams& params) {
-    bool invalid_arg = false;
-    std::string arg;
-    for (int i = 1; i < argc; i++) {
-        arg = argv[i];
+    bool vae_decode_only = false;

-        if (arg == "-t" || arg == "--threads") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.n_threads = std::stoi(argv[i]);
-        } else if (arg == "-M" || arg == "--mode") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            const char* mode_selected = argv[i];
-            int mode_found            = -1;
-            for (int d = 0; d < MODE_COUNT; d++) {
-                if (!strcmp(mode_selected, modes_str[d])) {
-                    mode_found = d;
-                }
-            }
-            if (mode_found == -1) {
-                fprintf(stderr,
-                        "error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n",
-                        mode_selected);
-                exit(1);
-            }
-            params.mode = (SDMode)mode_found;
-        } else if (arg == "-m" || arg == "--model") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.model_path = argv[i];
-        } else if (arg == "--vae") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.vae_path = argv[i];
-        } else if (arg == "--taesd") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.taesd_path = argv[i];
-        } else if (arg == "--control-net") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.controlnet_path = argv[i];
-        } else if (arg == "--upscale-model") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.esrgan_path = argv[i];
-        } else if (arg == "--embd-dir") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.embeddings_path = argv[i];
-        } else if (arg == "--type") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            std::string type = argv[i];
-            if (type == "f32") {
-                params.wtype = SD_TYPE_F32;
-            } else if (type == "f16") {
-                params.wtype = SD_TYPE_F16;
-            } else if (type == "q4_0") {
-                params.wtype = SD_TYPE_Q4_0;
-            } else if (type == "q4_1") {
-                params.wtype = SD_TYPE_Q4_1;
-            } else if (type == "q5_0") {
-                params.wtype = SD_TYPE_Q5_0;
-            } else if (type == "q5_1") {
-                params.wtype = SD_TYPE_Q5_1;
-            } else if (type == "q8_0") {
-                params.wtype = SD_TYPE_Q8_0;
-            } else {
-                fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
-                        type.c_str());
-                exit(1);
-            }
-        } else if (arg == "--lora-model-dir") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.lora_model_dir = argv[i];
-        } else if (arg == "-i" || arg == "--init-img") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.input_path = argv[i];
-        } else if (arg == "--control-image") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.control_image_path = argv[i];
-        } else if (arg == "-o" || arg == "--output") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.output_path = argv[i];
-        } else if (arg == "-p" || arg == "--prompt") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.prompt = argv[i];
-        } else if (arg == "--upscale-repeats") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.upscale_repeats = std::stoi(argv[i]);
-            if (params.upscale_repeats < 1) {
-                fprintf(stderr, "error: upscale multiplier must be at least 1\n");
-                exit(1);
-            }
-        } else if (arg == "-n" || arg == "--negative-prompt") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.negative_prompt = argv[i];
-        } else if (arg == "--cfg-scale") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.cfg_scale = std::stof(argv[i]);
-        } else if (arg == "--strength") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.strength = std::stof(argv[i]);
-        } else if (arg == "--control-strength") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.control_strength = std::stof(argv[i]);
-        } else if (arg == "-H" || arg == "--height") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.height = std::stoi(argv[i]);
-        } else if (arg == "-W" || arg == "--width") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.width = std::stoi(argv[i]);
-        } else if (arg == "--steps") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.sample_steps = std::stoi(argv[i]);
-        } else if (arg == "--clip-skip") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.clip_skip = std::stoi(argv[i]);
-        } else if (arg == "--vae-tiling") {
-            params.vae_tiling = true;
-        } else if (arg == "--control-net-cpu") {
-            params.control_net_cpu = true;
-        } else if (arg == "--canny") {
-            params.canny_preprocess = true;
-        } else if (arg == "-b" || arg == "--batch-count") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.batch_count = std::stoi(argv[i]);
-        } else if (arg == "--rng") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            std::string rng_type_str = argv[i];
-            if (rng_type_str == "std_default") {
-                params.rng_type = STD_DEFAULT_RNG;
-            } else if (rng_type_str == "cuda") {
-                params.rng_type = CUDA_RNG;
-            } else {
-                invalid_arg = true;
-                break;
-            }
-        } else if (arg == "--schedule") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            const char* schedule_selected = argv[i];
-            int schedule_found            = -1;
-            for (int d = 0; d < N_SCHEDULES; d++) {
-                if (!strcmp(schedule_selected, schedule_str[d])) {
-                    schedule_found = d;
-                }
-            }
-            if (schedule_found == -1) {
-                invalid_arg = true;
-                break;
-            }
-            params.schedule = (schedule_t)schedule_found;
-        } else if (arg == "-s" || arg == "--seed") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            params.seed = std::stoll(argv[i]);
-        } else if (arg == "--sampling-method") {
-            if (++i >= argc) {
-                invalid_arg = true;
-                break;
-            }
-            const char* sample_method_selected = argv[i];
-            int sample_method_found            = -1;
-            for (int m = 0; m < N_SAMPLE_METHODS; m++) {
-                if (!strcmp(sample_method_selected, sample_method_str[m])) {
-                    sample_method_found = m;
-                }
-            }
-            if (sample_method_found == -1) {
-                invalid_arg = true;
-                break;
-            }
-            params.sample_method = (sample_method_t)sample_method_found;
-        } else if (arg == "-h" || arg == "--help") {
-            print_usage(argc, argv);
-            exit(0);
-        } else if (arg == "-v" || arg == "--verbose") {
-            params.verbose = true;
-        } else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            print_usage(argc, argv);
-            exit(1);
-        }
-    }
-    if (invalid_arg) {
-        fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
-        print_usage(argc, argv);
-        exit(1);
-    }
-    if (params.n_threads <= 0) {
-        params.n_threads = get_num_physical_cores();
-    }
-
-    if (params.mode != CONVERT && params.mode != IMG2VID && params.prompt.length() == 0) {
-        fprintf(stderr, "error: the following arguments are required: prompt\n");
-        print_usage(argc, argv);
-        exit(1);
-    }
-
-    if (params.model_path.length() == 0) {
-        fprintf(stderr, "error: the following arguments are required: model_path\n");
-        print_usage(argc, argv);
-        exit(1);
-    }
-
-    if ((params.mode == IMG2IMG || params.mode == IMG2VID) && params.input_path.length() == 0) {
-        fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
-        print_usage(argc, argv);
-        exit(1);
-    }
-
-    if (params.output_path.length() == 0) {
-        fprintf(stderr, "error: the following arguments are required: output_path\n");
-        print_usage(argc, argv);
-        exit(1);
-    }
-
-    if (params.width <= 0 || params.width % 64 != 0) {
-        fprintf(stderr, "error: the width must be a multiple of 64\n");
-        exit(1);
-    }
-
-    if (params.height <= 0 || params.height % 64 != 0) {
-        fprintf(stderr, "error: the height must be a multiple of 64\n");
-        exit(1);
-    }
-
-    if (params.sample_steps <= 0) {
-        fprintf(stderr, "error: the sample_steps must be greater than 0\n");
-        exit(1);
-    }
-
-    if (params.strength < 0.f || params.strength > 1.f) {
-        fprintf(stderr, "error: can only work with strength in [0.0, 1.0]\n");
-        exit(1);
-    }
-
-    if (params.seed < 0) {
-        srand((int)time(NULL));
-        params.seed = rand();
-    }
-
-    if (params.mode == CONVERT) {
-        if (params.output_path == "output.png") {
-            params.output_path = "output.gguf";
-        }
-    }
-}
-
-static std::string sd_basename(const std::string& path) {
-    size_t pos = path.find_last_of('/');
-    if (pos != std::string::npos) {
-        return path.substr(pos + 1);
-    }
-    pos = path.find_last_of('\\');
-    if (pos != std::string::npos) {
-        return path.substr(pos + 1);
-    }
-    return path;
-}
-
-std::string get_image_params(SDParams params, int64_t seed) {
-    std::string parameter_string = params.prompt + "\n";
-    if (params.negative_prompt.size() != 0) {
-        parameter_string += "Negative prompt: " + params.negative_prompt + "\n";
-    }
-    parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", ";
-    parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", ";
-    parameter_string += "Seed: " + std::to_string(seed) + ", ";
-    parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
-    parameter_string += "Model: " + sd_basename(params.model_path) + ", ";
-    parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", ";
-    parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]);
-    if (params.schedule == KARRAS) {
-        parameter_string += " karras";
-    }
-    parameter_string += ", ";
-    parameter_string += "Version: stable-diffusion.cpp";
-    return parameter_string;
-}
-
-void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
-    SDParams* params = (SDParams*)data;
-    if (!params->verbose && level <= SD_LOG_DEBUG) {
-        return;
-    }
-    if (level <= SD_LOG_INFO) {
-        fputs(log, stdout);
-        fflush(stdout);
-    } else {
-        fputs(log, stderr);
-        fflush(stderr);
-    }
-}
-
-int load_sd_model() {
-    SDParams params;
-
-    sd_set_log_callback(sd_log_cb, (void*)&params);
-
-    if (params.verbose) {
-        print_params(params);
-        printf("%s", sd_get_system_info());
-    }
-
-    if (params.mode == CONVERT) {
-        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
-        if (!success) {
-            fprintf(stderr,
-                    "convert '%s'/'%s' to '%s' failed\n",
-                    params.model_path.c_str(),
-                    params.vae_path.c_str(),
-                    params.output_path.c_str());
-            return 1;
-        } else {
-            printf("convert '%s'/'%s' to '%s' success\n",
-                   params.model_path.c_str(),
-                   params.vae_path.c_str(),
-                   params.output_path.c_str());
-            return 0;
-        }
-    }
-
-    if (params.mode == IMG2VID) {
-        fprintf(stderr, "SVD support is broken, do not use it!!!\n");
-        return 1;
-    }
-
-    bool vae_decode_only        = true;
-    uint8_t* input_image_buffer = NULL;
-    if (params.mode == IMG2IMG || params.mode == IMG2VID) {
-        vae_decode_only = false;
-
-        int c              = 0;
-        input_image_buffer = stbi_load(params.input_path.c_str(), &params.width, &params.height, &c, 3);
-        if (input_image_buffer == NULL) {
-            fprintf(stderr, "load image from '%s' failed\n", params.input_path.c_str());
-            return 1;
-        }
-        if (c != 3) {
-            fprintf(stderr, "input image must be a 3 channels RGB image, but got %d channels\n", c);
-            free(input_image_buffer);
-            return 1;
-        }
-        if (params.width <= 0 || params.width % 64 != 0) {
-            fprintf(stderr, "error: the width of image must be a multiple of 64\n");
-            free(input_image_buffer);
-            return 1;
-        }
-        if (params.height <= 0 || params.height % 64 != 0) {
-            fprintf(stderr, "error: the height of image must be a multiple of 64\n");
-            free(input_image_buffer);
-            return 1;
-        }
-    }
-
-    sd_ctx_t* sd_ctx = new_sd_ctx(params.model_path.c_str(),
-                                  params.vae_path.c_str(),
-                                  params.taesd_path.c_str(),
-                                  params.controlnet_path.c_str(),
-                                  params.lora_model_dir.c_str(),
-                                  params.embeddings_path.c_str(),
+    sd_ctx = new_sd_ctx(sd_params->model_path.c_str(),
+                                  sd_params->vae_path.c_str(),
+                                  sd_params->taesd_path.c_str(),
+                                  sd_params->controlnet_path.c_str(),
+                                  sd_params->lora_model_dir.c_str(),
+                                  sd_params->embeddings_path.c_str(),
                                  vae_decode_only,
-                                  params.vae_tiling,
+                                  sd_params->vae_tiling,
                                  true,
-                                  params.n_threads,
-                                  params.wtype,
-                                  params.rng_type,
-                                  params.schedule,
-                                  params.control_net_cpu);
+                                  sd_params->n_threads,
+                                  sd_params->wtype,
+                                  sd_params->rng_type,
+                                  sd_params->schedule,
+                                  sd_params->control_net_cpu);

    if (sd_ctx == NULL) {
-        printf("new_sd_ctx_t failed\n");
-        return 1;
+        printf("\nError: KCPP SD Failed to create context!\n");
+        return false;
    }

-    sd_image_t* results;
-    if (params.mode == TXT2IMG) {
-        sd_image_t* control_image = NULL;
-        if (params.controlnet_path.size() > 0 && params.control_image_path.size() > 0) {
-            int c              = 0;
-            input_image_buffer = stbi_load(params.control_image_path.c_str(), &params.width, &params.height, &c, 3);
-            if (input_image_buffer == NULL) {
-                fprintf(stderr, "load image from '%s' failed\n", params.control_image_path.c_str());
-                return 1;
-            }
-            control_image = new sd_image_t{(uint32_t)params.width,
-                                           (uint32_t)params.height,
-                                           3,
-                                           input_image_buffer};
-            if (params.canny_preprocess) {  // apply preprocessor
-                control_image->data = preprocess_canny(control_image->data,
-                                                       control_image->width,
-                                                       control_image->height,
-                                                       0.08f,
-                                                       0.08f,
-                                                       0.8f,
-                                                       1.0f,
-                                                       false);
-            }
-        }
+    return true;
+
+}
+
+sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs, sd_generation_outputs &output)
+{
+    if(sd_ctx == nullptr || sd_params == nullptr)
+    {
+        printf("\nError: KCPP SD is not initialized!\n");
+        snprintf(output.data, sizeof(output.data), "%s", "");
+        output.status = 0;
+        return output;
+    }
+    uint8_t * input_image_buffer = NULL;
+    sd_image_t * results;
+    sd_image_t* control_image = NULL;
+
+    sd_params->prompt = inputs.prompt;
+    sd_params->negative_prompt = inputs.negative_prompt;
+    sd_params->cfg_scale = inputs.cfg_scale;
+    sd_params->sample_steps = inputs.sample_steps;
+    sd_params->seed = inputs.seed;
+
+    if(inputs.sample_method=="euler a") //all lowercase
+    {
+        sd_params->sample_method = sample_method_t::EULER_A;
+    }
+    else
+    {
+        sd_params->sample_method = sample_method_t::EULER_A;
+    }
+
+    if (sd_params->mode == TXT2IMG) {
        results = txt2img(sd_ctx,
-                          params.prompt.c_str(),
-                          params.negative_prompt.c_str(),
-                          params.clip_skip,
-                          params.cfg_scale,
-                          params.width,
-                          params.height,
-                          params.sample_method,
-                          params.sample_steps,
-                          params.seed,
-                          params.batch_count,
+                          sd_params->prompt.c_str(),
+                          sd_params->negative_prompt.c_str(),
+                          sd_params->clip_skip,
+                          sd_params->cfg_scale,
+                          sd_params->width,
+                          sd_params->height,
+                          sd_params->sample_method,
+                          sd_params->sample_steps,
+                          sd_params->seed,
+                          sd_params->batch_count,
                          control_image,
-                          params.control_strength);
+                          sd_params->control_strength);
    } else {
-        sd_image_t input_image = {(uint32_t)params.width,
-                                  (uint32_t)params.height,
+        sd_image_t input_image = {(uint32_t)sd_params->width,
+                                  (uint32_t)sd_params->height,
                                  3,
                                  input_image_buffer};
-
-        if (params.mode == IMG2VID) {
-            results = img2vid(sd_ctx,
-                              input_image,
-                              params.width,
-                              params.height,
-                              params.video_frames,
-                              params.motion_bucket_id,
-                              params.fps,
-                              params.augmentation_level,
-                              params.min_cfg,
-                              params.cfg_scale,
-                              params.sample_method,
-                              params.sample_steps,
-                              params.strength,
-                              params.seed);
-            if (results == NULL) {
-                printf("generate failed\n");
-                free_sd_ctx(sd_ctx);
-                return 1;
-            }
-            size_t last            = params.output_path.find_last_of(".");
-            std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
-            for (int i = 0; i < params.video_frames; i++) {
-                if (results[i].data == NULL) {
-                    continue;
-                }
-                std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
-                stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
-                               results[i].data, 0, get_image_params(params, params.seed + i).c_str());
-                printf("save result image to '%s'\n", final_image_path.c_str());
-                free(results[i].data);
-                results[i].data = NULL;
-            }
-            free(results);
-            free_sd_ctx(sd_ctx);
-            return 0;
-        } else {
-            results = img2img(sd_ctx,
-                              input_image,
-                              params.prompt.c_str(),
-                              params.negative_prompt.c_str(),
-                              params.clip_skip,
-                              params.cfg_scale,
-                              params.width,
-                              params.height,
-                              params.sample_method,
-                              params.sample_steps,
-                              params.strength,
-                              params.seed,
-                              params.batch_count);
-        }
+        results = img2img(sd_ctx,
+                            input_image,
+                            sd_params->prompt.c_str(),
+                            sd_params->negative_prompt.c_str(),
+                            sd_params->clip_skip,
+                            sd_params->cfg_scale,
+                            sd_params->width,
+                            sd_params->height,
+                            sd_params->sample_method,
+                            sd_params->sample_steps,
+                            sd_params->strength,
+                            sd_params->seed,
+                            sd_params->batch_count);
    }

    if (results == NULL) {
-        printf("generate failed\n");
-        free_sd_ctx(sd_ctx);
-        return 1;
+        printf("\nKCPP SD generate failed!\n");
+        snprintf(output.data, sizeof(output.data), "%s", "");
+        output.status = 0;
+        return output;
    }

-    int upscale_factor = 4;  // unused for RealESRGAN_x4plus_anime_6B.pth
-    if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) {
-        upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(),
-                                                        params.n_threads,
-                                                        params.wtype);

-        if (upscaler_ctx == NULL) {
-            printf("new_upscaler_ctx failed\n");
-        } else {
-            for (int i = 0; i < params.batch_count; i++) {
-                if (results[i].data == NULL) {
-                    continue;
-                }
-                sd_image_t current_image = results[i];
-                for (int u = 0; u < params.upscale_repeats; ++u) {
-                    sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor);
-                    if (upscaled_image.data == NULL) {
-                        printf("upscale failed\n");
-                        break;
-                    }
-                    free(current_image.data);
-                    current_image = upscaled_image;
-                }
-                results[i] = current_image;  // Set the final upscaled image as the result
-            }
-        }
-    }
-
-    size_t last            = params.output_path.find_last_of(".");
-    std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
-    for (int i = 0; i < params.batch_count; i++) {
+    size_t last            = sd_params->output_path.find_last_of(".");
+    std::string dummy_name = last != std::string::npos ? sd_params->output_path.substr(0, last) : sd_params->output_path;
+    for (int i = 0; i < sd_params->batch_count; i++) {
        if (results[i].data == NULL) {
            continue;
        }
        std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
        stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
-                       results[i].data, 0, get_image_params(params, params.seed + i).c_str());
+                       results[i].data, 0, "Made By KoboldCpp");
        printf("save result image to '%s'\n", final_image_path.c_str());
        free(results[i].data);
        results[i].data = NULL;
    }
-    free(results);
-    free_sd_ctx(sd_ctx);

-    return 0;
+    free(results);
+
+    snprintf(output.data, sizeof(output.data), "%s", "");
+    output.status = 1;
+    return output;
 }
--- a/otherarch/sdcpp/stable-diffusion.h
+++ b/otherarch/sdcpp/stable-diffusion.h
@ -94,7 +94,7 @@ enum sd_log_level_t {
 typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);

 SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
-SD_API int32_t get_num_physical_cores();
+SD_API int32_t sd_get_num_physical_cores();
 SD_API const char* sd_get_system_info();

 typedef struct {
--- a/otherarch/sdcpp/util.cpp
+++ b/otherarch/sdcpp/util.cpp
@ -126,7 +126,7 @@ std::string get_full_path(const std::string& dir, const std::string& filename) {
 // get_num_physical_cores is copy from
 // https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
 // LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
-int32_t get_num_physical_cores() {
+int32_t sd_get_num_physical_cores() {
 #ifdef __linux__
    // enumerate the set of thread siblings, num entries is num cores
    std::unordered_set<std::string> siblings;