diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6b053797..bb0686148 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -80,9 +80,8 @@ if (LLAMA_CUBLAS)
         enable_language(CUDA)
 
         add_compile_definitions(GGML_USE_CUBLAS)
-        #add_compile_definitions(GGML_CUDA_CUBLAS) #remove to not use cublas
+        add_compile_definitions(SD_USE_CUBLAS)
         add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
-        #add_compile_definitions(GGML_CUDA_FORCE_DMMV) #non dmmv broken for me
 
         add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
         add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
@@ -150,7 +149,7 @@ if (LLAMA_HIPBLAS)
 
     if (${hipblas_FOUND} AND ${hip_FOUND})
         message(STATUS "HIP and hipBLAS found")
-        add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
+        add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS SD_USE_CUBLAS)
         add_library(ggml-rocm OBJECT ${GGML_SOURCES_CUDA})
         if (LLAMA_CUDA_FORCE_DMMV)
             target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
@@ -425,14 +424,21 @@ add_library(common2
             common/common.h
             common/grammar-parser.h
             common/grammar-parser.cpp)
-target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
+target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
+add_library(sdtype_adapter
+            sdtype_adapter.cpp)
+target_include_directories(sdtype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
+target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
+set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
 add_library(gpttype_adapter
             gpttype_adapter.cpp)
-target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
+target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -440,24 +446,24 @@ set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 if (LLAMA_CUBLAS)
     set(TARGET koboldcpp_cublas)
     add_library(${TARGET} SHARED expose.cpp expose.h)
-    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
+    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
     target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
     set_target_properties(${TARGET} PROPERTIES PREFIX "")
     set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
+    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
     target_compile_features(${TARGET} PRIVATE cxx_std_11)
 endif()
 
 if (LLAMA_HIPBLAS)
     set(TARGET koboldcpp_hipblas)
     add_library(${TARGET} SHARED expose.cpp expose.h)
-    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples ./common)
+    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
     target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
     set_target_properties(${TARGET} PROPERTIES PREFIX "")
     set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter ${LLAMA_EXTRA_LIBS})
+    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
     target_compile_features(${TARGET} PRIVATE cxx_std_11)
 endif()
diff --git a/Makefile b/Makefile
index d7df93e41..59be403e8 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,7 @@ CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch
 CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
 LDFLAGS =
 FASTCFLAGS = $(subst -O3,-Ofast,$(CFLAGS))
+FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))
 
 # these are used on windows, to build some libraries with extra old device compatibility
 SIMPLECFLAGS =
@@ -54,7 +55,7 @@ CLBLAST_FLAGS = -DGGML_USE_CLBLAST
 FAILSAFE_FLAGS = -DUSE_FAILSAFE
 VULKAN_FLAGS = -DGGML_USE_VULKAN
 ifdef LLAMA_CUBLAS
-    CUBLAS_FLAGS = -DGGML_USE_CUBLAS
+    CUBLAS_FLAGS = -DGGML_USE_CUBLAS -DSD_USE_CUBLAS
 else
     CUBLAS_FLAGS =
 endif
@@ -141,7 +142,7 @@
 
 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
 ifdef LLAMA_CUBLAS
-    CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
+    CUBLAS_FLAGS = -DGGML_USE_CUBLAS -DSD_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
     CUBLASLD_FLAGS = -lcuda -lcublas -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
     CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
     NVCC = nvcc
@@ -225,7 +226,7 @@ ifdef LLAMA_HIPBLAS
     LLAMA_CUDA_DMMV_X ?= 32
     LLAMA_CUDA_MMV_Y ?= 1
     LLAMA_CUDA_KQUANTS_ITER ?= 2
-    HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
+    HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
     HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
     HIP_OBJS += ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
     ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
@@ -256,8 +257,8 @@ endif # LLAMA_HIPBLAS
 
 ifdef LLAMA_METAL
-    CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
-    CXXFLAGS += -DGGML_USE_METAL
+    CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG -DSD_USE_METAL
+    CXXFLAGS += -DGGML_USE_METAL -DSD_USE_METAL
     LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
     OBJS += ggml-metal.o
@@ -479,8 +480,10 @@ expose.o: expose.cpp expose.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 # sd.cpp objects
-sdcpp_default.o: otherarch/sdcpp/sd_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
-	$(CXX) $(CXXFLAGS) -c $< -o $@
+sdcpp_default.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
+	$(CXX) $(FASTCXXFLAGS) -c $< -o $@
+sdcpp_cublas.o: otherarch/sdcpp/sdtype_adapter.cpp otherarch/sdcpp/stable-diffusion.h otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/util.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c
+	$(CXX) $(FASTCXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
 
 # idiotic "for easier compilation"
 GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml.h ggml-cuda.h llama.h otherarch/llama-util.h
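Editor's note: the new FASTCXXFLAGS mirrors the existing FASTCFLAGS trick, so the sd.cpp objects below can be compiled with -Ofast while the rest of the tree keeps -O3. GNU Make's $(subst ...) is a plain textual replace; a minimal Python sketch of what it does (the flag string here is abbreviated, not the full CXXFLAGS from this Makefile):

    # Illustrative only: $(subst -O3,-Ofast,$(CXXFLAGS)) is pure text substitution.
    CXXFLAGS = "-I. -I./common -O3 -DNDEBUG -std=c++11 -fPIC"  # abbreviated
    FASTCXXFLAGS = CXXFLAGS.replace("-O3", "-Ofast")
    print(FASTCXXFLAGS)  # -I. -I./common -Ofast -DNDEBUG -std=c++11 -fPIC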
diff --git a/expose.cpp b/expose.cpp
index 6b841e0e4..f7942ffa7 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -211,11 +211,11 @@ extern "C"
         return gpttype_generate(inputs);
     }
 
-    bool load_model_sd(const load_sd_model_inputs inputs)
+    bool sd_load_model(const sd_load_model_inputs inputs)
     {
         return sdtype_load_model(inputs);
     }
-    sd_generation_outputs generate_sd(const sd_generation_inputs inputs)
+    sd_generation_outputs sd_generate(const sd_generation_inputs inputs)
     {
         return sdtype_generate(inputs);
     }
diff --git a/expose.h b/expose.h
index 6ee13d00e..a87ae5cf5 100644
--- a/expose.h
+++ b/expose.h
@@ -99,7 +99,7 @@ struct token_count_outputs
     int count = 0;
     int * ids; //we'll just use shared memory for this one, bit of a hack
 };
-struct load_sd_model_inputs
+struct sd_load_model_inputs
 {
     const char * model_filename;
     const int debugmode = 0;
@@ -116,6 +116,7 @@ struct sd_generation_inputs
 struct sd_generation_outputs
 {
     int status = -1;
+    unsigned int data_length = 0;
     const char * data;
 };
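Editor's note: these structs cross a C ABI, so the Python mirror in koboldcpp.py must declare the same fields in the same order; inserting data_length before data shifts the offset of every later field. A minimal hedged sketch of that correspondence (a standalone check, not code from this patch):

    import ctypes

    # Mirror of the new sd_generation_outputs layout from expose.h.
    # Field ORDER, not field names, is what the C side sees, so
    # data_length must come before data exactly as in the header.
    class sd_generation_outputs(ctypes.Structure):
        _fields_ = [("status", ctypes.c_int),
                    ("data_length", ctypes.c_uint),
                    ("data", ctypes.c_char_p)]

    out = sd_generation_outputs()
    assert out.status == 0        # ctypes zero-initializes; the C++ default is -1
    assert out.data_length == 0
    assert out.data is None       # NULL char* maps to None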
"+(r=(r=(r=(r=(r=r.replace(//g,">")).replace(/\t/g," ").replace(/\^\^\^(.+?)\^\^\^/g,"$1")).replace(/^\/\/(.*)/gm,"//$1 ").replace(/\s\/\/(.*)/gm," //$1 ")).replace(/(\s?)(function|procedure|return|exit|if|then|else|end|loop|while|or|and|case|when)(\s)/gim,"$1$2$3")).replace(/(\s?)(var|let|const|=>|for|next|do|while|loop|continue|break|switch|try|catch|finally)(\s)/gim,"$1$2$3"))+"
"},c=function(e){return(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=(e=e.replace(/^###### (.*?)\s*#*$/gm,""+r(l).replace(/`/g,"`")+"
"})).replace(/`(.*?)`/gm,"$1
")).replace(/^\>\> (.*$)/gm,"")).replace(/^\> (.*$)/gm,"$1
$1")).replace(/<\/blockquote\>\n
/g,"\n")).replace(/<\/blockquote\>\n/g,"\n
")).replace(/!\[(.*?)\]\((.*?) "(.*?)"\)/gm,'')).replace(/!\[(.*?)\]\((.*?)\)/gm,'
')).replace(/\[(.*?)\]\((.*?) "new"\)/gm,'$1')).replace(/\[(.*?)\]\((.*?) "(.*?)"\)/gm,'$1')).replace(/
/gm,'http$1')).replace(/\[(.*?)\]\(\)/gm,'$1')).replace(/\[(.*?)\]\((.*?)\)/gm,'$1')) - .replace(/^[\*+-][ .](.*)/gm," ")).replace(/\%SpcEtg\%(\d\d?)[ .](.*)([\n]?)/gm,"\%SpcEtg\%\n$1.$2\n").replace(/^\d\d?[ .](.*)([\n]??)/gm,"
- $1
").replace(/<\/li><\/ol>
- $1
- /gm,"
- ")).replace(/^<[ou]l>
- (.*\%SpcStg\%.*\%SpcEtg\%.*)<\/li><\/[ou]l\>/gm,"$1").replace(/^\s{2,6}[\*+-][ .](.*)/gm,"
")).replace(/^\s{2,6}\d[ .](.*)/gm,"
- $1
")).replace(/<\/[ou]l\>\n\n<[ou]l\>/gm,"\n").replace(/<\/[ou]l\>\n<[ou]l\>/g,"")).replace(/<\/[ou]l\>\n<[ou]l\>/g,"\n").replace(/<\/li><\/ul>
- $1
- /gm,"
- ")).replace(/\*\*\*(\w.*?[^\\])\*\*\*/gm,"$1")).replace(/\*\*(\w.*?[^\\])\*\*/gm,"$1")).replace(/\*(\w.*?[^\\])\*/gm,"$1")).replace(/___(\w.*?[^\\])___/gm,"$1")).replace(/__(\w.*?[^\\])__/gm,"$1")).replace(/~~(\w.*?)~~/gm,"
$1")).replace(/\^\^(\w.*?)\^\^/gm,"$1")).replace(/\{\{(\w.*?)\}\}/gm,"$1")).replace(/^((?:\|[^|\r\n]*[^|\r\n\s]\s*)+\|(?:\r?\n|\r|))+/gm,function (matchedTable){return convertMarkdownTableToHtml(matchedTable);})).replace(/ \n/g,"\n
") + .replace(/^[\*+-][ .](.*)/gm,"")).replace(/\%SpcEtg\%(\d\d?)[ .](.*)([\n]?)/gm,"\%SpcEtg\%\n$1.$2\n").replace(/^\d\d?[ .] (.*)([\n]??)/gm,"
- $1
").replace(/<\/li><\/ol>
- $1
- /gm,"
- ")).replace(/^<[ou]l>
- (.*\%SpcStg\%.*\%SpcEtg\%.*)<\/li><\/[ou]l\>/gm,"$1").replace(/^\s{2,6}[\*+-][ .](.*)/gm,"
")).replace(/^\s{2,6}\d[ .](.*)/gm,"
- $1
")).replace(/<\/[ou]l\>\n\n<[ou]l\>/gm,"\n").replace(/<\/[ou]l\>\n<[ou]l\>/g,"")).replace(/<\/[ou]l\>\n<[ou]l\>/g,"\n").replace(/<\/li><\/ul>
- $1
- /gm,"
- ")).replace(/\*\*\*(\w.*?[^\\])\*\*\*/gm,"$1")).replace(/\*\*(\w.*?[^\\])\*\*/gm,"$1")).replace(/\*(\w.*?[^\\])\*/gm,"$1")).replace(/___(\w.*?[^\\])___/gm,"$1")).replace(/__(\w.*?[^\\])__/gm,"$1")).replace(/~~(\w.*?)~~/gm,"
$1")).replace(/\^\^(\w.*?)\^\^/gm,"$1")).replace(/\{\{(\w.*?)\}\}/gm,"$1")).replace(/^((?:\|[^|\r\n]*[^|\r\n\s]\s*)+\|(?:\r?\n|\r|))+/gm,function (matchedTable){return convertMarkdownTableToHtml(matchedTable);})).replace(/ \n/g,"\n
") //.replace(/\n\s*\n/g,"\n\n") ).replace(/^ {4,10}(.*)/gm,function(e,l){return"
"})).replace(/^\t(.*)/gm,function(e,l){return""+r(l)+"
"})).replace(/<\/code\><\/pre\>\n"+r(l)+"
/g,"\n")).replace(/\\([`_~\*\+\-\.\^\\\<\>\(\)\[\]])/gm,"$1")},a=0,n=0,p="";for(e=(e=e.replace(/\r\n/g,"\n").replace(/\n~~~/g,"\n```")).replace(/```([^`]+)```/g,l);(a=e.indexOf("
"))>=0;)n=e.indexOf("
",a),p+=c(e.substr(0,a))+e.substr(a+6,n>0?n-a-6:mdtext.length),e=e.substr(n+7);return p+c(e)} @@ -8119,7 +8120,7 @@ Current version: 116 localsettings.last_selected_preset = document.getElementById("presets").value; //clean and clamp invalid values - localsettings.max_context_length = cleannum(localsettings.max_context_length, 8, 99999); + localsettings.max_context_length = cleannum(localsettings.max_context_length, 8, 999999); localsettings.max_length = cleannum(localsettings.max_length, 1, (localsettings.max_context_length-1)); localsettings.temperature = cleannum(localsettings.temperature, 0.01, 5); localsettings.rep_pen = cleannum(localsettings.rep_pen, 0.1, 5); diff --git a/koboldcpp.py b/koboldcpp.py index 86c0dc2d1..96e47ba87 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -22,6 +22,14 @@ logit_bias_max = 16 bias_min_value = -100.0 bias_max_value = 100.0 +class logit_bias(ctypes.Structure): + _fields_ = [("token_id", ctypes.c_int32), + ("bias", ctypes.c_float)] + +class token_count_outputs(ctypes.Structure): + _fields_ = [("count", ctypes.c_int), + ("ids", ctypes.POINTER(ctypes.c_int))] + class load_model_inputs(ctypes.Structure): _fields_ = [("threads", ctypes.c_int), ("blasthreads", ctypes.c_int), @@ -49,10 +57,6 @@ class load_model_inputs(ctypes.Structure): ("banned_tokens", ctypes.c_char_p * ban_token_max), ("tensor_split", ctypes.c_float * tensor_split_max)] -class logit_bias(ctypes.Structure): - _fields_ = [("token_id", ctypes.c_int32), - ("bias", ctypes.c_float)] - class generation_inputs(ctypes.Structure): _fields_ = [("seed", ctypes.c_int), ("prompt", ctypes.c_char_p), @@ -103,12 +107,9 @@ class sd_generation_inputs(ctypes.Structure): class sd_generation_outputs(ctypes.Structure): _fields_ = [("status", ctypes.c_int), + ("data_length", ctypes.c_uint), ("data", ctypes.c_char_p)] -class token_count_outputs(ctypes.Structure): - _fields_ = [("count", ctypes.c_int), - ("ids", ctypes.POINTER(ctypes.c_int))] - handle = None def getdirpath(): @@ -273,10 +274,10 @@ def init_library(): handle.abort_generate.restype = ctypes.c_bool handle.token_count.restype = token_count_outputs handle.get_pending_output.restype = ctypes.c_char_p - handle.load_model_sd.argtypes = [sd_load_model_inputs] - handle.load_model_sd.restype = ctypes.c_bool - handle.generate_sd.argtypes = [sd_generation_inputs] - handle.generate_sd.restype = sd_generation_outputs + handle.sd_load_model.argtypes = [sd_load_model_inputs] + handle.sd_load_model.restype = ctypes.c_bool + handle.sd_generate.argtypes = [sd_generation_inputs] + handle.sd_generate.restype = sd_generation_outputs def load_model(model_filename): global args @@ -469,14 +470,29 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu return outstr -def load_model_sd(model_filename): +def sd_load_model(model_filename): global args inputs = sd_load_model_inputs() inputs.debugmode = args.debugmode inputs.model_filename = model_filename.encode("UTF-8") - ret = handle.load_model_sd(inputs) + ret = handle.sd_load_model(inputs) return ret +def sd_generate(prompt, negative_prompt="", cfg_scale=5, sample_steps=20, seed=-1, sample_method="euler a"): + global maxctx, args, currentusergenkey, totalgens, pendingabortkey + inputs = sd_generation_inputs() + inputs.prompt = prompt.encode("UTF-8") + inputs.negative_prompt = negative_prompt.encode("UTF-8") + inputs.cfg_scale = cfg_scale + inputs.sample_steps = sample_steps + inputs.seed = seed + inputs.sample_method = sample_method.encode("UTF-8") + ret = handle.sd_generate(inputs) + outstr = "" 
diff --git a/koboldcpp.py b/koboldcpp.py
index 86c0dc2d1..96e47ba87 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -22,6 +22,14 @@ logit_bias_max = 16
 bias_min_value = -100.0
 bias_max_value = 100.0
 
+class logit_bias(ctypes.Structure):
+    _fields_ = [("token_id", ctypes.c_int32),
+                ("bias", ctypes.c_float)]
+
+class token_count_outputs(ctypes.Structure):
+    _fields_ = [("count", ctypes.c_int),
+                ("ids", ctypes.POINTER(ctypes.c_int))]
+
 class load_model_inputs(ctypes.Structure):
     _fields_ = [("threads", ctypes.c_int),
                 ("blasthreads", ctypes.c_int),
@@ -49,10 +57,6 @@ class load_model_inputs(ctypes.Structure):
                 ("banned_tokens", ctypes.c_char_p * ban_token_max),
                 ("tensor_split", ctypes.c_float * tensor_split_max)]
 
-class logit_bias(ctypes.Structure):
-    _fields_ = [("token_id", ctypes.c_int32),
-                ("bias", ctypes.c_float)]
-
 class generation_inputs(ctypes.Structure):
     _fields_ = [("seed", ctypes.c_int),
                 ("prompt", ctypes.c_char_p),
@@ -103,12 +107,9 @@ class sd_generation_inputs(ctypes.Structure):
 class sd_generation_outputs(ctypes.Structure):
     _fields_ = [("status", ctypes.c_int),
+                ("data_length", ctypes.c_uint),
                 ("data", ctypes.c_char_p)]
 
-class token_count_outputs(ctypes.Structure):
-    _fields_ = [("count", ctypes.c_int),
-                ("ids", ctypes.POINTER(ctypes.c_int))]
-
 handle = None
 
 def getdirpath():
@@ -273,10 +274,10 @@ def init_library():
     handle.abort_generate.restype = ctypes.c_bool
     handle.token_count.restype = token_count_outputs
     handle.get_pending_output.restype = ctypes.c_char_p
-    handle.load_model_sd.argtypes = [sd_load_model_inputs]
-    handle.load_model_sd.restype = ctypes.c_bool
-    handle.generate_sd.argtypes = [sd_generation_inputs]
-    handle.generate_sd.restype = sd_generation_outputs
+    handle.sd_load_model.argtypes = [sd_load_model_inputs]
+    handle.sd_load_model.restype = ctypes.c_bool
+    handle.sd_generate.argtypes = [sd_generation_inputs]
+    handle.sd_generate.restype = sd_generation_outputs
 
 def load_model(model_filename):
     global args
@@ -469,14 +470,29 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
     return outstr
 
-def load_model_sd(model_filename):
+def sd_load_model(model_filename):
     global args
     inputs = sd_load_model_inputs()
     inputs.debugmode = args.debugmode
     inputs.model_filename = model_filename.encode("UTF-8")
-    ret = handle.load_model_sd(inputs)
+    ret = handle.sd_load_model(inputs)
     return ret
 
+def sd_generate(prompt, negative_prompt="", cfg_scale=5, sample_steps=20, seed=-1, sample_method="euler a"):
+    global maxctx, args, currentusergenkey, totalgens, pendingabortkey
+    inputs = sd_generation_inputs()
+    inputs.prompt = prompt.encode("UTF-8")
+    inputs.negative_prompt = negative_prompt.encode("UTF-8")
+    inputs.cfg_scale = cfg_scale
+    inputs.sample_steps = sample_steps
+    inputs.seed = seed
+    inputs.sample_method = sample_method.encode("UTF-8")
+    ret = handle.sd_generate(inputs)
+    outstr = ""
+    if ret.status==1:
+        outstr = ret.data.decode("UTF-8","ignore")
+    return outstr
+
 def utfprint(str):
     try:
         print(str)
@@ -2567,7 +2583,7 @@ def main(launch_args,start_server=True):
             time.sleep(3)
             sys.exit(2)
         imgmodel = os.path.abspath(imgmodel)
-        loadok = load_model_sd(imgmodel)
+        loadok = sd_load_model(imgmodel)
         print("Load Image Model OK: " + str(loadok))
         if not loadok:
             exitcounter = 999
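Editor's note: taken together, the renamed wrappers give a simple load-then-generate flow. A hypothetical driver under stated assumptions: init_library() has already been called, the model path is a placeholder, and the string sd_generate() returns is interpreted as base64 image data (the wrapper itself only guarantees "" on failure):

    import base64

    # Assumes sd_load_model / sd_generate are the koboldcpp.py wrappers above.
    def generate_png(model_path, outpath="out.png"):
        if not sd_load_model(model_path):
            raise RuntimeError("image model failed to load")
        b64 = sd_generate(prompt="a watercolor fox in the snow",
                          negative_prompt="blurry, low quality",
                          cfg_scale=7, sample_steps=20, seed=1234,
                          sample_method="euler a")
        if not b64:  # wrapper returns "" when status != 1
            raise RuntimeError("image generation failed")
        with open(outpath, "wb") as f:
            f.write(base64.b64decode(b64))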
diff --git a/model_adapter.h b/model_adapter.h
index 75e90434e..51cd75ced 100644
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -78,7 +78,7 @@ bool gpttype_generate_abort();
 const std::string & gpttype_get_pending_output();
 std::vector<int> gpttype_get_token_arr(const std::string & input);
 
-bool sdtype_load_model(const load_sd_model_inputs inputs);
+bool sdtype_load_model(const sd_load_model_inputs inputs);
 sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs);
 
 void timer_start();
diff --git a/otherarch/sdcpp/CMakeLists.txt b/otherarch/sdcpp/CMakeLists.txt
deleted file mode 100644
index a9bedebef..000000000
--- a/otherarch/sdcpp/CMakeLists.txt
+++ /dev/null
@@ -1,95 +0,0 @@
-cmake_minimum_required(VERSION 3.12)
-project("stable-diffusion")
-
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
-if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
-    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
-endif()
-
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-
-if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
-    set(SD_STANDALONE ON)
-else()
-    set(SD_STANDALONE OFF)
-endif()
-
-#
-# Option list
-#
-
-# general
-option(SD_CUBLAS "sd: cuda backend" OFF)
-option(SD_HIPBLAS "sd: rocm backend" OFF)
-option(SD_METAL "sd: metal backend" OFF)
-option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
-option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
-option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
-
-if(SD_CUBLAS)
-    message("Use CUBLAS as backend stable-diffusion")
-    set(GGML_CUBLAS ON)
-    add_definitions(-DSD_USE_CUBLAS)
-endif()
-
-if(SD_METAL)
-    message("Use Metal as backend stable-diffusion")
-    set(GGML_METAL ON)
-    add_definitions(-DSD_USE_METAL)
-endif()
-
-if (SD_HIPBLAS)
-    message("Use HIPBLAS as backend stable-diffusion")
-    set(GGML_HIPBLAS ON)
-    add_definitions(-DSD_USE_CUBLAS)
-    if(SD_FAST_SOFTMAX)
-        set(GGML_CUDA_FAST_SOFTMAX ON)
-    endif()
-endif ()
-
-if(SD_FLASH_ATTN)
-    message("Use Flash Attention for memory optimization")
-    add_definitions(-DSD_USE_FLASH_ATTENTION)
-endif()
-
-set(SD_LIB stable-diffusion)
-
-file(GLOB SD_LIB_SOURCES
-    "*.h"
-    "*.cpp"
-    "*.hpp"
-)
-
-# we can get only one share lib
-if(SD_BUILD_SHARED_LIBS)
-    message("Build shared library")
-    set(BUILD_SHARED_LIBS OFF)
-    message(${SD_LIB_SOURCES})
-    add_library(${SD_LIB} SHARED ${SD_LIB_SOURCES})
-    add_definitions(-DSD_BUILD_SHARED_LIB)
-    target_compile_definitions(${SD_LIB} PRIVATE -DSD_BUILD_DLL)
-    set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-else()
-    message("Build static library")
-    add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
-endif()
-
-
-set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
-
-
-# deps
-add_subdirectory(ggml)
-
-add_subdirectory(thirdparty)
-
-target_link_libraries(${SD_LIB} PUBLIC ggml zip)
-target_include_directories(${SD_LIB} PUBLIC . thirdparty)
-target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
-
-
-add_subdirectory(examples)
-
diff --git a/otherarch/sdcpp/sd_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
similarity index 98%
rename from otherarch/sdcpp/sd_adapter.cpp
rename to otherarch/sdcpp/sdtype_adapter.cpp
index 629db9f62..69febaeae 100644
--- a/otherarch/sdcpp/sd_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -125,7 +125,7 @@ static void sd_logger_callback(enum sd_log_level_t level, const char* log, void*
     }
 }
 
-bool sdtype_load_model(const load_sd_model_inputs inputs) {
+bool sdtype_load_model(const sd_load_model_inputs inputs) {
 
     printf("\nSelected Image Model: %s\n",inputs.model_filename);
 
@@ -174,6 +174,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
         printf("\nError: KCPP SD is not initialized!\n");
         output.data = nullptr;
         output.status = 0;
+        output.data_length = 0;
         return output;
     }
     uint8_t * input_image_buffer = NULL;
@@ -233,6 +234,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
         printf("\nKCPP SD generate failed!\n");
         output.data = nullptr;
         output.status = 0;
+        output.data_length = 0;
         return output;
     }
 
@@ -255,5 +257,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
 
     output.data = nullptr;
     output.status = 1;
+    output.data_length = 0;
     return output;
 }
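Editor's note: on the C++ side every return path now initializes data_length, but in this patch it is set to 0 even on success, so callers cannot yet use it to size the payload. A hedged consumer-side sketch reflecting that contract (read the NUL-terminated data only after checking status, and honor data_length only if a later version starts filling it in):

    # Hypothetical reader for a sd_generation_outputs value returned via ctypes.
    # In this patch data_length is 0 on every path, including success, so the
    # NUL-terminated data pointer remains the only way to recover the payload.
    def read_sd_output(ret):
        if ret.status != 1 or not ret.data:   # status != 1 means failure
            return ""
        payload = ret.data[:ret.data_length] if ret.data_length else ret.data
        return payload.decode("utf-8", "ignore")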