From 499283c63a5fa175c559fb89435f828009c83dcf Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Fri, 23 May 2025 17:10:12 +0800
Subject: [PATCH] rename define to match upstream

---
 CMakeLists.txt                       | 4 ++--
 Makefile                             | 6 +++---
 koboldcpp.py                         | 4 +++-
 otherarch/sdcpp/ggml_extend.hpp      | 6 +++---
 otherarch/sdcpp/stable-diffusion.cpp | 2 +-
 otherarch/sdcpp/upscaler.cpp         | 2 +-
 6 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fa5f28f18..f05d011d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,7 +96,7 @@ if (LLAMA_CUBLAS)
 
         add_compile_definitions(GGML_USE_LLAMAFILE)
         add_compile_definitions(GGML_USE_CUDA)
-        add_compile_definitions(SD_USE_CUBLAS)
+        add_compile_definitions(SD_USE_CUDA)
 
         if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
             add_compile_definitions(GGML_CUDA_F16)
@@ -177,7 +177,7 @@ if (LLAMA_HIPBLAS)
     list(APPEND GGML_SOURCES_ROCM ${SRCS})
     file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
     list(APPEND GGML_SOURCES_ROCM ${SRCS})
-    add_compile_definitions(GGML_USE_HIP GGML_USE_CUDA SD_USE_CUBLAS)
+    add_compile_definitions(GGML_USE_HIP GGML_USE_CUDA SD_USE_CUDA)
     add_library(ggml-rocm ${GGML_SOURCES_CUDA})
 
     file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
diff --git a/Makefile b/Makefile
index c152634f5..8277d837b 100644
--- a/Makefile
+++ b/Makefile
@@ -83,7 +83,7 @@ CLBLAST_FLAGS = -DGGML_USE_CLBLAST
 FAILSAFE_FLAGS = -DUSE_FAILSAFE
 VULKAN_FLAGS = -DGGML_USE_VULKAN -DSD_USE_VULKAN
 ifdef LLAMA_CUBLAS
-	CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUBLAS
+	CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUDA
 else
 	CUBLAS_FLAGS =
 endif
@@ -177,7 +177,7 @@ OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/templat
 OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
 
 ifdef LLAMA_CUBLAS
-	CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
+	CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 	CUBLASLD_FLAGS = -lcuda -lcublas -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/local/cuda/targets/sbsa-linux/lib -L/usr/lib/wsl/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 	CUBLAS_OBJS += $(patsubst %.cu,%.o,$(filter-out ggml/src/ggml-cuda/ggml-cuda.cu, $(wildcard ggml/src/ggml-cuda/*.cu)))
@@ -256,7 +256,7 @@ ifdef DETECT_ROCWMMA
 	HIPFLAGS += -DGGML_HIP_ROCWMMA_FATTN -I$(dir $(DETECT_ROCWMMA))
 endif
 
-	HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
+	HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUDA $(shell $(ROCM_PATH)/bin/hipconfig -C)
 	HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 	HIPLDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
 	HIPLDFLAGS += -lhipblas -lamdhip64 -lrocblas
diff --git a/koboldcpp.py b/koboldcpp.py
index 0dcd9eefd..ea5f1c027 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -3134,7 +3134,9 @@ Change Mode
                 return
             try:
                 genparams = json.loads(body)
-                schema = genparams.get('schema', {})
+                schema = genparams.get('schema', None)
+                if not schema:
+                    schema = genparams
                 decoded = convert_json_to_gbnf(schema)
                 response_body = (json.dumps({"result": decoded,"success":(True if decoded else False)}).encode())
             except Exception as e:
diff --git a/otherarch/sdcpp/ggml_extend.hpp b/otherarch/sdcpp/ggml_extend.hpp
index ab40dc861..abce7cfe4 100644
--- a/otherarch/sdcpp/ggml_extend.hpp
+++ b/otherarch/sdcpp/ggml_extend.hpp
@@ -28,7 +28,7 @@
 
 #include "model.h"
 
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -782,7 +782,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
                                                         struct ggml_tensor* k,
                                                         struct ggml_tensor* v,
                                                         bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUDA) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
     struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false);  // [N * n_head, n_token, d_head]
 #else
     float d_head = (float)q->ne[0];
@@ -938,7 +938,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }
 
 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
+#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);
diff --git a/otherarch/sdcpp/stable-diffusion.cpp b/otherarch/sdcpp/stable-diffusion.cpp
index 451afcf24..bedb76987 100644
--- a/otherarch/sdcpp/stable-diffusion.cpp
+++ b/otherarch/sdcpp/stable-diffusion.cpp
@@ -161,7 +161,7 @@ public:
                         bool diffusion_flash_attn) {
         use_tiny_autoencoder = taesd_path.size() > 0;
         std::string taesd_path_fixed = taesd_path;
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif
diff --git a/otherarch/sdcpp/upscaler.cpp b/otherarch/sdcpp/upscaler.cpp
index ea5629855..8907e69c3 100644
--- a/otherarch/sdcpp/upscaler.cpp
+++ b/otherarch/sdcpp/upscaler.cpp
@@ -15,7 +15,7 @@ struct UpscalerGGML {
     }
 
     bool load_from_file(const std::string& esrgan_path) {
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif
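
Note (not part of the patch): after this commit, builds that previously passed -DSD_USE_CUBLAS must pass -DSD_USE_CUDA instead, otherwise the CUDA branches compile out and the sdcpp code falls back to the CPU backend. The sketch below illustrates the backend-selection pattern the renamed define gates, mirroring the #ifdef blocks in stable-diffusion.cpp and upscaler.cpp; sd_pick_backend() is a hypothetical wrapper written for this note, while ggml_backend_cuda_init() and ggml_backend_cpu_init() are the existing ggml APIs those sources already call.

// Hypothetical sketch of the define-gated backend selection after the rename.
#include "ggml-backend.h"
#ifdef SD_USE_CUDA
#include "ggml-cuda.h"
#endif

static ggml_backend_t sd_pick_backend() {
    ggml_backend_t backend = NULL;
#ifdef SD_USE_CUDA
    // Only compiled in when the build defines SD_USE_CUDA (formerly SD_USE_CUBLAS).
    backend = ggml_backend_cuda_init(0); // device 0, as in stable-diffusion.cpp / upscaler.cpp
#endif
    if (backend == NULL) {
        // Fall back to CPU when no GPU backend was compiled in or init failed.
        backend = ggml_backend_cpu_init();
    }
    return backend;
}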