diff --git a/Makefile b/Makefile index 8626def78..c96db748b 100644 --- a/Makefile +++ b/Makefile @@ -430,6 +430,7 @@ ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(VULKAN_FLAGS) -c $< -o $@ +# cpu and clblast separated ggml-cpu.o: ggml/src/ggml-cpu.c ggml/include/ggml-cpu.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@ ggml-cpu_v4_failsafe.o: ggml/src/ggml-cpu.c ggml/include/ggml-cpu.h @@ -563,7 +564,7 @@ whispercpp_cublas.o: otherarch/whispercpp/whisper_adapter.cpp $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ # idiotic "for easier compilation" -GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp src/llama.cpp src/llama-grammar.cpp src/llama-sampling.cpp src/llama-vocab.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml/include/ggml.h ggml/include/ggml-cuda.h include/llama.h otherarch/llama-util.h +GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp src/llama.cpp src/llama-grammar.cpp src/llama-sampling.cpp src/llama-vocab.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml/include/ggml.h ggml/include/ggml-cpu.h ggml/include/ggml-cuda.h include/llama.h otherarch/llama-util.h gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@ gpttype_adapter.o: $(GPTTYPE_ADAPTER) @@ -643,7 +644,7 @@ koboldcpp_clblast_noavx2: endif ifdef CUBLAS_BUILD -koboldcpp_cublas: ggml_v4_cublas.o ggml.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o gpttype_adapter_cublas.o sdcpp_cublas.o whispercpp_cublas.o llavaclip_cublas.o llava.o ggml-backend_cublas.o $(CUBLAS_OBJS) $(OBJS_FULL) $(OBJS) +koboldcpp_cublas: ggml_v4_cublas.o ggml-cpu.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o gpttype_adapter_cublas.o sdcpp_cublas.o whispercpp_cublas.o llavaclip_cublas.o llava.o ggml-backend_cublas.o $(CUBLAS_OBJS) $(OBJS_FULL) $(OBJS) $(CUBLAS_BUILD) else koboldcpp_cublas: @@ -651,7 +652,7 @@ koboldcpp_cublas: endif ifdef HIPBLAS_BUILD -koboldcpp_hipblas: ggml_v4_cublas.o ggml.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o gpttype_adapter_cublas.o sdcpp_cublas.o whispercpp_cublas.o llavaclip_cublas.o llava.o ggml-backend_cublas.o $(HIP_OBJS) $(OBJS_FULL) $(OBJS) +koboldcpp_hipblas: ggml_v4_cublas.o ggml-cpu.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o gpttype_adapter_cublas.o sdcpp_cublas.o whispercpp_cublas.o llavaclip_cublas.o llava.o ggml-backend_cublas.o $(HIP_OBJS) $(OBJS_FULL) $(OBJS) $(HIPBLAS_BUILD) else koboldcpp_hipblas: @@ -662,7 +663,7 @@ ifdef VULKAN_BUILD koboldcpp_vulkan: ggml_v4_vulkan.o ggml-cpu.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter_vulkan.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_FULL) $(OBJS) $(VULKAN_BUILD) ifdef NOAVX2_BUILD -koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v4_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_SIMPLE) $(OBJS) +koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml-cpu_v4_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_SIMPLE) $(OBJS) $(VULKAN_BUILD) else koboldcpp_vulkan_noavx2: diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 693e76de3..fa001f17a 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -1035,7 +1035,7 @@ static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_pr static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) { VK_LOG_DEBUG("ggml_vk_create_buffer(" << device->name << ", " << size << ", " << to_string(req_flags) << ", " << to_string(fallback_flags) << ")"); if (size > device->max_memory_allocation_size) { - printf("\nWARNING: Requested buffer size exceeds device memory allocation limit!\n"); + printf("\nWARNING: Requested buffer size (%zu) exceeds device memory allocation limit (%zu)!\n",size,device->max_memory_allocation_size); } std::lock_guard guard(device->mutex); diff --git a/koboldcpp.py b/koboldcpp.py index 9ee74ea59..fd0b90eab 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -45,7 +45,7 @@ maxhordelen = 400 modelbusy = threading.Lock() requestsinqueue = 0 defaultport = 5001 -KcppVersion = "1.77" +KcppVersion = "1.78" showdebug = True guimode = False showsamplerwarning = True @@ -2867,7 +2867,7 @@ def show_gui(): makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, width=280, set=5,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.") # load model - makefileentry(quick_tab, "Model:", "Select GGUF or GGML Model File", model_var, 40, 280, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.") + makefileentry(quick_tab, "GGUF Model:", "Select GGUF or GGML Model File", model_var, 40, 280, onchoosefile=on_picked_model_file,tooltiptxt="Select a GGUF or GGML model file on disk to be loaded.") model_var.trace("w", gui_changed_modelfile) # Hardware Tab diff --git a/otherarch/sdcpp/model.cpp b/otherarch/sdcpp/model.cpp index dfaff874d..73458320d 100644 --- a/otherarch/sdcpp/model.cpp +++ b/otherarch/sdcpp/model.cpp @@ -14,6 +14,7 @@ #include "ggml-alloc.h" #include "ggml-backend.h" #include "ggml.h" +#include "ggml-cpu.h" #include "stable-diffusion.h"