From 0a11f50da89d035ca37a87ae768ccc2b1a6fa330 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Tue, 18 Jul 2023 20:26:18 +0800 Subject: [PATCH] reenabled sched_yield, reduced sampler warning msg to once per session --- Makefile | 4 ++-- ggml.c | 2 +- koboldcpp.py | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index e580e6c6b..374ee0a74 100644 --- a/Makefile +++ b/Makefile @@ -144,7 +144,7 @@ ifdef LLAMA_CUBLAS CUBLASLD_FLAGS = -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib CUBLAS_OBJS = ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o NVCC = nvcc - NVCCFLAGS = --forward-unknown-to-host-compiler + NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math ifdef CUDA_DOCKER_ARCH NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) else @@ -358,7 +358,7 @@ koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common $(OPENBLAS_BUILD) koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o $(OBJS) $(FAILSAFE_BUILD) -koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter.o k_quants_noavx2.o $(OBJS) +koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o $(OBJS) $(OPENBLAS_NOAVX2_BUILD) koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS) $(CLBLAST_BUILD) diff --git a/ggml.c b/ggml.c index d6ba8ac97..a288b778d 100644 --- a/ggml.c +++ b/ggml.c @@ -16383,7 +16383,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { // wait for other threads to finish const int last = node_n; do { - //sched_yield(); + sched_yield(); node_n = atomic_load(&state->shared->node_n); } while (node_n == last); } diff --git a/koboldcpp.py b/koboldcpp.py index d269ae5b8..1c55fd302 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -242,8 +242,10 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_ for i, sampler in enumerate(sampler_order): inputs.sampler_order[i] = sampler inputs.sampler_len = len(sampler_order) - if inputs.sampler_len>0 and (inputs.sampler_order[0]!=6 or inputs.sampler_order[inputs.sampler_len-1]!=5): - print("\n(Warning!!! Poor sampler_order detected! You will have reduced quality. Recommended values are [6,0,1,3,4,2,5])") + global showsamplerwarning + if showsamplerwarning and inputs.sampler_len>0 and (inputs.sampler_order[0]!=6 or inputs.sampler_order[inputs.sampler_len-1]!=5): + print("\n(Note: Sub-optimal sampler_order detected. You may have reduced quality. Recommended sampler values are [6,0,1,3,4,2,5]. This message will only show once per session.)") + showsamplerwarning = False except TypeError as e: print("ERROR: sampler_order must be a list of integers: " + str(e)) inputs.seed = seed @@ -277,6 +279,7 @@ modelbusy = False defaultport = 5001 KcppVersion = "1.36" showdebug = True +showsamplerwarning = True class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): sys_version = ""