sse3 mode for noavx2 clblast, fixed metadata, added version command

This commit is contained in:
Concedo 2025-01-06 21:59:05 +08:00
parent 7b25b6171c
commit 58791612d2
5 changed files with 36 additions and 20 deletions

View file

@ -75,6 +75,7 @@ FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))
# these are used on windows, to build some libraries with extra old device compatibility # these are used on windows, to build some libraries with extra old device compatibility
SIMPLECFLAGS = SIMPLECFLAGS =
SIMPLERCFLAGS =
FULLCFLAGS = FULLCFLAGS =
NONECFLAGS = NONECFLAGS =
@ -91,6 +92,7 @@ CUBLAS_OBJS =
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx1.o common.o sampling.o
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o
# OS specific # OS specific
@ -148,6 +150,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
CFLAGS += CFLAGS +=
NONECFLAGS += NONECFLAGS +=
SIMPLECFLAGS += -mavx -msse3 SIMPLECFLAGS += -mavx -msse3
# "Simpler" tier (the *_noavx1 objects) is meant for CPUs without AVX: enable SSE3 only.
# -mavx would also imply SSE3, making this tier identical to SIMPLECFLAGS.
SIMPLERCFLAGS += -msse3
ifdef LLAMA_NOAVX2 ifdef LLAMA_NOAVX2
FULLCFLAGS += -msse3 -mavx FULLCFLAGS += -msse3 -mavx
else else
@ -161,6 +164,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
CFLAGS += CFLAGS +=
NONECFLAGS += NONECFLAGS +=
SIMPLECFLAGS += -mavx -msse3 SIMPLECFLAGS += -mavx -msse3
# "Simpler" tier (the *_noavx1 objects) is meant for CPUs without AVX: enable SSE3 only.
# -mavx would also imply SSE3, making this tier identical to SIMPLECFLAGS.
SIMPLERCFLAGS += -msse3
ifdef LLAMA_NOAVX2 ifdef LLAMA_NOAVX2
FULLCFLAGS += -msse3 -mavx FULLCFLAGS += -msse3 -mavx
else else
@ -462,7 +466,7 @@ ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h
ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
@ -478,19 +482,23 @@ ggml-cpu_v4_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
#quants #quants
ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml-quants_noavx1.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
ggml-cpu-quants.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml-cpu-quants.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@ $(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml-cpu-quants_noavx1.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
@ -499,6 +507,8 @@ ggml-cpu-aarch64.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h g
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
ggml-cpu-aarch64_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h ggml-cpu-aarch64_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml-cpu-aarch64_noavx1.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
$(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
ggml-cpu-aarch64_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h ggml-cpu-aarch64_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
@ -507,6 +517,8 @@ sgemm.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
sgemm_noavx2.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h sgemm_noavx2.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
sgemm_noavx1.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
$(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
@ -562,7 +574,7 @@ ggml_v3_clblast.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
#version 2 libs #version 2 libs
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
@ -576,7 +588,7 @@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
#extreme old version compat #extreme old version compat
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
@ -692,7 +704,7 @@ ifdef CLBLAST_BUILD
koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CLBLAST_BUILD) $(CLBLAST_BUILD)
ifdef NOAVX2_BUILD ifdef NOAVX2_BUILD
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLE) $(OBJS) koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS)
$(CLBLAST_BUILD) $(CLBLAST_BUILD)
else else
koboldcpp_clblast_noavx2: koboldcpp_clblast_noavx2:

View file

@ -99,7 +99,7 @@ class model_backend(InferenceModel):
"extra_classes": "", "extra_classes": "",
'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1}, 'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1},
{'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4} {'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4}
,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Old CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}], ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Older CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}],
}) })
requested_parameters.append({ requested_parameters.append({
"uitype": "text", "uitype": "text",

View file

@ -359,9 +359,9 @@ lib_option_pairs = [
(lib_hipblas, "Use hipBLAS (ROCm)"), (lib_hipblas, "Use hipBLAS (ROCm)"),
(lib_vulkan, "Use Vulkan"), (lib_vulkan, "Use Vulkan"),
(lib_noavx2, "Use CPU (Old CPU)"), (lib_noavx2, "Use CPU (Old CPU)"),
(lib_clblast_noavx2, "Use CLBlast (Old CPU)"),
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"), (lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
(lib_failsafe, "Failsafe Mode (Old CPU)")] (lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
(lib_failsafe, "Failsafe Mode (Older CPU)")]
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)] runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
@ -3124,7 +3124,7 @@ def show_gui():
nl = '\n' nl = '\n'
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "") tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt) num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
num_backends_built.grid(row=1, column=1, padx=195, pady=0) num_backends_built.grid(row=1, column=1, padx=205, pady=0)
num_backends_built.configure(text_color="#00ff00") num_backends_built.configure(text_color="#00ff00")
def gui_changed_modelfile(*args): def gui_changed_modelfile(*args):
@ -3143,7 +3143,7 @@ def show_gui():
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())])) predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "") max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
index = runopts_var.get() index = runopts_var.get()
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
layercounter_label.grid(row=6, column=1, padx=75, sticky="W") layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W") quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
if sys.platform=="darwin" and gpulayers_var.get()=="-1": if sys.platform=="darwin" and gpulayers_var.get()=="-1":
@ -3174,7 +3174,7 @@ def show_gui():
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)": if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
quick_gpuname_label.configure(text=VKDevicesNames[s]) quick_gpuname_label.configure(text=VKDevicesNames[s])
gpuname_label.configure(text=VKDevicesNames[s]) gpuname_label.configure(text=VKDevicesNames[s])
elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)": elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)":
quick_gpuname_label.configure(text=CLDevicesNames[s]) quick_gpuname_label.configure(text=CLDevicesNames[s])
gpuname_label.configure(text=CLDevicesNames[s]) gpuname_label.configure(text=CLDevicesNames[s])
else: else:
@ -3231,12 +3231,12 @@ def show_gui():
global runmode_untouched global runmode_untouched
runmode_untouched = False runmode_untouched = False
index = runopts_var.get() index = runopts_var.get()
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W") quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)": if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)":
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
CUDA_gpu_selector_box.grid_remove() CUDA_gpu_selector_box.grid_remove()
@ -3280,7 +3280,7 @@ def show_gui():
else: else:
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw") quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw") gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
@ -3302,7 +3302,7 @@ def show_gui():
# presets selector # presets selector
makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.") makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=180,variable=runopts_var, state="readonly") runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=190,variable=runopts_var, state="readonly")
runoptbox.grid(row=1, column=1,padx=8, stick="nw") runoptbox.grid(row=1, column=1,padx=8, stick="nw")
runoptbox.set(runopts[0]) # Set to first available option runoptbox.set(runopts[0]) # Set to first available option
@ -3636,9 +3636,9 @@ def show_gui():
args.noavx2 = False args.noavx2 = False
if gpu_choice_var.get()!="All": if gpu_choice_var.get()!="All":
gpuchoiceidx = int(gpu_choice_var.get())-1 gpuchoiceidx = int(gpu_choice_var.get())-1
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)": if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)":
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx] args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
if runopts_var.get() == "Use CLBlast (Old CPU)": if runopts_var.get() == "Use CLBlast (Older CPU)":
args.noavx2 = True args.noavx2 = True
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)": if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
if gpu_choice_var.get()=="All": if gpu_choice_var.get()=="All":
@ -3664,7 +3664,7 @@ def show_gui():
args.usecpu = True args.usecpu = True
if runopts_var.get()=="Use CPU (Old CPU)": if runopts_var.get()=="Use CPU (Old CPU)":
args.noavx2 = True args.noavx2 = True
if runopts_var.get()=="Failsafe Mode (Old CPU)": if runopts_var.get()=="Failsafe Mode (Older CPU)":
args.noavx2 = True args.noavx2 = True
args.usecpu = True args.usecpu = True
args.nommap = True args.nommap = True
@ -4517,6 +4517,9 @@ def main(launch_args,start_server=True):
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
args = launch_args args = launch_args
if (args.version) and len(sys.argv) <= 2:
print(f"{KcppVersion}") # just print version and exit
return
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1): if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
suppress_stdout() suppress_stdout()
@ -5164,6 +5167,7 @@ if __name__ == '__main__':
#more advanced params #more advanced params
advparser = parser.add_argument_group('Advanced Commands') advparser = parser.add_argument_group('Advanced Commands')
advparser.add_argument("--version", help="Prints version and exits.", action='store_true')
advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+') advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512) advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0) advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)

View file

@ -15,7 +15,7 @@ VSVersionInfo(
StringTable( StringTable(
u'040904b0', u'040904b0',
[ [
StringStruct(u'CompanyName', u'Your Company Name'), StringStruct(u'CompanyName', u'KoboldCpp'),
StringStruct(u'FileDescription', u'KoboldCpp'), StringStruct(u'FileDescription', u'KoboldCpp'),
StringStruct(u'InternalName', u'KoboldCpp'), StringStruct(u'InternalName', u'KoboldCpp'),
StringStruct(u'LegalCopyright', u'AGPLv3'), StringStruct(u'LegalCopyright', u'AGPLv3'),

View file

@ -15,7 +15,7 @@ VSVersionInfo(
StringTable( StringTable(
u'040904b0', u'040904b0',
[ [
StringStruct(u'CompanyName', u'Your Company Name'), StringStruct(u'CompanyName', u'KoboldCpp'),
StringStruct(u'FileDescription', u'KoboldCpp'), StringStruct(u'FileDescription', u'KoboldCpp'),
StringStruct(u'InternalName', u'KoboldCpp'), StringStruct(u'InternalName', u'KoboldCpp'),
StringStruct(u'LegalCopyright', u'AGPLv3'), StringStruct(u'LegalCopyright', u'AGPLv3'),