mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 01:54:37 +00:00
sse3 mode for noavx2 clblast, fixed metadata, added version command
This commit is contained in:
parent
7b25b6171c
commit
58791612d2
5 changed files with 36 additions and 20 deletions
22
Makefile
22
Makefile
|
@ -75,6 +75,7 @@ FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))
|
||||||
|
|
||||||
# these are used on windows, to build some libraries with extra old device compatibility
|
# these are used on windows, to build some libraries with extra old device compatibility
|
||||||
SIMPLECFLAGS =
|
SIMPLECFLAGS =
|
||||||
|
SIMPLERCFLAGS =
|
||||||
FULLCFLAGS =
|
FULLCFLAGS =
|
||||||
NONECFLAGS =
|
NONECFLAGS =
|
||||||
|
|
||||||
|
@ -91,6 +92,7 @@ CUBLAS_OBJS =
|
||||||
|
|
||||||
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o
|
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o
|
||||||
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o
|
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o
|
||||||
|
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx1.o common.o sampling.o
|
||||||
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o
|
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o
|
||||||
|
|
||||||
# OS specific
|
# OS specific
|
||||||
|
@ -148,6 +150,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
||||||
CFLAGS +=
|
CFLAGS +=
|
||||||
NONECFLAGS +=
|
NONECFLAGS +=
|
||||||
SIMPLECFLAGS += -mavx -msse3
|
SIMPLECFLAGS += -mavx -msse3
|
||||||
|
SIMPLERCFLAGS += -mavx
|
||||||
ifdef LLAMA_NOAVX2
|
ifdef LLAMA_NOAVX2
|
||||||
FULLCFLAGS += -msse3 -mavx
|
FULLCFLAGS += -msse3 -mavx
|
||||||
else
|
else
|
||||||
|
@ -161,6 +164,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
||||||
CFLAGS +=
|
CFLAGS +=
|
||||||
NONECFLAGS +=
|
NONECFLAGS +=
|
||||||
SIMPLECFLAGS += -mavx -msse3
|
SIMPLECFLAGS += -mavx -msse3
|
||||||
|
SIMPLERCFLAGS += -mavx
|
||||||
ifdef LLAMA_NOAVX2
|
ifdef LLAMA_NOAVX2
|
||||||
FULLCFLAGS += -msse3 -mavx
|
FULLCFLAGS += -msse3 -mavx
|
||||||
else
|
else
|
||||||
|
@ -462,7 +466,7 @@ ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||||
ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
|
@ -478,19 +482,23 @@ ggml-cpu_v4_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#quants
|
#quants
|
||||||
ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||||
ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||||
|
ggml-quants_noavx1.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
||||||
|
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||||
ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||||
ggml-cpu-quants.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
ggml-cpu-quants.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||||
ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||||
|
ggml-cpu-quants_noavx1.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
||||||
|
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||||
ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h
|
||||||
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
@ -499,6 +507,8 @@ ggml-cpu-aarch64.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h g
|
||||||
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||||
ggml-cpu-aarch64_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
|
ggml-cpu-aarch64_noavx2.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
|
||||||
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||||
|
ggml-cpu-aarch64_noavx1.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
|
||||||
|
$(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||||
ggml-cpu-aarch64_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
|
ggml-cpu-aarch64_failsafe.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
|
||||||
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
@ -507,6 +517,8 @@ sgemm.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm
|
||||||
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||||
sgemm_noavx2.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
|
sgemm_noavx2.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
|
||||||
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||||
|
sgemm_noavx1.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
|
||||||
|
$(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||||
sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
|
sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
|
||||||
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
@ -562,7 +574,7 @@ ggml_v3_clblast.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#version 2 libs
|
#version 2 libs
|
||||||
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
|
@ -576,7 +588,7 @@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#extreme old version compat
|
#extreme old version compat
|
||||||
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
||||||
|
@ -692,7 +704,7 @@ ifdef CLBLAST_BUILD
|
||||||
koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
ifdef NOAVX2_BUILD
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLE) $(OBJS)
|
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_clblast_noavx2:
|
koboldcpp_clblast_noavx2:
|
||||||
|
|
2
class.py
2
class.py
|
@ -99,7 +99,7 @@ class model_backend(InferenceModel):
|
||||||
"extra_classes": "",
|
"extra_classes": "",
|
||||||
'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1},
|
'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1},
|
||||||
{'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4}
|
{'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4}
|
||||||
,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Old CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}],
|
,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Older CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}],
|
||||||
})
|
})
|
||||||
requested_parameters.append({
|
requested_parameters.append({
|
||||||
"uitype": "text",
|
"uitype": "text",
|
||||||
|
|
28
koboldcpp.py
28
koboldcpp.py
|
@ -359,9 +359,9 @@ lib_option_pairs = [
|
||||||
(lib_hipblas, "Use hipBLAS (ROCm)"),
|
(lib_hipblas, "Use hipBLAS (ROCm)"),
|
||||||
(lib_vulkan, "Use Vulkan"),
|
(lib_vulkan, "Use Vulkan"),
|
||||||
(lib_noavx2, "Use CPU (Old CPU)"),
|
(lib_noavx2, "Use CPU (Old CPU)"),
|
||||||
(lib_clblast_noavx2, "Use CLBlast (Old CPU)"),
|
|
||||||
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
|
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
|
||||||
(lib_failsafe, "Failsafe Mode (Old CPU)")]
|
(lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
|
||||||
|
(lib_failsafe, "Failsafe Mode (Older CPU)")]
|
||||||
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
||||||
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
|
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
|
||||||
|
|
||||||
|
@ -3124,7 +3124,7 @@ def show_gui():
|
||||||
nl = '\n'
|
nl = '\n'
|
||||||
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
||||||
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
|
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
|
||||||
num_backends_built.grid(row=1, column=1, padx=195, pady=0)
|
num_backends_built.grid(row=1, column=1, padx=205, pady=0)
|
||||||
num_backends_built.configure(text_color="#00ff00")
|
num_backends_built.configure(text_color="#00ff00")
|
||||||
|
|
||||||
def gui_changed_modelfile(*args):
|
def gui_changed_modelfile(*args):
|
||||||
|
@ -3143,7 +3143,7 @@ def show_gui():
|
||||||
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
|
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
|
||||||
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
|
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
|
||||||
index = runopts_var.get()
|
index = runopts_var.get()
|
||||||
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
|
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
|
||||||
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||||
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||||
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
||||||
|
@ -3174,7 +3174,7 @@ def show_gui():
|
||||||
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
|
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
|
||||||
quick_gpuname_label.configure(text=VKDevicesNames[s])
|
quick_gpuname_label.configure(text=VKDevicesNames[s])
|
||||||
gpuname_label.configure(text=VKDevicesNames[s])
|
gpuname_label.configure(text=VKDevicesNames[s])
|
||||||
elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)":
|
elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)":
|
||||||
quick_gpuname_label.configure(text=CLDevicesNames[s])
|
quick_gpuname_label.configure(text=CLDevicesNames[s])
|
||||||
gpuname_label.configure(text=CLDevicesNames[s])
|
gpuname_label.configure(text=CLDevicesNames[s])
|
||||||
else:
|
else:
|
||||||
|
@ -3231,12 +3231,12 @@ def show_gui():
|
||||||
global runmode_untouched
|
global runmode_untouched
|
||||||
runmode_untouched = False
|
runmode_untouched = False
|
||||||
index = runopts_var.get()
|
index = runopts_var.get()
|
||||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||||
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
||||||
gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
||||||
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||||
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||||
if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)":
|
if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)":
|
||||||
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
CUDA_gpu_selector_box.grid_remove()
|
CUDA_gpu_selector_box.grid_remove()
|
||||||
|
@ -3280,7 +3280,7 @@ def show_gui():
|
||||||
else:
|
else:
|
||||||
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
|
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
|
||||||
|
|
||||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||||
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
||||||
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
|
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
|
||||||
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
||||||
|
@ -3302,7 +3302,7 @@ def show_gui():
|
||||||
# presets selector
|
# presets selector
|
||||||
makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
|
makelabel(quick_tab, "Presets:", 1,0,"Select a backend to use.\nCuBLAS runs on Nvidia GPUs, and is much faster.\nVulkan and CLBlast works on all GPUs but is somewhat slower.\nOtherwise, runs on CPU only.\nNoAVX2 and Failsafe modes support older PCs.")
|
||||||
|
|
||||||
runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=180,variable=runopts_var, state="readonly")
|
runoptbox = ctk.CTkComboBox(quick_tab, values=runopts, width=190,variable=runopts_var, state="readonly")
|
||||||
runoptbox.grid(row=1, column=1,padx=8, stick="nw")
|
runoptbox.grid(row=1, column=1,padx=8, stick="nw")
|
||||||
runoptbox.set(runopts[0]) # Set to first available option
|
runoptbox.set(runopts[0]) # Set to first available option
|
||||||
|
|
||||||
|
@ -3636,9 +3636,9 @@ def show_gui():
|
||||||
args.noavx2 = False
|
args.noavx2 = False
|
||||||
if gpu_choice_var.get()!="All":
|
if gpu_choice_var.get()!="All":
|
||||||
gpuchoiceidx = int(gpu_choice_var.get())-1
|
gpuchoiceidx = int(gpu_choice_var.get())-1
|
||||||
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)":
|
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||||
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
|
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
|
||||||
if runopts_var.get() == "Use CLBlast (Old CPU)":
|
if runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
|
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
|
||||||
if gpu_choice_var.get()=="All":
|
if gpu_choice_var.get()=="All":
|
||||||
|
@ -3664,7 +3664,7 @@ def show_gui():
|
||||||
args.usecpu = True
|
args.usecpu = True
|
||||||
if runopts_var.get()=="Use CPU (Old CPU)":
|
if runopts_var.get()=="Use CPU (Old CPU)":
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
if runopts_var.get()=="Failsafe Mode (Old CPU)":
|
if runopts_var.get()=="Failsafe Mode (Older CPU)":
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
args.usecpu = True
|
args.usecpu = True
|
||||||
args.nommap = True
|
args.nommap = True
|
||||||
|
@ -4517,6 +4517,9 @@ def main(launch_args,start_server=True):
|
||||||
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
|
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
|
||||||
|
|
||||||
args = launch_args
|
args = launch_args
|
||||||
|
if (args.version) and len(sys.argv) <= 2:
|
||||||
|
print(f"{KcppVersion}") # just print version and exit
|
||||||
|
return
|
||||||
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
|
if (args.model_param or args.model) and args.prompt and not args.benchmark and not (args.debugmode >= 1):
|
||||||
suppress_stdout()
|
suppress_stdout()
|
||||||
|
|
||||||
|
@ -5164,6 +5167,7 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
#more advanced params
|
#more advanced params
|
||||||
advparser = parser.add_argument_group('Advanced Commands')
|
advparser = parser.add_argument_group('Advanced Commands')
|
||||||
|
advparser.add_argument("--version", help="Prints version and exits.", action='store_true')
|
||||||
advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
|
advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
|
||||||
advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
|
advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
|
||||||
advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
|
advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
|
||||||
|
|
|
@ -15,7 +15,7 @@ VSVersionInfo(
|
||||||
StringTable(
|
StringTable(
|
||||||
u'040904b0',
|
u'040904b0',
|
||||||
[
|
[
|
||||||
StringStruct(u'CompanyName', u'Your Company Name'),
|
StringStruct(u'CompanyName', u'KoboldCpp'),
|
||||||
StringStruct(u'FileDescription', u'KoboldCpp'),
|
StringStruct(u'FileDescription', u'KoboldCpp'),
|
||||||
StringStruct(u'InternalName', u'KoboldCpp'),
|
StringStruct(u'InternalName', u'KoboldCpp'),
|
||||||
StringStruct(u'LegalCopyright', u'AGPLv3'),
|
StringStruct(u'LegalCopyright', u'AGPLv3'),
|
||||||
|
|
|
@ -15,7 +15,7 @@ VSVersionInfo(
|
||||||
StringTable(
|
StringTable(
|
||||||
u'040904b0',
|
u'040904b0',
|
||||||
[
|
[
|
||||||
StringStruct(u'CompanyName', u'Your Company Name'),
|
StringStruct(u'CompanyName', u'KoboldCpp'),
|
||||||
StringStruct(u'FileDescription', u'KoboldCpp'),
|
StringStruct(u'FileDescription', u'KoboldCpp'),
|
||||||
StringStruct(u'InternalName', u'KoboldCpp'),
|
StringStruct(u'InternalName', u'KoboldCpp'),
|
||||||
StringStruct(u'LegalCopyright', u'AGPLv3'),
|
StringStruct(u'LegalCopyright', u'AGPLv3'),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue