mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
added one more backend for clblast noavx2 and clblast failsafe
This commit is contained in:
parent
898856e183
commit
7a5499e77b
10 changed files with 57 additions and 382 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -110,6 +110,7 @@ tests/test-tokenizer-1-bpe
|
||||||
/koboldcpp_noavx2.so
|
/koboldcpp_noavx2.so
|
||||||
/koboldcpp_clblast.so
|
/koboldcpp_clblast.so
|
||||||
/koboldcpp_clblast_noavx2.so
|
/koboldcpp_clblast_noavx2.so
|
||||||
|
/koboldcpp_clblast_failsafe.so
|
||||||
/koboldcpp_cublas.so
|
/koboldcpp_cublas.so
|
||||||
/koboldcpp_vulkan.so
|
/koboldcpp_vulkan.so
|
||||||
/koboldcpp_vulkan_noavx2.so
|
/koboldcpp_vulkan_noavx2.so
|
||||||
|
@ -119,6 +120,7 @@ tests/test-tokenizer-1-bpe
|
||||||
/koboldcpp_clblast.dll
|
/koboldcpp_clblast.dll
|
||||||
/koboldcpp_clblast_noavx2.dll
|
/koboldcpp_clblast_noavx2.dll
|
||||||
/koboldcpp_vulkan_noavx2.dll
|
/koboldcpp_vulkan_noavx2.dll
|
||||||
|
/koboldcpp_clblast_failsafe.dll
|
||||||
/koboldcpp_cublas.dll
|
/koboldcpp_cublas.dll
|
||||||
/koboldcpp_vulkan.dll
|
/koboldcpp_vulkan.dll
|
||||||
/cublas64_11.dll
|
/cublas64_11.dll
|
||||||
|
|
45
Makefile
45
Makefile
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
.PHONY: finishedmsg
|
.PHONY: finishedmsg
|
||||||
|
|
||||||
default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 finishedmsg
|
default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_clblast_failsafe koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 finishedmsg
|
||||||
tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip ttsmain whispermain sdmain gguf-split
|
tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip ttsmain whispermain sdmain gguf-split
|
||||||
|
|
||||||
ifndef UNAME_S
|
ifndef UNAME_S
|
||||||
|
@ -140,12 +140,8 @@ CCV := $(shell $(CC) --version | head -n 1)
|
||||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||||
|
|
||||||
# Architecture specific
|
# Architecture specific
|
||||||
# TODO: probably these flags need to be tweaked on some architectures
|
# For x86 based architectures
|
||||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
|
||||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
||||||
# Use all CPU extensions that are available:
|
|
||||||
# old library NEEDS mf16c to work. so we must build with it. new one doesnt
|
|
||||||
ifeq ($(OS),Windows_NT)
|
|
||||||
ifdef LLAMA_PORTABLE
|
ifdef LLAMA_PORTABLE
|
||||||
SIMPLECFLAGS += -mavx -msse3 -mssse3
|
SIMPLECFLAGS += -mavx -msse3 -mssse3
|
||||||
SIMPLERCFLAGS += -msse3 -mssse3
|
SIMPLERCFLAGS += -msse3 -mssse3
|
||||||
|
@ -153,24 +149,11 @@ ifdef LLAMA_NOAVX2
|
||||||
FULLCFLAGS += -msse3 -mssse3 -mavx
|
FULLCFLAGS += -msse3 -mssse3 -mavx
|
||||||
else
|
else
|
||||||
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
|
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
|
||||||
endif
|
endif # LLAMA_NOAVX2
|
||||||
else
|
else
|
||||||
CFLAGS += -march=native -mtune=native
|
CFLAGS += -march=native -mtune=native
|
||||||
endif
|
endif # LLAMA_PORTABLE
|
||||||
else
|
endif # if x86
|
||||||
ifdef LLAMA_PORTABLE
|
|
||||||
SIMPLECFLAGS += -mavx -msse3 -mssse3
|
|
||||||
SIMPLERCFLAGS += -msse3 -mssse3
|
|
||||||
ifdef LLAMA_NOAVX2
|
|
||||||
FULLCFLAGS += -msse3 -mssse3 -mavx
|
|
||||||
else
|
|
||||||
FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
CFLAGS += -march=native -mtune=native
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef LLAMA_NO_ACCELERATE
|
ifndef LLAMA_NO_ACCELERATE
|
||||||
# Mac M1 - include Accelerate framework.
|
# Mac M1 - include Accelerate framework.
|
||||||
|
@ -436,6 +419,8 @@ ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
|
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
ggml_v4_clblast_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
|
ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||||
|
@ -452,6 +437,8 @@ ggml-cpu_v4_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
|
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
ggml-cpu_v4_clblast_failsafe.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#quants
|
#quants
|
||||||
|
@ -548,6 +535,8 @@ ggml_v3_clblast.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
|
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
ggml_v3_clblast_failsafe.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#version 2 libs
|
#version 2 libs
|
||||||
|
@ -562,6 +551,8 @@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
|
$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
ggml_v2_clblast_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#extreme old version compat
|
#extreme old version compat
|
||||||
|
@ -633,7 +624,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
|
||||||
$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
|
rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_clblast_failsafe.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_clblast_failsafe.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
|
||||||
rm -vrf ggml/src/ggml-cuda/*.o
|
rm -vrf ggml/src/ggml-cuda/*.o
|
||||||
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
|
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
|
||||||
|
|
||||||
|
@ -679,17 +670,23 @@ ifdef CLBLAST_BUILD
|
||||||
koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
ifdef NOAVX2_BUILD
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS)
|
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLE) $(OBJS)
|
||||||
|
$(CLBLAST_BUILD)
|
||||||
|
koboldcpp_clblast_failsafe: ggml_v4_clblast_failsafe.o ggml-cpu_v4_clblast_failsafe.o ggml_v3_clblast_failsafe.o ggml_v2_clblast_failsafe.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_clblast_noavx2:
|
koboldcpp_clblast_noavx2:
|
||||||
$(DONOTHING)
|
$(DONOTHING)
|
||||||
|
koboldcpp_clblast_failsafe:
|
||||||
|
$(DONOTHING)
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
koboldcpp_clblast:
|
koboldcpp_clblast:
|
||||||
$(DONOTHING)
|
$(DONOTHING)
|
||||||
koboldcpp_clblast_noavx2:
|
koboldcpp_clblast_noavx2:
|
||||||
$(DONOTHING)
|
$(DONOTHING)
|
||||||
|
koboldcpp_clblast_failsafe:
|
||||||
|
$(DONOTHING)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef CUBLAS_BUILD
|
ifdef CUBLAS_BUILD
|
||||||
|
|
41
koboldcpp.py
41
koboldcpp.py
|
@ -377,6 +377,7 @@ lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.s
|
||||||
lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
|
lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so")
|
||||||
lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
|
lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
|
||||||
lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so")
|
lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so")
|
||||||
|
lib_clblast_failsafe = pick_existant_file("koboldcpp_clblast_failsafe.dll","koboldcpp_clblast_failsafe.so")
|
||||||
lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so")
|
lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so")
|
||||||
lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so")
|
lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so")
|
||||||
lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
|
lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so")
|
||||||
|
@ -384,26 +385,30 @@ lib_vulkan_noavx2 = pick_existant_file("koboldcpp_vulkan_noavx2.dll","koboldcpp_
|
||||||
libname = ""
|
libname = ""
|
||||||
lib_option_pairs = [
|
lib_option_pairs = [
|
||||||
(lib_default, "Use CPU"),
|
(lib_default, "Use CPU"),
|
||||||
(lib_clblast, "Use CLBlast"),
|
|
||||||
(lib_cublas, "Use CuBLAS"),
|
(lib_cublas, "Use CuBLAS"),
|
||||||
(lib_hipblas, "Use hipBLAS (ROCm)"),
|
(lib_hipblas, "Use hipBLAS (ROCm)"),
|
||||||
(lib_vulkan, "Use Vulkan"),
|
(lib_vulkan, "Use Vulkan"),
|
||||||
|
(lib_clblast, "Use CLBlast"),
|
||||||
(lib_noavx2, "Use CPU (Old CPU)"),
|
(lib_noavx2, "Use CPU (Old CPU)"),
|
||||||
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
|
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
|
||||||
(lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
|
(lib_clblast_noavx2, "Use CLBlast (Old CPU)"),
|
||||||
|
(lib_clblast_failsafe, "Use CLBlast (Older CPU)"),
|
||||||
(lib_failsafe, "Failsafe Mode (Older CPU)")]
|
(lib_failsafe, "Failsafe Mode (Older CPU)")]
|
||||||
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
default_option, cublas_option, hipblas_option, vulkan_option, clblast_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, clblast_failsafe_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
|
||||||
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
|
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
|
||||||
|
|
||||||
def init_library():
|
def init_library():
|
||||||
global handle, args, libname
|
global handle, args, libname
|
||||||
global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
|
global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_clblast_failsafe,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2
|
||||||
|
|
||||||
libname = lib_default
|
libname = lib_default
|
||||||
|
|
||||||
if args.noavx2:
|
if args.noavx2:
|
||||||
if args.useclblast and file_exists(lib_clblast_noavx2) and (os.name!='nt' or file_exists("clblast.dll")):
|
if args.useclblast and (os.name!='nt' or file_exists("clblast.dll")):
|
||||||
libname = lib_clblast_noavx2
|
if (args.failsafe) and file_exists(lib_clblast_failsafe):
|
||||||
|
libname = lib_clblast_failsafe
|
||||||
|
elif file_exists(lib_clblast_noavx2):
|
||||||
|
libname = lib_clblast_noavx2
|
||||||
elif (args.usevulkan is not None) and file_exists(lib_vulkan_noavx2):
|
elif (args.usevulkan is not None) and file_exists(lib_vulkan_noavx2):
|
||||||
libname = lib_vulkan_noavx2
|
libname = lib_vulkan_noavx2
|
||||||
elif (args.failsafe) and file_exists(lib_failsafe):
|
elif (args.failsafe) and file_exists(lib_failsafe):
|
||||||
|
@ -3425,7 +3430,7 @@ def show_gui():
|
||||||
# backend count label with the tooltip function
|
# backend count label with the tooltip function
|
||||||
nl = '\n'
|
nl = '\n'
|
||||||
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "")
|
||||||
num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt)
|
num_backends_built = makelabel(parent, str(len(runopts)) + "/9", 5, 2,tooltxt)
|
||||||
num_backends_built.grid(row=1, column=1, padx=205, pady=0)
|
num_backends_built.grid(row=1, column=1, padx=205, pady=0)
|
||||||
num_backends_built.configure(text_color="#00ff00")
|
num_backends_built.configure(text_color="#00ff00")
|
||||||
|
|
||||||
|
@ -3446,7 +3451,7 @@ def show_gui():
|
||||||
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
|
predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
|
||||||
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
|
max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
|
||||||
index = runopts_var.get()
|
index = runopts_var.get()
|
||||||
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
|
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
|
||||||
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||||
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
|
||||||
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
if sys.platform=="darwin" and gpulayers_var.get()=="-1":
|
||||||
|
@ -3477,7 +3482,7 @@ def show_gui():
|
||||||
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
|
if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)":
|
||||||
quick_gpuname_label.configure(text=VKDevicesNames[s])
|
quick_gpuname_label.configure(text=VKDevicesNames[s])
|
||||||
gpuname_label.configure(text=VKDevicesNames[s])
|
gpuname_label.configure(text=VKDevicesNames[s])
|
||||||
elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)":
|
elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)" or v == "Use CLBlast (Older CPU)":
|
||||||
quick_gpuname_label.configure(text=CLDevicesNames[s])
|
quick_gpuname_label.configure(text=CLDevicesNames[s])
|
||||||
gpuname_label.configure(text=CLDevicesNames[s])
|
gpuname_label.configure(text=CLDevicesNames[s])
|
||||||
else:
|
else:
|
||||||
|
@ -3534,12 +3539,12 @@ def show_gui():
|
||||||
global runmode_untouched
|
global runmode_untouched
|
||||||
runmode_untouched = False
|
runmode_untouched = False
|
||||||
index = runopts_var.get()
|
index = runopts_var.get()
|
||||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||||
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
||||||
gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
|
||||||
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||||
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
|
||||||
if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)":
|
if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)":
|
||||||
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
|
||||||
CUDA_gpu_selector_box.grid_remove()
|
CUDA_gpu_selector_box.grid_remove()
|
||||||
|
@ -3583,7 +3588,7 @@ def show_gui():
|
||||||
else:
|
else:
|
||||||
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
|
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
|
||||||
|
|
||||||
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
|
||||||
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
||||||
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
|
gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw")
|
||||||
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
|
||||||
|
@ -3954,10 +3959,13 @@ def show_gui():
|
||||||
args.noavx2 = False
|
args.noavx2 = False
|
||||||
if gpu_choice_var.get()!="All":
|
if gpu_choice_var.get()!="All":
|
||||||
gpuchoiceidx = int(gpu_choice_var.get())-1
|
gpuchoiceidx = int(gpu_choice_var.get())-1
|
||||||
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)":
|
if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)" or runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||||
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
|
args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx]
|
||||||
if runopts_var.get() == "Use CLBlast (Older CPU)":
|
if runopts_var.get() == "Use CLBlast (Old CPU)":
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
|
elif runopts_var.get() == "Use CLBlast (Older CPU)":
|
||||||
|
args.noavx2 = True
|
||||||
|
args.failsafe = True
|
||||||
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
|
if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)":
|
||||||
if gpu_choice_var.get()=="All":
|
if gpu_choice_var.get()=="All":
|
||||||
args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
|
args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"]
|
||||||
|
@ -4926,6 +4934,9 @@ def main(launch_args,start_server=True):
|
||||||
if args.quantkv and args.quantkv>0 and not args.flashattention:
|
if args.quantkv and args.quantkv>0 and not args.flashattention:
|
||||||
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
|
exit_with_error(1, "Error: Using --quantkv requires --flashattention")
|
||||||
|
|
||||||
|
if args.failsafe: #failsafe implies noavx2
|
||||||
|
args.noavx2 = True
|
||||||
|
|
||||||
if not args.model_param:
|
if not args.model_param:
|
||||||
args.model_param = args.model
|
args.model_param = args.model
|
||||||
|
|
||||||
|
@ -5596,7 +5607,7 @@ if __name__ == '__main__':
|
||||||
compatgroup3.add_argument("--usemmap", help="If set, uses mmap to load model. This model will not be unloadable.", action='store_true')
|
compatgroup3.add_argument("--usemmap", help="If set, uses mmap to load model. This model will not be unloadable.", action='store_true')
|
||||||
advparser.add_argument("--usemlock", help="Enables mlock, preventing the RAM used to load the model from being paged out. Not usually recommended.", action='store_true')
|
advparser.add_argument("--usemlock", help="Enables mlock, preventing the RAM used to load the model from being paged out. Not usually recommended.", action='store_true')
|
||||||
advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true')
|
advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true')
|
||||||
advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices.", action='store_true')
|
advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices. Can be combined with useclblast if your device supports OpenCL.", action='store_true')
|
||||||
advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0)
|
advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0)
|
||||||
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
|
advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1)
|
||||||
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
|
advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None)
|
||||||
|
|
|
@ -38,10 +38,10 @@ if [[ $1 == "rebuild" ]]; then
|
||||||
elif [[ $1 == "dist" ]]; then
|
elif [[ $1 == "dist" ]]; then
|
||||||
bin/micromamba remove --no-rc -r conda -p conda/envs/linux --force ocl-icd -y
|
bin/micromamba remove --no-rc -r conda -p conda/envs/linux --force ocl-icd -y
|
||||||
if [ -n "$NOAVX2" ]; then
|
if [ -n "$NOAVX2" ]; then
|
||||||
bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND"
|
bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_clblast_failsafe.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND"
|
||||||
else
|
else
|
||||||
bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND"
|
bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_clblast_failsafe.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND"
|
||||||
bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND"
|
bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_clblast_failsafe.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND"
|
||||||
fi
|
fi
|
||||||
bin/micromamba install --no-rc -r conda -p conda/envs/linux ocl-icd -c conda-forge -y
|
bin/micromamba install --no-rc -r conda -p conda/envs/linux ocl-icd -c conda-forge -y
|
||||||
else
|
else
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
call create_ver_file.bat
|
call create_ver_file.bat
|
||||||
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_nocuda.exe"
|
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_nocuda.exe"
|
|
@ -16,6 +16,7 @@ pyinstaller --noconfirm --onefile --clean --console --collect-all customtkinter
|
||||||
--add-data "./koboldcpp_noavx2.so:." \
|
--add-data "./koboldcpp_noavx2.so:." \
|
||||||
--add-data "./koboldcpp_clblast.so:." \
|
--add-data "./koboldcpp_clblast.so:." \
|
||||||
--add-data "./koboldcpp_clblast_noavx2.so:." \
|
--add-data "./koboldcpp_clblast_noavx2.so:." \
|
||||||
|
--add-data "./koboldcpp_clblast_failsafe.so:." \
|
||||||
--add-data "./koboldcpp_vulkan_noavx2.so:." \
|
--add-data "./koboldcpp_vulkan_noavx2.so:." \
|
||||||
--add-data "./koboldcpp_vulkan.so:." \
|
--add-data "./koboldcpp_vulkan.so:." \
|
||||||
--add-data "./rwkv_vocab.embd:." \
|
--add-data "./rwkv_vocab.embd:." \
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
call create_ver_file.bat
|
call create_ver_file.bat
|
||||||
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp.exe"
|
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp.exe"
|
|
@ -1,2 +1,2 @@
|
||||||
call create_ver_file.bat
|
call create_ver_file.bat
|
||||||
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_cu12.exe"
|
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_cu12.exe"
|
|
@ -1,2 +1,2 @@
|
||||||
call create_ver_file.bat
|
call create_ver_file.bat
|
||||||
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_oldcpu.exe"
|
PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_oldcpu.exe"
|
|
@ -1,336 +0,0 @@
|
||||||
## KoboldCpp based GGML Backend by Concedo
|
|
||||||
## For use as a custom backend in KoboldAI United
|
|
||||||
## Not intended for general use.
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import time, json
|
|
||||||
import torch
|
|
||||||
import requests
|
|
||||||
import numpy as np
|
|
||||||
from typing import List, Optional, Union
|
|
||||||
import os, time
|
|
||||||
from . import koboldcpp
|
|
||||||
|
|
||||||
import utils
|
|
||||||
from logger import logger
|
|
||||||
from modeling.inference_model import (
|
|
||||||
GenerationResult,
|
|
||||||
GenerationSettings,
|
|
||||||
InferenceModel,
|
|
||||||
)
|
|
||||||
|
|
||||||
model_backend_name = "KoboldCPP" #specific instead of ggml
|
|
||||||
model_backend_type = "ggml" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
|
|
||||||
|
|
||||||
class KoboldCppException(Exception):
|
|
||||||
"""To be used for errors on cpp side of KoboldCpp."""
|
|
||||||
|
|
||||||
class KcppArgsObject:
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
self.__dict__.update(kwargs)
|
|
||||||
|
|
||||||
class model_backend(InferenceModel):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
super().__init__()
|
|
||||||
self.kcpp_backend_loaded = False
|
|
||||||
|
|
||||||
def is_valid(self, model_name, model_path, menu_path):
|
|
||||||
|
|
||||||
foundfile = False
|
|
||||||
try:
|
|
||||||
files = os.listdir(model_path)
|
|
||||||
foundfile = len([filename for filename in files if (("ggml" in filename.lower() and ".bin" in filename.lower()) or ".gguf" in filename.lower())])>0
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return foundfile
|
|
||||||
|
|
||||||
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
|
|
||||||
|
|
||||||
self.kcpp_threads = 5
|
|
||||||
self.model_name = "GGML_Model"
|
|
||||||
self.kcpp_ctxsize = 2048
|
|
||||||
self.kcpp_blasbatchsize = 512
|
|
||||||
self.kcpp_gpulayers = 0
|
|
||||||
self.kcpp_smartcontext = False
|
|
||||||
self.kcpp_ropescale = 0.0
|
|
||||||
self.kcpp_ropebase = 10000.0
|
|
||||||
self.kcpp_useclblast = None
|
|
||||||
self.kcpp_usecublas = None
|
|
||||||
self.kcpp_usecpu = False
|
|
||||||
self.kcpp_noavx2 = False
|
|
||||||
self.kcpp_nommap = False
|
|
||||||
self.kcpp_usevulkan = None
|
|
||||||
self.kcpp_debugmode = 0
|
|
||||||
self.kcpp_tensor_split_str = ""
|
|
||||||
self.kcpp_tensor_split = None
|
|
||||||
|
|
||||||
files = os.listdir(model_path)
|
|
||||||
foundfiles = [filename for filename in files if (("ggml" in filename.lower() and ".bin" in filename.lower()) or ".gguf" in filename.lower())]
|
|
||||||
|
|
||||||
requested_parameters = []
|
|
||||||
foldermdls = []
|
|
||||||
for ff in foundfiles:
|
|
||||||
foldermdls.append({'text': ff, 'value': os.path.join(model_path, ff)})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "dropdown",
|
|
||||||
"unit": "string",
|
|
||||||
"label": "GGML DataFile Name",
|
|
||||||
"id": "kcpp_filename",
|
|
||||||
"default": os.path.join(model_path, foundfiles[0]) if len(foundfiles)>0 else model_name,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "Actual GGML DataFile Name",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": "",
|
|
||||||
'children': foldermdls
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "dropdown",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "KoboldCpp Accelerator",
|
|
||||||
"id": "kcpp_accelerator",
|
|
||||||
"default": 0,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
'multiple': False,
|
|
||||||
"tooltip": "KoboldCpp Accelerator",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": "",
|
|
||||||
'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1},
|
|
||||||
{'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4}
|
|
||||||
,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Older CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}],
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "Threads",
|
|
||||||
"id": "kcpp_threads",
|
|
||||||
"default": self.kcpp_threads,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "Thread Count",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "Max Context Size",
|
|
||||||
"id": "kcpp_ctxsize",
|
|
||||||
"default": self.kcpp_ctxsize,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "Max Context Size",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "BLAS Batch Size",
|
|
||||||
"id": "kcpp_blasbatchsize",
|
|
||||||
"default": self.kcpp_blasbatchsize,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "BLAS Batch Size",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "GPU Layers",
|
|
||||||
"id": "kcpp_gpulayers",
|
|
||||||
"default": self.kcpp_gpulayers,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "GPU Layers",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "Rope Scale",
|
|
||||||
"id": "kcpp_ropescale",
|
|
||||||
"default": self.kcpp_ropescale,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "Rope Scale",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "Rope Base",
|
|
||||||
"id": "kcpp_ropebase",
|
|
||||||
"default": self.kcpp_ropebase,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "Rope Base",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "dropdown",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "Smart Context",
|
|
||||||
"id": "kcpp_smartcontext",
|
|
||||||
"default": self.kcpp_smartcontext,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
'multiple': False,
|
|
||||||
"tooltip": "Smart Context",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": "",
|
|
||||||
'children': [{'text': 'False', 'value': False}, {'text': 'True', 'value': True}],
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "text",
|
|
||||||
"unit": "text",
|
|
||||||
"label": "GPU ID",
|
|
||||||
"id": "kcpp_tensor_split_str",
|
|
||||||
"default": "1",
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
"tooltip": "Which GPU's do we use? For example:1 2",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": ""
|
|
||||||
})
|
|
||||||
requested_parameters.append({
|
|
||||||
"uitype": "dropdown",
|
|
||||||
"unit": "int",
|
|
||||||
"label": "Debug Mode",
|
|
||||||
"id": "kcpp_debugmode",
|
|
||||||
"default": self.kcpp_debugmode,
|
|
||||||
"check": {"value": "", 'check': "!="},
|
|
||||||
'multiple': False,
|
|
||||||
"tooltip": "Debug Mode",
|
|
||||||
"menu_path": "",
|
|
||||||
"refresh_model_inputs": False,
|
|
||||||
"extra_classes": "",
|
|
||||||
'children': [{'text': 'False', 'value': 0}, {'text': 'True', 'value': 1}],
|
|
||||||
})
|
|
||||||
return requested_parameters
|
|
||||||
|
|
||||||
def set_input_parameters(self, parameters):
|
|
||||||
self.kcpp_threads = parameters["kcpp_threads"]
|
|
||||||
self.kcpp_filename = parameters["kcpp_filename"]
|
|
||||||
self.kcpp_ctxsize = parameters["kcpp_ctxsize"]
|
|
||||||
self.kcpp_blasbatchsize = parameters["kcpp_blasbatchsize"]
|
|
||||||
self.kcpp_gpulayers = parameters["kcpp_gpulayers"]
|
|
||||||
self.kcpp_smartcontext = parameters["kcpp_smartcontext"]
|
|
||||||
self.kcpp_ropescale = parameters["kcpp_ropescale"]
|
|
||||||
self.kcpp_ropebase = parameters["kcpp_ropebase"]
|
|
||||||
self.kcpp_debugmode = parameters["kcpp_debugmode"]
|
|
||||||
self.kcpp_tensor_split_str = parameters["kcpp_tensor_split_str"]
|
|
||||||
if self.kcpp_tensor_split_str and self.kcpp_tensor_split_str!="":
|
|
||||||
splits = self.kcpp_tensor_split_str.split()
|
|
||||||
self.kcpp_tensor_split = []
|
|
||||||
for s in splits:
|
|
||||||
self.kcpp_tensor_split.append(int(s))
|
|
||||||
print(self.kcpp_tensor_split)
|
|
||||||
|
|
||||||
accel = parameters["kcpp_accelerator"]
|
|
||||||
if accel==0:
|
|
||||||
self.kcpp_usecpu = True
|
|
||||||
elif accel==1:
|
|
||||||
self.kcpp_usecublas = ["normal"]
|
|
||||||
elif accel==2:
|
|
||||||
self.kcpp_useclblast = [0,0]
|
|
||||||
elif accel==3:
|
|
||||||
self.kcpp_useclblast = [1,0]
|
|
||||||
elif accel==4:
|
|
||||||
self.kcpp_useclblast = [0,1]
|
|
||||||
elif accel==5:
|
|
||||||
self.kcpp_noavx2 = True
|
|
||||||
elif accel==6:
|
|
||||||
self.kcpp_noavx2 = True
|
|
||||||
self.kcpp_usecpu = True
|
|
||||||
self.kcpp_nommap = True
|
|
||||||
elif accel==7:
|
|
||||||
self.kcpp_usevulkan = [0]
|
|
||||||
elif accel==8:
|
|
||||||
self.kcpp_usevulkan = [1]
|
|
||||||
pass
|
|
||||||
|
|
||||||
def unload(self):
|
|
||||||
print("Attemping to unload library")
|
|
||||||
self.process.terminate()
|
|
||||||
|
|
||||||
|
|
||||||
def _load(self, save_model: bool, initial_load: bool) -> None:
|
|
||||||
self.tokenizer = self._get_tokenizer("gpt2")
|
|
||||||
kcppargs = KcppArgsObject(model=self.kcpp_filename, model_param=self.kcpp_filename,
|
|
||||||
port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads,
|
|
||||||
psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize, blasbatchsize=self.kcpp_blasbatchsize,
|
|
||||||
ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext, forceversion=0,
|
|
||||||
nommap=self.kcpp_nommap, usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, usecpu=self.kcpp_usecpu,
|
|
||||||
useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers,
|
|
||||||
tensor_split=self.kcpp_tensor_split, config=None, onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False,
|
|
||||||
remotetunnel=False, ssl=False, benchmark=None, nocertify=False, mmproj=None, password=None, chatcompletionsadapter=None)
|
|
||||||
|
|
||||||
|
|
||||||
#koboldcpp.main(kcppargs,False) #initialize library without enabling Lite http server
|
|
||||||
(self.output_queue, self.input_queue, self.process) = koboldcpp.start_in_seperate_process(kcppargs)
|
|
||||||
while True:
|
|
||||||
data = self.output_queue.get()
|
|
||||||
if data['command'] == 'load status':
|
|
||||||
utils.koboldai_vars.total_layers = data['data']['total']
|
|
||||||
utils.koboldai_vars.loaded_layers = data['data']['loaded']
|
|
||||||
elif data['command'] == 'complete':
|
|
||||||
break
|
|
||||||
time.sleep(0.02)
|
|
||||||
|
|
||||||
def _save_settings(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _raw_generate(
|
|
||||||
self,
|
|
||||||
prompt_tokens: Union[List[int], torch.Tensor],
|
|
||||||
max_new: int,
|
|
||||||
gen_settings: GenerationSettings,
|
|
||||||
single_line: bool = False,
|
|
||||||
batch_count: int = 1,
|
|
||||||
seed: Optional[int] = None,
|
|
||||||
**kwargs,
|
|
||||||
) -> GenerationResult:
|
|
||||||
|
|
||||||
decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens))
|
|
||||||
|
|
||||||
# Store context in memory to use it for comparison with generated content
|
|
||||||
utils.koboldai_vars.lastctx = decoded_prompt
|
|
||||||
|
|
||||||
self.input_queue.put({'command': 'generate', 'data': {'prompt':decoded_prompt, 'max_length': max_new, 'max_context_length': utils.koboldai_vars.max_length,
|
|
||||||
'temperature': gen_settings.temp, 'top_k': int(gen_settings.top_k), 'top_a': gen_settings.top_a, 'top_p': gen_settings.top_p,
|
|
||||||
'typical_p': gen_settings.typical, 'tfs': gen_settings.tfs, 'rep_pen': gen_settings.rep_pen, 'rep_pen_range': gen_settings.rep_pen_range,
|
|
||||||
"sampler_order": gen_settings.sampler_order, "use_default_badwordsids": utils.koboldai_vars.use_default_badwordsids}
|
|
||||||
})
|
|
||||||
|
|
||||||
#genresult = koboldcpp.generate(decoded_prompt,"",max_new,utils.koboldai_vars.max_length,
|
|
||||||
#gen_settings.temp,int(gen_settings.top_k),gen_settings.top_a,gen_settings.top_p,
|
|
||||||
#gen_settings.typical,gen_settings.tfs,gen_settings.rep_pen,gen_settings.rep_pen_range,
|
|
||||||
#sampler_order=gen_settings.sampler_order,use_default_badwordsids=utils.koboldai_vars.use_default_badwordsids)
|
|
||||||
|
|
||||||
genresult = []
|
|
||||||
while True:
|
|
||||||
data = self.output_queue.get()
|
|
||||||
print(data)
|
|
||||||
if data['command'] == 'generated text':
|
|
||||||
genresult.append(data['data'])
|
|
||||||
if self.output_queue.empty():
|
|
||||||
break
|
|
||||||
time.sleep(0.02)
|
|
||||||
|
|
||||||
return GenerationResult(
|
|
||||||
model=self,
|
|
||||||
out_batches=np.array(
|
|
||||||
[self.tokenizer.encode(x) for x in genresult]
|
|
||||||
),
|
|
||||||
prompt=prompt_tokens,
|
|
||||||
is_whole_generation=True,
|
|
||||||
single_line=single_line,
|
|
||||||
)
|
|
Loading…
Add table
Add a link
Reference in a new issue