Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-10 17:14:36 +00:00

Commit f81404e33c (parent: c2e497ccfb)
Message: updated class py, added imatrix
4 changed files with 13 additions and 7 deletions
Makefile (8 changes)

@@ -491,14 +491,14 @@ gpttype_adapter_vulkan.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
 
 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so
 
 # useful tools
 main: examples/main/main.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
 	@echo
 
+imatrix: examples/imatrix/imatrix.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
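A minimal sketch of how the new target might be exercised once this Makefile change lands. The flag names follow the upstream llama.cpp imatrix example and the file paths are placeholders, so verify against ./imatrix -h:

import subprocess

# Build the target added by this commit, then collect an importance matrix.
subprocess.run(["make", "imatrix"], check=True)
subprocess.run([
    "./imatrix",
    "-m", "model.gguf",        # placeholder: model to analyze
    "-f", "calibration.txt",   # placeholder: text used to gather activation statistics
    "-o", "imatrix.dat",       # placeholder: output file for the matrix
], check=True)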
class.py (9 changes)

@@ -60,6 +60,7 @@ class model_backend(InferenceModel):
         self.kcpp_noblas = False
         self.kcpp_noavx2 = False
         self.kcpp_nommap = False
+        self.kcpp_usevulkan = None
         self.kcpp_debugmode = 0
         self.kcpp_tensor_split_str = ""
         self.kcpp_tensor_split = None
@@ -98,7 +99,7 @@ class model_backend(InferenceModel):
             "extra_classes": "",
             'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use OpenBLAS', 'value': 1}, {'text': 'Use CuBLAS', 'value': 2},
             {'text': 'Use CLBLast GPU #1', 'value': 3},{'text': 'Use CLBLast GPU #2', 'value': 4},{'text': 'Use CLBLast GPU #3', 'value': 5}
-            ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7}],
+            ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7},{'text': 'Use Vulkan GPU #1', 'value': 8},{'text': 'Use Vulkan GPU #2', 'value': 9}],
         })
         requested_parameters.append({
             "uitype": "text",
@@ -252,6 +253,10 @@ class model_backend(InferenceModel):
             self.kcpp_noavx2 = True
             self.kcpp_noblas = True
             self.kcpp_nommap = True
+        elif accel==8:
+            self.kcpp_usevulkan = 0
+        elif accel==9:
+            self.kcpp_usevulkan = 1
         pass
 
     def unload(self):
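The two dropdown entries added in the UI hunk above (values 8 and 9) are translated here into zero-based Vulkan device indices. A hypothetical consolidation of that mapping, purely illustrative since the real class.py keeps the if/elif chain:

# Hypothetical refactor of the accel handling above, not code from this commit.
VULKAN_DEVICE_BY_ACCEL = {8: 0, 9: 1}  # dropdown value -> zero-based device index

def vulkan_device_for(accel: int):
    """Return the Vulkan device index for a dropdown value, or None."""
    return VULKAN_DEVICE_BY_ACCEL.get(accel)

assert vulkan_device_for(8) == 0
assert vulkan_device_for(9) == 1
assert vulkan_device_for(2) is None  # CuBLAS entry: not a Vulkan mode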
@@ -267,7 +272,7 @@ class model_backend(InferenceModel):
             blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
             unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
             usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
-            useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
+            useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
             onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False)
 
 
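This final hunk threads the stored index into the launch arguments. A hypothetical illustration of the resulting kwargs shape, with names copied from the hunk above and example values; None continues to mean the Vulkan path is not selected:

# Illustration only; not code from this commit.
launch_kwargs = dict(
    noblas=False,
    useclblast=None,   # OpenCL path: off
    usecublas=None,    # CUDA path: off
    usevulkan=0,       # new in this commit: Vulkan device #1 (zero-based)
    gpulayers=99,
    tensor_split=None,
)
# Assumption: at most one GPU backend is selected at a time.
active = [k for k in ("useclblast", "usecublas", "usevulkan")
          if launch_kwargs[k] is not None]
assert active == ["usevulkan"]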
examples/imatrix/imatrix.cpp (1 change; filename inferred from the new Makefile target above)

@@ -1,3 +1,4 @@
+#include "build-info.h"
 #include "common.h"
 #include "llama.h"
 
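As background, the imatrix tool gathers per-column activation statistics during a calibration pass so that quantization can weight error by how strongly each weight column is actually used. A toy Python sketch of the accumulation it performs; this is a conceptual model only, not the actual C++ implementation:

# Conceptual sketch, not the real imatrix code.
def update_imatrix(sums, counts, activations):
    """Accumulate squared activations for one row of matmul input values."""
    for j, a in enumerate(activations):
        sums[j] += a * a
        counts[j] += 1

sums, counts = [0.0] * 4, [0] * 4
for row in ([0.5, -1.0, 0.0, 2.0], [1.5, 0.5, 0.0, -2.0]):
    update_imatrix(sums, counts, row)
importance = [s / c for s, c in zip(sums, counts)]  # mean squared activation per column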
koboldcpp.py (2 changes; filename inferred from the KcppVersion global)

@@ -452,7 +452,7 @@ maxhordelen = 256
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.56"
+KcppVersion = "1.57"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True