Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
updated class py, added imatrix
This commit is contained in:
parent c2e497ccfb
commit f81404e33c
4 changed files with 13 additions and 7 deletions
Makefile (8 changes)
@@ -491,14 +491,14 @@ gpttype_adapter_vulkan.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
 
 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so
 
 # useful tools
 main: examples/main/main.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
 	@echo
+imatrix: examples/imatrix/imatrix.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
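The new imatrix target links llama.cpp's importance-matrix tool from the same object set as main; $(filter-out %.h,$^) strips the build-info.h prerequisite from $^ so only compilable inputs reach the compiler. A minimal, hypothetical usage sketch once `make imatrix` succeeds — the -m/-f/-o flags follow the upstream llama.cpp imatrix example and are an assumption here, as are the file paths:

import subprocess

# Hypothetical invocation of the freshly built binary: collect activation
# statistics from a calibration text and write the importance matrix to disk.
subprocess.run([
    "./imatrix",
    "-m", "model.gguf",        # model to profile (hypothetical path)
    "-f", "calibration.txt",   # calibration text (hypothetical path)
    "-o", "imatrix.dat",       # output importance matrix (assumed flag)
], check=True)

The resulting imatrix.dat is what the quantizers consume to weight per-tensor error, which is why the clean target above now also removes imatrix and imatrix.exe.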
class.py (9 changes)
@@ -60,6 +60,7 @@ class model_backend(InferenceModel):
         self.kcpp_noblas = False
         self.kcpp_noavx2 = False
         self.kcpp_nommap = False
+        self.kcpp_usevulkan = None
         self.kcpp_debugmode = 0
         self.kcpp_tensor_split_str = ""
         self.kcpp_tensor_split = None
@@ -98,7 +99,7 @@ class model_backend(InferenceModel):
             "extra_classes": "",
             'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use OpenBLAS', 'value': 1}, {'text': 'Use CuBLAS', 'value': 2},
             {'text': 'Use CLBLast GPU #1', 'value': 3},{'text': 'Use CLBLast GPU #2', 'value': 4},{'text': 'Use CLBLast GPU #3', 'value': 5}
-            ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7}],
+            ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7},{'text': 'Use Vulkan GPU #1', 'value': 8},{'text': 'Use Vulkan GPU #2', 'value': 9}],
         })
         requested_parameters.append({
             "uitype": "text",
@@ -252,6 +253,10 @@ class model_backend(InferenceModel):
             self.kcpp_noavx2 = True
             self.kcpp_noblas = True
             self.kcpp_nommap = True
+        elif accel==8:
+            self.kcpp_usevulkan = 0
+        elif accel==9:
+            self.kcpp_usevulkan = 1
         pass
 
     def unload(self):
@@ -267,7 +272,7 @@ class model_backend(InferenceModel):
             blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
             unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
             usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
-            useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
+            useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
             onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False)
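Taken together, the class.py hunks thread the two new dropdown values through to the launcher: 8 and 9 select the first or second Vulkan device via kcpp_usevulkan, which the final hunk forwards as the usevulkan keyword next to the existing CLBlast/CuBLAS arguments. A minimal sketch of the dispatch the patch extends — the helper name is hypothetical, the value-7 body is inferred from the context lines above, and only the 8/9 branches are verbatim from this diff:

# Hypothetical helper, not part of class.py, condensing the accel dispatch.
def resolve_accel(accel: int) -> dict:
    flags = {"noavx2": False, "noblas": False, "nommap": False, "usevulkan": None}
    if accel == 7:        # 'Failsafe Mode (Old CPU)': no AVX2, no BLAS, no mmap
        flags.update(noavx2=True, noblas=True, nommap=True)
    elif accel == 8:      # new: 'Use Vulkan GPU #1' -> first Vulkan device
        flags["usevulkan"] = 0
    elif accel == 9:      # new: 'Use Vulkan GPU #2' -> second Vulkan device
        flags["usevulkan"] = 1
    return flags

Because kcpp_usevulkan is initialised to None and only set by the new branches, the Vulkan path stays disabled for every other dropdown value, leaving existing CuBLAS/CLBlast selections untouched.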
examples/imatrix/imatrix.cpp

@@ -1,3 +1,4 @@
+#include "build-info.h"
 #include "common.h"
 #include "llama.h"
 
koboldcpp.py

@@ -452,7 +452,7 @@ maxhordelen = 256
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.56"
+KcppVersion = "1.57"
 showdebug = True
 showsamplerwarning = True
 showmaxctxwarning = True