mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 18:30:50 +00:00
rename blas to just batching
This commit is contained in:
parent
48cd70b14b
commit
45a02ae534
1 changed file with 5 additions and 5 deletions
10
koboldcpp.py
10
koboldcpp.py
|
|
@ -4682,7 +4682,7 @@ def show_gui():
|
|||
tabcontent = {}
|
||||
# slider data
|
||||
blasbatchsize_values = ["-1","16","32","64","128","256","512","1024","2048","4096"]
|
||||
blasbatchsize_text = ["Don't Batch BLAS","16","32","64","128","256","512","1024","2048","4096"]
|
||||
blasbatchsize_text = ["Don't Batch","16","32","64","128","256","512","1024","2048","4096"]
|
||||
contextsize_text = ["256", "512", "1024", "2048", "3072", "4096", "6144", "8192", "10240", "12288", "14336", "16384", "20480", "24576", "28672", "32768", "40960", "49152", "57344", "65536", "81920", "98304", "114688", "131072"]
|
||||
antirunopts = [opt.replace("Use ", "") for lib, opt in lib_option_pairs if opt not in runopts]
|
||||
quantkv_text = ["F16 (Off)","8-Bit","4-Bit"]
|
||||
|
|
@ -5386,9 +5386,9 @@ def show_gui():
|
|||
makecheckbox(hardware_tab, name, properties[0], int(idx/2) + 30, idx % 2, tooltiptxt=properties[1])
|
||||
|
||||
# blas thread specifier
|
||||
makelabelentry(hardware_tab, "BLAS threads:" , blas_threads_var, 14, 50,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
|
||||
makelabelentry(hardware_tab, "Batch Threads:" , blas_threads_var, 14, 50,tooltip="How many threads to use during batched processing.\nIf left blank, uses same value as regular thread count.")
|
||||
# blas batch size
|
||||
makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, len(blasbatchsize_values)-1, 16,width=200, set=6,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
|
||||
makeslider(hardware_tab, "Batch Size:", blasbatchsize_text, blas_size_var, 0, len(blasbatchsize_values)-1, 16,width=200, set=6,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
|
||||
blas_size_var.trace_add("write", changed_gpulayers_estimate)
|
||||
|
||||
# force version
|
||||
|
|
@ -7818,8 +7818,8 @@ if __name__ == '__main__':
|
|||
advparser.add_argument("--version", help="Prints version and exits.", action='store_true')
|
||||
advparser.add_argument("--analyze", metavar=('[filename]'), help="Reads the metadata, weight types and tensor names in any GGUF file.", default="")
|
||||
advparser.add_argument("--maingpu","--main-gpu","-mg", help="Only used in a multi-gpu setup. Sets the index of the main GPU that will be used.",metavar=('[Device ID]'), type=int, default=-1)
|
||||
advparser.add_argument("--blasbatchsize","--batch-size","-b", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,16,32,64,128,256,512,1024,2048,4096], default=512)
|
||||
advparser.add_argument("--blasthreads","--threads-batch", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
|
||||
advparser.add_argument("--blasbatchsize","--batchsize","--batch-size","-b", help="Sets the batch size used in batched processing (default 512). Setting it to -1 disables batched mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,16,32,64,128,256,512,1024,2048,4096], default=512)
|
||||
advparser.add_argument("--blasthreads","--batchthreads","--threadsbatch","--threads-batch", help="Use a different number of threads during batching if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)
|
||||
advparser.add_argument("--lora", help="GGUF models only, applies a lora file on top of model.", metavar=('[lora_filename]'), nargs='+')
|
||||
advparser.add_argument("--loramult", metavar=('[amount]'), help="Multiplier for the Text LORA model to be applied.", type=float, default=1.0)
|
||||
advparser.add_argument("--noshift","--no-context-shift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue