diff --git a/koboldcpp.py b/koboldcpp.py index a0b4da48d..0dac1386f 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1650,10 +1650,10 @@ def show_new_gui(): gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpuname_label.configure(text_color="#ffff00") gpu_layers_entry,gpu_layers_label = makelabelentry(hardware_tab,"GPU Layers:", gpulayers_var, 6, 50,"How many layers to offload onto the GPU.\nVRAM intensive, usage increases with model and context size.\nRequires some trial and error to find the best fit value.") - tensor_split_entry,tensor_split_label = makelabelentry(hardware_tab, "Tensor Split:", tensor_split_str_vars, 8, 80) - lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 4,0) - mmq_box = makecheckbox(hardware_tab, "Use QuantMatMul (mmq)", mmq_var, 4,1) - splitmode_box = makecheckbox(hardware_tab, "Row-Split", rowsplit_var, 5,0) + tensor_split_entry,tensor_split_label = makelabelentry(hardware_tab, "Tensor Split:", tensor_split_str_vars, 8, 80, tooltip='When using multiple GPUs this option controls how large tensors should be split across all GPUs.\nUses a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order.\nFor example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1.') + lowvram_box = makecheckbox(hardware_tab, "Low VRAM", lowvram_var, 4,0, tooltiptxt='Select lowvram to not allocate VRAM scratch buffer/ "K" & "V" Cache.\nCan save a decent amount of VRAM, but makes fully offloaded model processing a bit slower.') + mmq_box = makecheckbox(hardware_tab, "Use QuantMatMul (mmq)", mmq_var, 4,1, tooltiptxt="Enable MMQ mode to use finetuned kernels instead of default CuBLAS/HipBLAS for prompt processing.\nRead the wiki. Speed may vary.") + splitmode_box = makecheckbox(hardware_tab, "Row-Split", rowsplit_var, 5,0, tooltiptxt="Split rows across GPUs instead of splitting layers and KV across GPUs.\nUses the main GPU for small tensors and intermediate results. Speed may vary.") # threads makelabelentry(hardware_tab, "Threads:" , threads_var, 11, 50,"How many threads to use.\nRecommended value is your CPU core count, defaults are usually OK.")