hide flash attention in quick launch for vulkan, updated lite

Concedo 2024-10-24 22:00:09 +08:00
parent becd737e0f
commit d0a6a52855
2 changed files with 323 additions and 44 deletions


@@ -2703,9 +2703,12 @@ def show_gui():
             tensor_split_entry.grid_remove()
             splitmode_box.grid_remove()
-        if index == "Use Vulkan":
+        if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)":
             tensor_split_label.grid(row=8, column=0, padx = 8, pady=1, stick="nw")
             tensor_split_entry.grid(row=8, column=1, padx=8, pady=1, stick="nw")
+            quick_use_flashattn.grid_remove()
+        else:
+            quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
         if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
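
The hunk above toggles the quick-launch FlashAttention checkbox inside changerunmode(): when a Vulkan backend is selected, the widget is hidden with grid_remove(); otherwise it is placed back with grid(). Below is a minimal, self-contained sketch of that Tkinter show/hide pattern; the widget names, row/column values, and backend strings come from the diff, but the window, combobox, and trace wiring are illustrative assumptions, not koboldcpp's actual code.

import tkinter as tk
from tkinter import ttk

root = tk.Tk()
backend_var = tk.StringVar(value="Use CuBLAS")
backend_box = ttk.Combobox(root, textvariable=backend_var, state="readonly",
                           values=["Use CuBLAS", "Use Vulkan", "Use Vulkan (Old CPU)"])
backend_box.grid(row=0, column=0, padx=8, pady=4, sticky="nw")

flashattention = tk.IntVar(value=0)
quick_use_flashattn = ttk.Checkbutton(root, text="Use FlashAttention", variable=flashattention)
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, sticky="nw")

def changerunmode(*_):
    index = backend_var.get()
    if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)":
        # grid_remove() hides the widget but remembers its grid options,
        # so a later grid() call can restore it in the same cell.
        quick_use_flashattn.grid_remove()
    else:
        quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, sticky="nw")

backend_var.trace_add("write", changerunmode)
root.mainloop()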
@@ -2755,13 +2758,14 @@ def show_gui():
         "Disable MMAP": [disablemmap, "Avoids using mmap to load models if enabled"],
         "Use ContextShift": [contextshift, "Uses Context Shifting to reduce reprocessing.\nRecommended. Check the wiki for more info."],
         "Remote Tunnel": [remotetunnel, "Creates a trycloudflare tunnel.\nAllows you to access koboldcpp from other devices over an internet URL."],
-        "Use FlashAttention": [flashattention, "Enable flash attention for GGUF models."],
         "Quiet Mode": [quietmode, "Prevents all generation related terminal output from being displayed."]
     }
     for idx, (name, properties) in enumerate(quick_boxes.items()):
         makecheckbox(quick_tab, name, properties[0], int(idx/2) + 20, idx % 2, tooltiptxt=properties[1])
+    quick_use_flashattn = makecheckbox(quick_tab, "Use FlashAttention", flashattention, 22, 1, tooltiptxt="Enable flash attention for GGUF models.")
     # context size
     makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, width=280, set=5,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
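
The second hunk removes the FlashAttention entry from the quick_boxes dictionary (whose checkboxes are created anonymously in the loop) and creates it with a separate makecheckbox() call instead, so the returned widget is kept in quick_use_flashattn and can be toggled by changerunmode(). The helper below is only a guess at such a factory, matching the call signature seen in the diff (parent, label text, variable, row, column, tooltiptxt); koboldcpp's real makecheckbox() may differ, and tooltip wiring is omitted.

import tkinter as tk
from tkinter import ttk

def makecheckbox(parent, text, variable, row, column, tooltiptxt=""):
    # Create the checkbox, place it on the grid, and return the widget so
    # the caller can later hide it with grid_remove() or re-show it with grid().
    chk = ttk.Checkbutton(parent, text=text, variable=variable)
    chk.grid(row=row, column=column, padx=8, pady=1, sticky="nw")
    # Tooltip handling intentionally omitted in this sketch.
    return chk

root = tk.Tk()
flashattention = tk.IntVar(value=0)
quick_use_flashattn = makecheckbox(root, "Use FlashAttention", flashattention, 22, 1,
                                   tooltiptxt="Enable flash attention for GGUF models.")
quick_use_flashattn.grid_remove()   # hidden while a Vulkan backend is selected
quick_use_flashattn.grid()          # restored in the same cell otherwise
root.mainloop()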