hide flash attention in quick launch for vulkan, updated lite

Concedo 2024-10-24 22:00:09 +08:00
parent becd737e0f
commit d0a6a52855
2 changed files with 323 additions and 44 deletions


@@ -2703,9 +2703,12 @@ def show_gui():
             tensor_split_entry.grid_remove()
             splitmode_box.grid_remove()
-        if index == "Use Vulkan":
+        if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)":
             tensor_split_label.grid(row=8, column=0, padx = 8, pady=1, stick="nw")
             tensor_split_entry.grid(row=8, column=1, padx=8, pady=1, stick="nw")
+            quick_use_flashattn.grid_remove()
+        else:
+            quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw")
         if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)":
             gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw")
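
The hunk above toggles the quick-launch FlashAttention checkbox inside changerunmode(): when a Vulkan backend is selected, the widget is hidden with grid_remove(); otherwise it is placed back with grid(). Below is a minimal, self-contained sketch of that Tkinter show/hide pattern; the widget names, row/column values, and backend strings come from the diff, but the window, combobox, and trace wiring are illustrative assumptions, not koboldcpp's actual code.

import tkinter as tk
from tkinter import ttk

root = tk.Tk()
backend_var = tk.StringVar(value="Use CuBLAS")
backend_box = ttk.Combobox(root, textvariable=backend_var, state="readonly",
                           values=["Use CuBLAS", "Use Vulkan", "Use Vulkan (Old CPU)"])
backend_box.grid(row=0, column=0, padx=8, pady=4, sticky="nw")

flashattention = tk.IntVar(value=0)
quick_use_flashattn = ttk.Checkbutton(root, text="Use FlashAttention", variable=flashattention)
quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, sticky="nw")

def changerunmode(*_):
    index = backend_var.get()
    if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)":
        # grid_remove() hides the widget but remembers its grid options,
        # so a later grid() call can restore it in the same cell.
        quick_use_flashattn.grid_remove()
    else:
        quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, sticky="nw")

backend_var.trace_add("write", changerunmode)
root.mainloop()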
@@ -2755,13 +2758,14 @@ def show_gui():
         "Disable MMAP": [disablemmap, "Avoids using mmap to load models if enabled"],
         "Use ContextShift": [contextshift, "Uses Context Shifting to reduce reprocessing.\nRecommended. Check the wiki for more info."],
         "Remote Tunnel": [remotetunnel, "Creates a trycloudflare tunnel.\nAllows you to access koboldcpp from other devices over an internet URL."],
-        "Use FlashAttention": [flashattention, "Enable flash attention for GGUF models."],
         "Quiet Mode": [quietmode, "Prevents all generation related terminal output from being displayed."]
     }
     for idx, (name, properties) in enumerate(quick_boxes.items()):
         makecheckbox(quick_tab, name, properties[0], int(idx/2) + 20, idx % 2, tooltiptxt=properties[1])
+    quick_use_flashattn = makecheckbox(quick_tab, "Use FlashAttention", flashattention, 22, 1, tooltiptxt="Enable flash attention for GGUF models.")
     # context size
     makeslider(quick_tab, "Context Size:", contextsize_text, context_var, 0, len(contextsize_text)-1, 30, width=280, set=5,tooltip="What is the maximum context size to support. Model specific. You cannot exceed it.\nLarger contexts require more memory, and not all models support it.")
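
The second hunk removes the FlashAttention entry from the quick_boxes dictionary (whose checkboxes are created anonymously in the loop) and creates it with a separate makecheckbox() call instead, so the returned widget is kept in quick_use_flashattn and can be toggled by changerunmode(). The helper below is only a guess at such a factory, matching the call signature seen in the diff (parent, label text, variable, row, column, tooltiptxt); koboldcpp's real makecheckbox() may differ, and tooltip wiring is omitted.

import tkinter as tk
from tkinter import ttk

def makecheckbox(parent, text, variable, row, column, tooltiptxt=""):
    # Create the checkbox, place it on the grid, and return the widget so
    # the caller can later hide it with grid_remove() or re-show it with grid().
    chk = ttk.Checkbutton(parent, text=text, variable=variable)
    chk.grid(row=row, column=column, padx=8, pady=1, sticky="nw")
    # Tooltip handling intentionally omitted in this sketch.
    return chk

root = tk.Tk()
flashattention = tk.IntVar(value=0)
quick_use_flashattn = makecheckbox(root, "Use FlashAttention", flashattention, 22, 1,
                                   tooltiptxt="Enable flash attention for GGUF models.")
quick_use_flashattn.grid_remove()   # hidden while a Vulkan backend is selected
quick_use_flashattn.grid()          # restored in the same cell otherwise
root.mainloop()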