mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 09:04:36 +00:00)

vision can set max res (+1 squashed commits)
Squashed commits: [938fc655] vision can set max res

commit 70f1d8d746
parent 2f69432774

6 changed files with 25 additions and 12 deletions; only koboldcpp.py (24 changed lines) is shown below.

koboldcpp.py
```diff
@@ -36,6 +36,8 @@ bias_min_value = -100.0
 bias_max_value = 100.0
 logprobs_max = 5
 default_draft_amount = 8
+default_ttsmaxlen = 4096
+default_visionmaxres = 1024
 
 # abuse prevention
 stop_token_max = 256
```
```diff
@@ -151,6 +153,7 @@ class load_model_inputs(ctypes.Structure):
                 ("draft_gpulayers", ctypes.c_int),
                 ("draft_gpusplit", ctypes.c_float * tensor_split_max),
                 ("mmproj_filename", ctypes.c_char_p),
+                ("visionmaxres", ctypes.c_int),
                 ("use_mmap", ctypes.c_bool),
                 ("use_mlock", ctypes.c_bool),
                 ("use_smartcontext", ctypes.c_bool),
```
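The new `("visionmaxres", ctypes.c_int)` entry must sit at the same offset as the matching field in the C-side struct, since `ctypes.Structure` lays fields out in declaration order. A minimal sketch of that ordering contract (the reduced struct below is illustrative, not the real `load_model_inputs`):

```python
import ctypes

class demo_inputs(ctypes.Structure):
    # Fields are laid out in declaration order, so inserting visionmaxres
    # after mmproj_filename here requires the same insertion point in the
    # corresponding C struct. (Reduced, illustrative field list.)
    _fields_ = [("mmproj_filename", ctypes.c_char_p),
                ("visionmaxres", ctypes.c_int),
                ("use_mmap", ctypes.c_bool)]

d = demo_inputs(mmproj_filename=b"model.mmproj", visionmaxres=1024, use_mmap=True)
print(d.visionmaxres)  # 1024
```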
```diff
@@ -1057,6 +1060,7 @@ def load_model(model_filename):
         else:
             inputs.draft_gpusplit[n] = 0
     inputs.mmproj_filename = args.mmproj.encode("UTF-8") if args.mmproj else "".encode("UTF-8")
+    inputs.visionmaxres = (512 if args.visionmaxres < 512 else (2048 if args.visionmaxres > 2048 else args.visionmaxres))
     inputs.use_smartcontext = args.smartcontext
     inputs.use_contextshift = (0 if args.noshift else 1)
     inputs.use_fastforward = (0 if args.nofastforward else 1)
```
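The added line clamps the requested resolution into the 512–2048 px window with a nested conditional expression. An equivalent standalone sketch (the helper name is illustrative, not from the patch):

```python
def clamp_vision_res(requested: int, lo: int = 512, hi: int = 2048) -> int:
    # Same behavior as the nested conditional in load_model: out-of-range
    # requests are pulled back to the nearest allowed bound.
    return lo if requested < lo else (hi if requested > hi else requested)

assert clamp_vision_res(256) == 512    # too small -> raised to the floor
assert clamp_vision_res(1024) == 1024  # in range -> unchanged
assert clamp_vision_res(4096) == 2048  # too large -> capped at the ceiling
```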
```diff
@@ -3242,6 +3246,7 @@ def show_gui():
     lora_base_var = ctk.StringVar()
     preloadstory_var = ctk.StringVar()
     mmproj_var = ctk.StringVar()
+    visionmaxres_var = ctk.StringVar(value=str(default_visionmaxres))
     draftmodel_var = ctk.StringVar()
     draftamount_var = ctk.StringVar(value=str(default_draft_amount))
     draftgpulayers_var = ctk.StringVar(value=str(999))
```
```diff
@@ -3281,7 +3286,7 @@ def show_gui():
     wavtokenizer_var = ctk.StringVar()
     ttsgpu_var = ctk.IntVar(value=0)
     tts_threads_var = ctk.StringVar(value=str(default_threads))
-    ttsmaxlen_var = ctk.StringVar(value=str(4096))
+    ttsmaxlen_var = ctk.StringVar(value=str(default_ttsmaxlen))
 
     def tabbuttonaction(name):
         for t in tabcontent:
```
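This hunk (and the two matching ones further down) replaces the literal `4096` with the new `default_ttsmaxlen` constant so the GUI field, the config import, and the `--ttsmaxlen` CLI default cannot drift apart. A minimal sketch of the idea:

```python
# One named constant instead of three copies of the literal 4096; changing
# the TTS token cap now only requires touching this line.
default_ttsmaxlen = 4096

gui_default = str(default_ttsmaxlen)  # tkinter StringVars hold strings
cli_default = default_ttsmaxlen       # argparse takes the int directly
print(gui_default, cli_default)       # 4096 4096
```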
```diff
@@ -3739,10 +3744,11 @@ def show_gui():
     makefileentry(model_tab, "Text Lora:", "Select Lora File",lora_var, 3,width=280,singlerow=True,tooltiptxt="Select an optional GGML Text LoRA adapter to use.\nLeave blank to skip.")
     makefileentry(model_tab, "Lora Base:", "Select Lora Base File", lora_base_var, 5,width=280,singlerow=True,tooltiptxt="Select an optional F16 GGML Text LoRA base file to use.\nLeave blank to skip.")
     makefileentry(model_tab, "Vision mmproj:", "Select Vision mmproj File", mmproj_var, 7,width=280,singlerow=True,tooltiptxt="Select a mmproj file to use for vision models like LLaVA.\nLeave blank to skip.")
-    makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 9,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
-    makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 11, 50,padx=100,singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
-    makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 11, 50,padx=210,singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=160)
-    makelabelentry(model_tab, "Layers: ", draftgpulayers_var, 11, 50,padx=320,singleline=True,tooltip="How many layers to GPU offload for the draft model", labelpadx=270)
+    makelabelentry(model_tab, "Vision MaxRes:", visionmaxres_var, 9, padx=100, singleline=True, tooltip=f"Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default {default_visionmaxres}).")
+    makefileentry(model_tab, "Draft Model:", "Select Speculative Text Model File", draftmodel_var, 11,width=280,singlerow=True,tooltiptxt="Select a draft text model file to use for speculative decoding.\nLeave blank to skip.")
+    makelabelentry(model_tab, "Draft Amount: ", draftamount_var, 13, 50,padx=100,singleline=True,tooltip="How many tokens to draft per chunk before verifying results")
+    makelabelentry(model_tab, "Splits: ", draftgpusplit_str_vars, 13, 50,padx=210,singleline=True,tooltip="Distribution of draft model layers. Leave blank to follow main model's gpu split. Only works if multi-gpu (All) selected in main model.", labelpadx=160)
+    makelabelentry(model_tab, "Layers: ", draftgpulayers_var, 13, 50,padx=320,singleline=True,tooltip="How many layers to GPU offload for the draft model", labelpadx=270)
     makefileentry(model_tab, "Preload Story:", "Select Preloaded Story File", preloadstory_var, 15,width=280,singlerow=True,tooltiptxt="Select an optional KoboldAI JSON savefile \nto be served on launch to any client.")
     makefileentry(model_tab, "ChatCompletions Adapter:", "Select ChatCompletions Adapter File", chatcompletionsadapter_var, 24, width=250, filetypes=[("JSON Adapter", "*.json")], tooltiptxt="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.")
     def pickpremadetemplate():
```
```diff
@@ -4022,6 +4028,7 @@ def show_gui():
         except Exception:
             pass
         args.mmproj = None if mmproj_var.get() == "" else mmproj_var.get()
+        args.visionmaxres = int(visionmaxres_var.get()) if visionmaxres_var.get()!="" else default_visionmaxres
         args.draftmodel = None if draftmodel_var.get() == "" else draftmodel_var.get()
         args.draftamount = int(draftamount_var.get()) if draftamount_var.get()!="" else default_draft_amount
         args.draftgpulayers = int(draftgpulayers_var.get()) if draftgpulayers_var.get()!="" else 999
```
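The new `args.visionmaxres` line follows the same pattern as the draft fields: an empty GUI entry falls back to the default, anything else is converted with `int()`, which would raise `ValueError` on non-numeric text. A small sketch of the pattern (names illustrative):

```python
default_visionmaxres = 1024

def entry_to_int(text: str, fallback: int) -> int:
    # Empty GUI field -> default; otherwise parse what the user typed.
    # Non-numeric text raises ValueError, just like the inline expression.
    return int(text) if text != "" else fallback

print(entry_to_int("", default_visionmaxres))      # 1024
print(entry_to_int("1536", default_visionmaxres))  # 1536
```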
```diff
@@ -4199,6 +4206,8 @@ def show_gui():
             lora_var.set(dict["lora"][0])
 
         mmproj_var.set(dict["mmproj"] if ("mmproj" in dict and dict["mmproj"]) else "")
+        if "visionmaxres" in dict and dict["visionmaxres"]:
+            visionmaxres_var.set(dict["visionmaxres"])
         draftmodel_var.set(dict["draftmodel"] if ("draftmodel" in dict and dict["draftmodel"]) else "")
         if "draftamount" in dict:
            draftamount_var.set(dict["draftamount"])
```
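Note the guard when importing a saved config is truthiness-based, not just membership-based: a stored `0` or `""` is skipped and the field keeps its default. A quick illustration:

```python
visionmaxres_default = "1024"  # what the GUI field starts with (illustrative)

for saved in ({}, {"visionmaxres": 0}, {"visionmaxres": 1536}):
    if "visionmaxres" in saved and saved["visionmaxres"]:
        value = str(saved["visionmaxres"])
    else:
        value = visionmaxres_default
    print(value)  # 1024, then 1024, then 1536
```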
```diff
@@ -4247,7 +4256,7 @@ def show_gui():
         tts_model_var.set(dict["ttsmodel"] if ("ttsmodel" in dict and dict["ttsmodel"]) else "")
         wavtokenizer_var.set(dict["ttswavtokenizer"] if ("ttswavtokenizer" in dict and dict["ttswavtokenizer"]) else "")
         ttsgpu_var.set(dict["ttsgpu"] if ("ttsgpu" in dict) else 0)
-        ttsmaxlen_var.set(str(dict["ttsmaxlen"]) if ("ttsmaxlen" in dict and dict["ttsmaxlen"]) else str(4096))
+        ttsmaxlen_var.set(str(dict["ttsmaxlen"]) if ("ttsmaxlen" in dict and dict["ttsmaxlen"]) else str(default_ttsmaxlen))
 
         importvars_in_progress = False
         gui_changed_modelfile()
```
```diff
@@ -5604,6 +5613,7 @@ if __name__ == '__main__':
     advparser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
    advparser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
     advparser.add_argument("--mmproj", metavar=('[filename]'), help="Select a multimodal projector file for vision models like LLaVA.", default="")
+    advparser.add_argument("--visionmaxres", metavar=('[max px]'), help="Clamp MMProj vision maximum allowed resolution. Allowed values are between 512 to 2048 px (default 1024).", type=int, default=default_visionmaxres)
     advparser.add_argument("--draftmodel", metavar=('[filename]'), help="Load a small draft model for speculative decoding. It will be fully offloaded. Vocab must match the main model.", default="")
     advparser.add_argument("--draftamount", metavar=('[tokens]'), help="How many tokens to draft per chunk before verifying results", type=int, default=default_draft_amount)
     advparser.add_argument("--draftgpulayers", metavar=('[layers]'), help="How many layers to offload to GPU for the draft model (default=full offload)", type=int, default=999)
```
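A reduced sketch of the new flag's wiring; the real code registers it on an `advparser` argument group, and the parsed value is later clamped in `load_model` as shown above:

```python
import argparse

default_visionmaxres = 1024

parser = argparse.ArgumentParser()
parser.add_argument("--visionmaxres", metavar='[max px]', type=int,
                    default=default_visionmaxres,
                    help="Clamp MMProj vision maximum allowed resolution "
                         "(512 to 2048 px).")

args = parser.parse_args(["--visionmaxres", "1536"])
print(args.visionmaxres)  # 1536
args = parser.parse_args([])
print(args.visionmaxres)  # 1024
```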
```diff
@@ -5652,7 +5662,7 @@ if __name__ == '__main__':
     ttsparsergroup.add_argument("--ttsmodel", metavar=('[filename]'), help="Specify the OuteTTS Text-To-Speech GGUF model.", default="")
     ttsparsergroup.add_argument("--ttswavtokenizer", metavar=('[filename]'), help="Specify the WavTokenizer GGUF model.", default="")
     ttsparsergroup.add_argument("--ttsgpu", help="Use the GPU for TTS.", action='store_true')
-    ttsparsergroup.add_argument("--ttsmaxlen", help="Limit number of audio tokens generated with TTS.", type=int, default=4096)
+    ttsparsergroup.add_argument("--ttsmaxlen", help="Limit number of audio tokens generated with TTS.", type=int, default=default_ttsmaxlen)
     ttsparsergroup.add_argument("--ttsthreads", metavar=('[threads]'), help="Use a different number of threads for TTS if specified. Otherwise, has the same value as --threads.", type=int, default=0)
 
     deprecatedgroup = parser.add_argument_group('Deprecated Commands, DO NOT USE!')
```