adjust lite default port, disable double BOS warning, make whisper and SD go quiet when horde mode is set too

Concedo 2024-06-13 15:10:35 +08:00
parent 92bbebb357
commit 49e4c3fd7b
3 changed files with 12 additions and 12 deletions


@@ -3732,7 +3732,7 @@ Current version: 146
 var temp_scenario = null;
 var last_token_budget = ""; //to display token limits
 var last_known_filename = "saved_story.json";
-var localmodeport = 5000;
+var localmodeport = 5001;
 var localmodehost = "localhost";
 var sublocalpathname = "";
 var localmodekey = "";


@@ -599,7 +599,7 @@ def sd_generate(genparams):
     height = tryparseint(genparams.get("height", 512))
     seed = tryparseint(genparams.get("seed", -1))
     sample_method = genparams.get("sampler_name", "k_euler_a")
-    is_quiet = True if args.quiet else False
+    is_quiet = True if (args.quiet or args.debugmode == -1) else False
     clip_skip = tryparseint(genparams.get("clip_skip", -1))
     #clean vars
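
Image generation now uses the same quiet guard as text generation: output is suppressed either when --quiet is passed or when debugmode is -1, which per the commit message corresponds to running as a horde worker. The identical guard is applied to whisper_generate in the next hunk. A self-contained sketch of the check, with a stand-in for the parsed CLI args:

from types import SimpleNamespace

# Stand-in for the parsed CLI args; the real code reads the global
# argparse result. debugmode == -1 is the horde-worker state.
args = SimpleNamespace(quiet=False, debugmode=-1)

# Same logic as the diff; the True-if-else ternary reduces to a bool():
is_quiet = bool(args.quiet or args.debugmode == -1)
print(is_quiet)  # True: SD and whisper logging is silenced in horde mode
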
@@ -656,7 +656,7 @@ def whisper_load_model(model_filename):
 def whisper_generate(genparams):
     global args
-    is_quiet = True if args.quiet else False
+    is_quiet = True if (args.quiet or args.debugmode == -1) else False
     prompt = genparams.get("prompt", "")
     audio_data = genparams.get("audio_data", "")
     if audio_data.startswith("data:audio"):
@@ -672,7 +672,7 @@ def whisper_generate(genparams):
     return outstr
 def utfprint(str):
-    maxlen = 20000
+    maxlen = 25000
     strlength = len(str)
     if strlength > maxlen: #limit max output len
         str = str[:maxlen] + f"... (+{strlength-maxlen} chars)"
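
utfprint's output cap rises from 20000 to 25000 characters; anything longer is cut off with a suffix noting how many characters were dropped. A standalone version of that truncation logic (truncate_for_log is an illustrative name, not the function in koboldcpp.py):

def truncate_for_log(s: str, maxlen: int = 25000) -> str:
    # Cap log output at maxlen characters, appending how much was cut.
    strlength = len(s)
    if strlength > maxlen:  # limit max output len
        return s[:maxlen] + f"... (+{strlength - maxlen} chars)"
    return s

print(truncate_for_log("x" * 30000)[-22:])  # xxxxx... (+5000 chars)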


@@ -13781,10 +13781,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
     }
     if (add_special && vocab.special_add_bos != 0 && output.size() >= 2 && output[1] == vocab.special_bos_id) {
-        LLAMA_LOG_WARN(
-            "%s: Added a BOS token to the prompt as specified by the model but the prompt "
-            "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
-            "Are you sure this is what you want?\n", __FUNCTION__);
+        // LLAMA_LOG_WARN(
+        //     "%s: Added a BOS token to the prompt as specified by the model but the prompt "
+        //     "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
+        //     "Are you sure this is what you want?\n", __FUNCTION__);
     }
     if (add_special && vocab.special_add_eos == 1) {
@@ -13824,10 +13824,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
     }
     if (add_special && vocab.special_add_bos != 0 && output.size() >= 2 && output[1] == vocab.special_bos_id) {
-        LLAMA_LOG_WARN(
-            "%s: Added a BOS token to the prompt as specified by the model but the prompt "
-            "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
-            "Are you sure this is what you want?\n", __FUNCTION__);
+        // LLAMA_LOG_WARN(
+        //     "%s: Added a BOS token to the prompt as specified by the model but the prompt "
+        //     "also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
+        //     "Are you sure this is what you want?\n", __FUNCTION__);
     }
     if (add_special && vocab.special_add_eos == 1) {
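
These two hunks silence the same warning in both tokenizer branches of llama_tokenize_internal: when the model asks for an auto-added BOS and the prompt text itself already begins with BOS, the output starts with two BOS tokens. The condition is left intact; only the log call is commented out. A sketch of the same double-BOS check, in Python for brevity, with an arbitrary example BOS id:

BOS_ID = 1  # arbitrary example id; the real value comes from the model vocab

def starts_with_double_bos(output: list[int], add_special: bool, add_bos: bool) -> bool:
    # Mirrors the guard above: BOS was auto-added, and the token right
    # after it is also BOS, i.e. the prompt itself began with BOS.
    return (add_special and add_bos and len(output) >= 2
            and output[0] == BOS_ID and output[1] == BOS_ID)

tokens = [BOS_ID, BOS_ID, 42, 7]  # auto-added BOS + BOS from the prompt text
print(starts_with_double_bos(tokens, add_special=True, add_bos=True))
# -> True (the condition still matches; the warning is simply no longer printed)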