diff --git a/expose.h b/expose.h index e44fb22bb..b12374605 100644 --- a/expose.h +++ b/expose.h @@ -58,6 +58,8 @@ struct load_model_inputs const float rope_freq_base = 10000.0f; const bool flash_attention = false; const float tensor_split[tensor_split_max]; + const int quant_k = 0; + const int quant_v = 0; }; struct generation_inputs { diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index fdfbb4903..97aabe545 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1107,6 +1107,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in } llama_ctx_params.flash_attn = kcpp_params->flash_attn; + llama_ctx_params.type_k = (inputs.quant_k>1?GGML_TYPE_Q4_0:(inputs.quant_k==1?GGML_TYPE_Q8_0:GGML_TYPE_F16)); + llama_ctx_params.type_v = (inputs.quant_v>1?GGML_TYPE_Q4_0:(inputs.quant_v==1?GGML_TYPE_Q8_0:GGML_TYPE_F16)); llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params); if (llama_ctx_v4 == NULL) diff --git a/klite.embd b/klite.embd index 28f060957..274ee40e3 100644 --- a/klite.embd +++ b/klite.embd @@ -6856,6 +6856,7 @@ Current version: 143 function is_popup_open() { return !( + document.getElementById("inputboxcontainer").classList.contains("hidden") && document.getElementById("saveloadcontainer").classList.contains("hidden") && document.getElementById("newgamecontainer").classList.contains("hidden") && document.getElementById("yesnocontainer").classList.contains("hidden") && @@ -6881,6 +6882,7 @@ Current version: 143 document.getElementById("newgamecontainer").classList.add("hidden"); document.getElementById("yesnocontainer").classList.add("hidden"); document.getElementById("settingscontainer").classList.add("hidden"); + document.getElementById("inputboxcontainer").classList.add("hidden"); document.getElementById("msgboxcontainer").classList.add("hidden"); document.getElementById("memorycontainer").classList.add("hidden"); document.getElementById("workercontainer").classList.add("hidden"); @@ -9704,6 +9706,22 @@ Current version: 143 } } + function handle_escape_button(event) + { + if(is_popup_open()) + { + var isEscape = false; + if ("key" in event) { + isEscape = (event.key === "Escape" || event.key === "Esc"); + } else { + isEscape = (event.keyCode === 27); + } + if (isEscape) { + hide_popups(); + } + } + } + function handle_typing(event) { var event = event || window.event; var charCode = event.keyCode || event.which; @@ -14072,7 +14090,7 @@ Current version: 143 } //setup wi tab - backup_wi(); + start_editing_wi(); update_wi(); populate_placeholder_tags(); @@ -14192,7 +14210,7 @@ Current version: 143 } function toggle_wi_sk(idx) { - var ce = current_wi[idx]; + var ce = pending_wi_obj[idx]; ce.selective = !ce.selective; var tgt = document.getElementById("wiskt" + idx); var tgt2 = document.getElementById("wikeysec" + idx); @@ -14211,7 +14229,7 @@ Current version: 143 } function toggle_wi_ck(idx) { - var ce = current_wi[idx]; + var ce = pending_wi_obj[idx]; ce.constant = !ce.constant; var tgt = document.getElementById("wickt" + idx); if (ce.constant) { @@ -14225,29 +14243,29 @@ Current version: 143 function del_wi(idx) { save_wi(); - var ce = current_wi[idx]; - current_wi.splice(idx, 1); + var ce = pending_wi_obj[idx]; + pending_wi_obj.splice(idx, 1); update_wi(); } function up_wi(idx) { save_wi(); - var ce = current_wi[idx]; - if (idx > 0 && idx < current_wi.length) { - const temp = current_wi[idx - 1]; - current_wi[idx - 1] = current_wi[idx]; - current_wi[idx] = temp; + var ce = pending_wi_obj[idx]; + if (idx > 0 && idx < pending_wi_obj.length) { + const temp = pending_wi_obj[idx - 1]; + pending_wi_obj[idx - 1] = pending_wi_obj[idx]; + pending_wi_obj[idx] = temp; } update_wi(); } function down_wi(idx) { save_wi(); - var ce = current_wi[idx]; - if (idx >= 0 && idx+1 < current_wi.length) { - const temp = current_wi[idx + 1]; - current_wi[idx + 1] = current_wi[idx]; - current_wi[idx] = temp; + var ce = pending_wi_obj[idx]; + if (idx >= 0 && idx+1 < pending_wi_obj.length) { + const temp = pending_wi_obj[idx + 1]; + pending_wi_obj[idx + 1] = pending_wi_obj[idx]; + pending_wi_obj[idx] = temp; } update_wi(); } @@ -14265,32 +14283,32 @@ Current version: 143 "constant": false, "probability":100 }; - current_wi.push(ne); + pending_wi_obj.push(ne); update_wi(); } function save_wi() { - for (var i = 0; i < current_wi.length; ++i) { - current_wi[i].key = document.getElementById("wikey" + i).value; - current_wi[i].keysecondary = document.getElementById("wikeysec" + i).value; - current_wi[i].keyanti = document.getElementById("wikeyanti" + i).value; - current_wi[i].content = document.getElementById("wival" + i).value; + for (var i = 0; i < pending_wi_obj.length; ++i) { + pending_wi_obj[i].key = document.getElementById("wikey" + i).value; + pending_wi_obj[i].keysecondary = document.getElementById("wikeysec" + i).value; + pending_wi_obj[i].keyanti = document.getElementById("wikeyanti" + i).value; + pending_wi_obj[i].content = document.getElementById("wival" + i).value; let prb = document.getElementById("wirng" + i).value; - current_wi[i].probability = (prb?prb:100); + pending_wi_obj[i].probability = (prb?prb:100); } localsettings.case_sensitive_wi = (document.getElementById("case_sensitive_wi").checked?true:false); wi_searchdepth = document.getElementById("wi_searchdepth").value; wi_insertlocation = document.getElementById("wi_insertlocation").value; } - let backup_wi_obj = []; - function revert_wi() + let pending_wi_obj = []; //only the pending copy is edited until committed + function commit_wi_changes() { - current_wi = JSON.parse(JSON.stringify(backup_wi_obj)); + current_wi = JSON.parse(JSON.stringify(pending_wi_obj)); } - function backup_wi() + function start_editing_wi() { - backup_wi_obj = JSON.parse(JSON.stringify(current_wi)); //in case we need to reset + pending_wi_obj = JSON.parse(JSON.stringify(current_wi)); } function wi_quick_search() @@ -14305,8 +14323,8 @@ Current version: 143 let wilist = document.getElementById("wilist"); let qsval = document.getElementById("wiquicksearch").value; let selectionhtml = ``; - for (var i = 0; i < current_wi.length; ++i) { - var curr = current_wi[i]; + for (var i = 0; i < pending_wi_obj.length; ++i) { + var curr = pending_wi_obj[i]; var winame = escapeHtml(curr.key); var witxt = escapeHtml(curr.content); var wisec = (curr.keysecondary?curr.keysecondary:""); @@ -14348,7 +14366,7 @@ Current version: 143 `; } - if (current_wi.length == 0) { + if (pending_wi_obj.length == 0) { selectionhtml = "
No world info.
Click [+Add] to add a new entry.
" } @@ -15156,7 +15174,7 @@ Current version: 143 - +
@@ -16460,8 +16478,8 @@ Current version: 143
- - + +
diff --git a/koboldcpp.py b/koboldcpp.py index 14855e2a2..10f4ea32d 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -59,7 +59,9 @@ class load_model_inputs(ctypes.Structure): ("rope_freq_scale", ctypes.c_float), ("rope_freq_base", ctypes.c_float), ("flash_attention", ctypes.c_bool), - ("tensor_split", ctypes.c_float * tensor_split_max)] + ("tensor_split", ctypes.c_float * tensor_split_max), + ("quant_k", ctypes.c_int), + ("quant_v", ctypes.c_int)] class generation_inputs(ctypes.Structure): _fields_ = [("seed", ctypes.c_int), @@ -294,11 +296,14 @@ def init_library(): os.add_dll_directory(abs_path) os.add_dll_directory(os.getcwd()) if libname == lib_cublas and "CUDA_PATH" in os.environ: - os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) + newpath = os.path.join(os.environ["CUDA_PATH"], "bin") + if os.path.exists(newpath): + os.add_dll_directory(newpath) if libname == lib_hipblas and "HIP_PATH" in os.environ: - os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin")) - if args.debugmode == 1: - print(f"HIP/ROCm SDK at {os.environ['HIP_PATH']} included in .DLL load path") + newpath = os.path.join(os.environ["HIP_PATH"], "bin") + if os.path.exists(newpath): + os.add_dll_directory(newpath) + handle = ctypes.CDLL(os.path.join(dir_path, libname)) handle.load_model.argtypes = [load_model_inputs] @@ -413,6 +418,8 @@ def load_model(model_filename): inputs.use_smartcontext = args.smartcontext inputs.use_contextshift = (0 if args.noshift else 1) inputs.flash_attention = args.flashattention + inputs.quant_k = 0 + inputs.quant_v = 0 inputs.blasbatchsize = args.blasbatchsize inputs.forceversion = args.forceversion inputs.gpulayers = args.gpulayers