added new binding fields for quant k and quant v

This commit is contained in:
Concedo 2024-06-03 14:35:59 +08:00
parent 039cc392d1
commit 10a1d628ad
4 changed files with 67 additions and 38 deletions

View file

@ -58,6 +58,8 @@ struct load_model_inputs
const float rope_freq_base = 10000.0f; const float rope_freq_base = 10000.0f;
const bool flash_attention = false; const bool flash_attention = false;
const float tensor_split[tensor_split_max]; const float tensor_split[tensor_split_max];
const int quant_k = 0;
const int quant_v = 0;
}; };
struct generation_inputs struct generation_inputs
{ {

View file

@ -1107,6 +1107,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
} }
llama_ctx_params.flash_attn = kcpp_params->flash_attn; llama_ctx_params.flash_attn = kcpp_params->flash_attn;
llama_ctx_params.type_k = (inputs.quant_k>1?GGML_TYPE_Q4_0:(inputs.quant_k==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
llama_ctx_params.type_v = (inputs.quant_v>1?GGML_TYPE_Q4_0:(inputs.quant_v==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params); llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
if (llama_ctx_v4 == NULL) if (llama_ctx_v4 == NULL)

View file

@ -6856,6 +6856,7 @@ Current version: 143
function is_popup_open() function is_popup_open()
{ {
return !( return !(
document.getElementById("inputboxcontainer").classList.contains("hidden") &&
document.getElementById("saveloadcontainer").classList.contains("hidden") && document.getElementById("saveloadcontainer").classList.contains("hidden") &&
document.getElementById("newgamecontainer").classList.contains("hidden") && document.getElementById("newgamecontainer").classList.contains("hidden") &&
document.getElementById("yesnocontainer").classList.contains("hidden") && document.getElementById("yesnocontainer").classList.contains("hidden") &&
@ -6881,6 +6882,7 @@ Current version: 143
document.getElementById("newgamecontainer").classList.add("hidden"); document.getElementById("newgamecontainer").classList.add("hidden");
document.getElementById("yesnocontainer").classList.add("hidden"); document.getElementById("yesnocontainer").classList.add("hidden");
document.getElementById("settingscontainer").classList.add("hidden"); document.getElementById("settingscontainer").classList.add("hidden");
document.getElementById("inputboxcontainer").classList.add("hidden");
document.getElementById("msgboxcontainer").classList.add("hidden"); document.getElementById("msgboxcontainer").classList.add("hidden");
document.getElementById("memorycontainer").classList.add("hidden"); document.getElementById("memorycontainer").classList.add("hidden");
document.getElementById("workercontainer").classList.add("hidden"); document.getElementById("workercontainer").classList.add("hidden");
@ -9704,6 +9706,22 @@ Current version: 143
} }
} }
function handle_escape_button(event)
{
if(is_popup_open())
{
var isEscape = false;
if ("key" in event) {
isEscape = (event.key === "Escape" || event.key === "Esc");
} else {
isEscape = (event.keyCode === 27);
}
if (isEscape) {
hide_popups();
}
}
}
function handle_typing(event) { function handle_typing(event) {
var event = event || window.event; var event = event || window.event;
var charCode = event.keyCode || event.which; var charCode = event.keyCode || event.which;
@ -14072,7 +14090,7 @@ Current version: 143
} }
//setup wi tab //setup wi tab
backup_wi(); start_editing_wi();
update_wi(); update_wi();
populate_placeholder_tags(); populate_placeholder_tags();
@ -14192,7 +14210,7 @@ Current version: 143
} }
function toggle_wi_sk(idx) { function toggle_wi_sk(idx) {
var ce = current_wi[idx]; var ce = pending_wi_obj[idx];
ce.selective = !ce.selective; ce.selective = !ce.selective;
var tgt = document.getElementById("wiskt" + idx); var tgt = document.getElementById("wiskt" + idx);
var tgt2 = document.getElementById("wikeysec" + idx); var tgt2 = document.getElementById("wikeysec" + idx);
@ -14211,7 +14229,7 @@ Current version: 143
} }
function toggle_wi_ck(idx) { function toggle_wi_ck(idx) {
var ce = current_wi[idx]; var ce = pending_wi_obj[idx];
ce.constant = !ce.constant; ce.constant = !ce.constant;
var tgt = document.getElementById("wickt" + idx); var tgt = document.getElementById("wickt" + idx);
if (ce.constant) { if (ce.constant) {
@ -14225,29 +14243,29 @@ Current version: 143
function del_wi(idx) { function del_wi(idx) {
save_wi(); save_wi();
var ce = current_wi[idx]; var ce = pending_wi_obj[idx];
current_wi.splice(idx, 1); pending_wi_obj.splice(idx, 1);
update_wi(); update_wi();
} }
function up_wi(idx) { function up_wi(idx) {
save_wi(); save_wi();
var ce = current_wi[idx]; var ce = pending_wi_obj[idx];
if (idx > 0 && idx < current_wi.length) { if (idx > 0 && idx < pending_wi_obj.length) {
const temp = current_wi[idx - 1]; const temp = pending_wi_obj[idx - 1];
current_wi[idx - 1] = current_wi[idx]; pending_wi_obj[idx - 1] = pending_wi_obj[idx];
current_wi[idx] = temp; pending_wi_obj[idx] = temp;
} }
update_wi(); update_wi();
} }
function down_wi(idx) { function down_wi(idx) {
save_wi(); save_wi();
var ce = current_wi[idx]; var ce = pending_wi_obj[idx];
if (idx >= 0 && idx+1 < current_wi.length) { if (idx >= 0 && idx+1 < pending_wi_obj.length) {
const temp = current_wi[idx + 1]; const temp = pending_wi_obj[idx + 1];
current_wi[idx + 1] = current_wi[idx]; pending_wi_obj[idx + 1] = pending_wi_obj[idx];
current_wi[idx] = temp; pending_wi_obj[idx] = temp;
} }
update_wi(); update_wi();
} }
@ -14265,32 +14283,32 @@ Current version: 143
"constant": false, "constant": false,
"probability":100 "probability":100
}; };
current_wi.push(ne); pending_wi_obj.push(ne);
update_wi(); update_wi();
} }
function save_wi() { function save_wi() {
for (var i = 0; i < current_wi.length; ++i) { for (var i = 0; i < pending_wi_obj.length; ++i) {
current_wi[i].key = document.getElementById("wikey" + i).value; pending_wi_obj[i].key = document.getElementById("wikey" + i).value;
current_wi[i].keysecondary = document.getElementById("wikeysec" + i).value; pending_wi_obj[i].keysecondary = document.getElementById("wikeysec" + i).value;
current_wi[i].keyanti = document.getElementById("wikeyanti" + i).value; pending_wi_obj[i].keyanti = document.getElementById("wikeyanti" + i).value;
current_wi[i].content = document.getElementById("wival" + i).value; pending_wi_obj[i].content = document.getElementById("wival" + i).value;
let prb = document.getElementById("wirng" + i).value; let prb = document.getElementById("wirng" + i).value;
current_wi[i].probability = (prb?prb:100); pending_wi_obj[i].probability = (prb?prb:100);
} }
localsettings.case_sensitive_wi = (document.getElementById("case_sensitive_wi").checked?true:false); localsettings.case_sensitive_wi = (document.getElementById("case_sensitive_wi").checked?true:false);
wi_searchdepth = document.getElementById("wi_searchdepth").value; wi_searchdepth = document.getElementById("wi_searchdepth").value;
wi_insertlocation = document.getElementById("wi_insertlocation").value; wi_insertlocation = document.getElementById("wi_insertlocation").value;
} }
let backup_wi_obj = []; let pending_wi_obj = []; //only the pending copy is edited until committed
function revert_wi() function commit_wi_changes()
{ {
current_wi = JSON.parse(JSON.stringify(backup_wi_obj)); current_wi = JSON.parse(JSON.stringify(pending_wi_obj));
} }
function backup_wi() function start_editing_wi()
{ {
backup_wi_obj = JSON.parse(JSON.stringify(current_wi)); //in case we need to reset pending_wi_obj = JSON.parse(JSON.stringify(current_wi));
} }
function wi_quick_search() function wi_quick_search()
@ -14305,8 +14323,8 @@ Current version: 143
let wilist = document.getElementById("wilist"); let wilist = document.getElementById("wilist");
let qsval = document.getElementById("wiquicksearch").value; let qsval = document.getElementById("wiquicksearch").value;
let selectionhtml = `<table style="border-collapse: separate; border-spacing: 1.5pt;">`; let selectionhtml = `<table style="border-collapse: separate; border-spacing: 1.5pt;">`;
for (var i = 0; i < current_wi.length; ++i) { for (var i = 0; i < pending_wi_obj.length; ++i) {
var curr = current_wi[i]; var curr = pending_wi_obj[i];
var winame = escapeHtml(curr.key); var winame = escapeHtml(curr.key);
var witxt = escapeHtml(curr.content); var witxt = escapeHtml(curr.content);
var wisec = (curr.keysecondary?curr.keysecondary:""); var wisec = (curr.keysecondary?curr.keysecondary:"");
@ -14348,7 +14366,7 @@ Current version: 143
</tr> </tr>
`; `;
} }
if (current_wi.length == 0) { if (pending_wi_obj.length == 0) {
selectionhtml = "<div class=\"aidgpopuplistheader anotelabel\">No world info.<br>Click [+Add] to add a new entry.</div>" selectionhtml = "<div class=\"aidgpopuplistheader anotelabel\">No world info.<br>Click [+Add] to add a new entry.</div>"
} }
@ -15156,7 +15174,7 @@ Current version: 143
</head> </head>
<body id="outerbody" class=""> <body id="outerbody" class="" onkeydown="handle_escape_button(event)">
<div id="maincontainer" class="adaptivecontainer maincontainer"> <div id="maincontainer" class="adaptivecontainer maincontainer">
<div id="outerbodybg"></div> <div id="outerbodybg"></div>
@ -16460,8 +16478,8 @@ Current version: 143
</div> </div>
<div class="popupfooter"> <div class="popupfooter">
<button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();render_gametext();hide_popups()">OK</button> <button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();commit_wi_changes();render_gametext();hide_popups()">OK</button>
<button type="button" class="btn btn-primary" onclick="revert_wi();hide_popups()">Cancel</button> <button type="button" class="btn btn-primary" onclick="hide_popups();">Cancel</button>
</div> </div>
</div> </div>
</div> </div>

View file

@ -59,7 +59,9 @@ class load_model_inputs(ctypes.Structure):
("rope_freq_scale", ctypes.c_float), ("rope_freq_scale", ctypes.c_float),
("rope_freq_base", ctypes.c_float), ("rope_freq_base", ctypes.c_float),
("flash_attention", ctypes.c_bool), ("flash_attention", ctypes.c_bool),
("tensor_split", ctypes.c_float * tensor_split_max)] ("tensor_split", ctypes.c_float * tensor_split_max),
("quant_k", ctypes.c_int),
("quant_v", ctypes.c_int)]
class generation_inputs(ctypes.Structure): class generation_inputs(ctypes.Structure):
_fields_ = [("seed", ctypes.c_int), _fields_ = [("seed", ctypes.c_int),
@ -294,11 +296,14 @@ def init_library():
os.add_dll_directory(abs_path) os.add_dll_directory(abs_path)
os.add_dll_directory(os.getcwd()) os.add_dll_directory(os.getcwd())
if libname == lib_cublas and "CUDA_PATH" in os.environ: if libname == lib_cublas and "CUDA_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) newpath = os.path.join(os.environ["CUDA_PATH"], "bin")
if os.path.exists(newpath):
os.add_dll_directory(newpath)
if libname == lib_hipblas and "HIP_PATH" in os.environ: if libname == lib_hipblas and "HIP_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin")) newpath = os.path.join(os.environ["HIP_PATH"], "bin")
if args.debugmode == 1: if os.path.exists(newpath):
print(f"HIP/ROCm SDK at {os.environ['HIP_PATH']} included in .DLL load path") os.add_dll_directory(newpath)
handle = ctypes.CDLL(os.path.join(dir_path, libname)) handle = ctypes.CDLL(os.path.join(dir_path, libname))
handle.load_model.argtypes = [load_model_inputs] handle.load_model.argtypes = [load_model_inputs]
@ -413,6 +418,8 @@ def load_model(model_filename):
inputs.use_smartcontext = args.smartcontext inputs.use_smartcontext = args.smartcontext
inputs.use_contextshift = (0 if args.noshift else 1) inputs.use_contextshift = (0 if args.noshift else 1)
inputs.flash_attention = args.flashattention inputs.flash_attention = args.flashattention
inputs.quant_k = 0
inputs.quant_v = 0
inputs.blasbatchsize = args.blasbatchsize inputs.blasbatchsize = args.blasbatchsize
inputs.forceversion = args.forceversion inputs.forceversion = args.forceversion
inputs.gpulayers = args.gpulayers inputs.gpulayers = args.gpulayers