added new binding fields for quant k and quant v

This commit is contained in:
Concedo 2024-06-03 14:35:59 +08:00
parent 039cc392d1
commit 10a1d628ad
4 changed files with 67 additions and 38 deletions

View file

@ -58,6 +58,8 @@ struct load_model_inputs
const float rope_freq_base = 10000.0f;
const bool flash_attention = false;
const float tensor_split[tensor_split_max];
const int quant_k = 0;
const int quant_v = 0;
};
struct generation_inputs
{

View file

@ -1107,6 +1107,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
}
llama_ctx_params.flash_attn = kcpp_params->flash_attn;
llama_ctx_params.type_k = (inputs.quant_k>1?GGML_TYPE_Q4_0:(inputs.quant_k==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
llama_ctx_params.type_v = (inputs.quant_v>1?GGML_TYPE_Q4_0:(inputs.quant_v==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
if (llama_ctx_v4 == NULL)

View file

@ -6856,6 +6856,7 @@ Current version: 143
function is_popup_open()
{
return !(
document.getElementById("inputboxcontainer").classList.contains("hidden") &&
document.getElementById("saveloadcontainer").classList.contains("hidden") &&
document.getElementById("newgamecontainer").classList.contains("hidden") &&
document.getElementById("yesnocontainer").classList.contains("hidden") &&
@ -6881,6 +6882,7 @@ Current version: 143
document.getElementById("newgamecontainer").classList.add("hidden");
document.getElementById("yesnocontainer").classList.add("hidden");
document.getElementById("settingscontainer").classList.add("hidden");
document.getElementById("inputboxcontainer").classList.add("hidden");
document.getElementById("msgboxcontainer").classList.add("hidden");
document.getElementById("memorycontainer").classList.add("hidden");
document.getElementById("workercontainer").classList.add("hidden");
@ -9704,6 +9706,22 @@ Current version: 143
}
}
function handle_escape_button(event)
{
    //keydown handler: pressing Escape while any popup is showing closes all popups
    if (!is_popup_open())
    {
        return; //no popup showing, so there is nothing to dismiss
    }
    //use event.key when the event exposes it ("Esc" is the legacy spelling), otherwise fall back to the numeric keyCode
    const pressedEscape = ("key" in event)
        ? (event.key === "Escape" || event.key === "Esc")
        : (event.keyCode === 27);
    if (pressedEscape)
    {
        hide_popups();
    }
}
function handle_typing(event) {
var event = event || window.event;
var charCode = event.keyCode || event.which;
@ -14072,7 +14090,7 @@ Current version: 143
}
//setup wi tab
backup_wi();
start_editing_wi();
update_wi();
populate_placeholder_tags();
@ -14192,7 +14210,7 @@ Current version: 143
}
function toggle_wi_sk(idx) {
var ce = current_wi[idx];
var ce = pending_wi_obj[idx];
ce.selective = !ce.selective;
var tgt = document.getElementById("wiskt" + idx);
var tgt2 = document.getElementById("wikeysec" + idx);
@ -14211,7 +14229,7 @@ Current version: 143
}
function toggle_wi_ck(idx) {
var ce = current_wi[idx];
var ce = pending_wi_obj[idx];
ce.constant = !ce.constant;
var tgt = document.getElementById("wickt" + idx);
if (ce.constant) {
@ -14225,29 +14243,29 @@ Current version: 143
//Deletes the world info entry at index idx from the list being edited.
//NOTE(review): this span comes from a diff view whose +/- markers were stripped;
//the current_wi statements look like the removed side and the pending_wi_obj
//statements the added side - confirm against the real file before relying on either.
function del_wi(idx) {
//copy whatever is currently typed into the form fields into the entries first,
//so the remaining entries keep their edits after the array is changed
save_wi();
var ce = current_wi[idx];
current_wi.splice(idx, 1);
//ce is assigned but never read in this function
var ce = pending_wi_obj[idx];
pending_wi_obj.splice(idx, 1);
//presumably redraws the entry list from the array; update_wi is not fully visible here - TODO confirm
update_wi();
}
function up_wi(idx) {
save_wi();
var ce = current_wi[idx];
if (idx > 0 && idx < current_wi.length) {
const temp = current_wi[idx - 1];
current_wi[idx - 1] = current_wi[idx];
current_wi[idx] = temp;
var ce = pending_wi_obj[idx];
if (idx > 0 && idx < pending_wi_obj.length) {
const temp = pending_wi_obj[idx - 1];
pending_wi_obj[idx - 1] = pending_wi_obj[idx];
pending_wi_obj[idx] = temp;
}
update_wi();
}
function down_wi(idx) {
save_wi();
var ce = current_wi[idx];
if (idx >= 0 && idx+1 < current_wi.length) {
const temp = current_wi[idx + 1];
current_wi[idx + 1] = current_wi[idx];
current_wi[idx] = temp;
var ce = pending_wi_obj[idx];
if (idx >= 0 && idx+1 < pending_wi_obj.length) {
const temp = pending_wi_obj[idx + 1];
pending_wi_obj[idx + 1] = pending_wi_obj[idx];
pending_wi_obj[idx] = temp;
}
update_wi();
}
@ -14265,32 +14283,32 @@ Current version: 143
"constant": false,
"probability":100
};
current_wi.push(ne);
pending_wi_obj.push(ne);
update_wi();
}
function save_wi() {
for (var i = 0; i < current_wi.length; ++i) {
current_wi[i].key = document.getElementById("wikey" + i).value;
current_wi[i].keysecondary = document.getElementById("wikeysec" + i).value;
current_wi[i].keyanti = document.getElementById("wikeyanti" + i).value;
current_wi[i].content = document.getElementById("wival" + i).value;
for (var i = 0; i < pending_wi_obj.length; ++i) {
pending_wi_obj[i].key = document.getElementById("wikey" + i).value;
pending_wi_obj[i].keysecondary = document.getElementById("wikeysec" + i).value;
pending_wi_obj[i].keyanti = document.getElementById("wikeyanti" + i).value;
pending_wi_obj[i].content = document.getElementById("wival" + i).value;
let prb = document.getElementById("wirng" + i).value;
current_wi[i].probability = (prb?prb:100);
pending_wi_obj[i].probability = (prb?prb:100);
}
localsettings.case_sensitive_wi = (document.getElementById("case_sensitive_wi").checked?true:false);
wi_searchdepth = document.getElementById("wi_searchdepth").value;
wi_insertlocation = document.getElementById("wi_insertlocation").value;
}
let backup_wi_obj = [];
function revert_wi()
let pending_wi_obj = []; //only the pending copy is edited until committed
function commit_wi_changes()
{
current_wi = JSON.parse(JSON.stringify(backup_wi_obj));
current_wi = JSON.parse(JSON.stringify(pending_wi_obj));
}
function backup_wi()
function start_editing_wi()
{
backup_wi_obj = JSON.parse(JSON.stringify(current_wi)); //in case we need to reset
pending_wi_obj = JSON.parse(JSON.stringify(current_wi));
}
function wi_quick_search()
@ -14305,8 +14323,8 @@ Current version: 143
let wilist = document.getElementById("wilist");
let qsval = document.getElementById("wiquicksearch").value;
let selectionhtml = `<table style="border-collapse: separate; border-spacing: 1.5pt;">`;
for (var i = 0; i < current_wi.length; ++i) {
var curr = current_wi[i];
for (var i = 0; i < pending_wi_obj.length; ++i) {
var curr = pending_wi_obj[i];
var winame = escapeHtml(curr.key);
var witxt = escapeHtml(curr.content);
var wisec = (curr.keysecondary?curr.keysecondary:"");
@ -14348,7 +14366,7 @@ Current version: 143
</tr>
`;
}
if (current_wi.length == 0) {
if (pending_wi_obj.length == 0) {
selectionhtml = "<div class=\"aidgpopuplistheader anotelabel\">No world info.<br>Click [+Add] to add a new entry.</div>"
}
@ -15156,7 +15174,7 @@ Current version: 143
</head>
<body id="outerbody" class="">
<body id="outerbody" class="" onkeydown="handle_escape_button(event)">
<div id="maincontainer" class="adaptivecontainer maincontainer">
<div id="outerbodybg"></div>
@ -16460,8 +16478,8 @@ Current version: 143
</div>
<div class="popupfooter">
<button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();render_gametext();hide_popups()">OK</button>
<button type="button" class="btn btn-primary" onclick="revert_wi();hide_popups()">Cancel</button>
<button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();commit_wi_changes();render_gametext();hide_popups()">OK</button>
<button type="button" class="btn btn-primary" onclick="hide_popups();">Cancel</button>
</div>
</div>
</div>

View file

@ -59,7 +59,9 @@ class load_model_inputs(ctypes.Structure):
("rope_freq_scale", ctypes.c_float),
("rope_freq_base", ctypes.c_float),
("flash_attention", ctypes.c_bool),
("tensor_split", ctypes.c_float * tensor_split_max)]
("tensor_split", ctypes.c_float * tensor_split_max),
("quant_k", ctypes.c_int),
("quant_v", ctypes.c_int)]
class generation_inputs(ctypes.Structure):
_fields_ = [("seed", ctypes.c_int),
@ -294,11 +296,14 @@ def init_library():
os.add_dll_directory(abs_path)
os.add_dll_directory(os.getcwd())
if libname == lib_cublas and "CUDA_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
newpath = os.path.join(os.environ["CUDA_PATH"], "bin")
if os.path.exists(newpath):
os.add_dll_directory(newpath)
if libname == lib_hipblas and "HIP_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
if args.debugmode == 1:
print(f"HIP/ROCm SDK at {os.environ['HIP_PATH']} included in .DLL load path")
newpath = os.path.join(os.environ["HIP_PATH"], "bin")
if os.path.exists(newpath):
os.add_dll_directory(newpath)
handle = ctypes.CDLL(os.path.join(dir_path, libname))
handle.load_model.argtypes = [load_model_inputs]
@ -413,6 +418,8 @@ def load_model(model_filename):
inputs.use_smartcontext = args.smartcontext
inputs.use_contextshift = (0 if args.noshift else 1)
inputs.flash_attention = args.flashattention
inputs.quant_k = 0
inputs.quant_v = 0
inputs.blasbatchsize = args.blasbatchsize
inputs.forceversion = args.forceversion
inputs.gpulayers = args.gpulayers