mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-22 03:10:03 +00:00
testing removed an assert
This commit is contained in:
parent
262437f393
commit
87c8e2131b
3 changed files with 7 additions and 3 deletions
|
|
@ -149,6 +149,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||
//#else
|
||||
// GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
|
||||
//#endif // GGML_CUDA_FORCE_CUBLAS
|
||||
GGML_LOG_INFO("Initializing CUDA, please wait, this might take a while for first run...\n", __func__, info.device_count);
|
||||
GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
|
||||
for (int id = 0; id < info.device_count; ++id) {
|
||||
int device_vmm = 0;
|
||||
|
|
|
|||
|
|
@ -315,7 +315,7 @@ void ggml_cuda_flash_attn_ext_vec_f16_case(ggml_backend_cuda_context & ctx, ggml
|
|||
const ggml_tensor * V = dst->src[2];
|
||||
|
||||
const int32_t precision = KQV->op_params[3];
|
||||
GGML_ASSERT(precision == GGML_PREC_DEFAULT);
|
||||
GGML_ASSERT_CONTINUE(precision == GGML_PREC_DEFAULT);
|
||||
|
||||
GGML_ASSERT(K->type == type_K);
|
||||
GGML_ASSERT(V->type == type_V);
|
||||
|
|
|
|||
|
|
@ -12648,14 +12648,14 @@ Current version indicated by LITEVER below.
|
|||
//memory is allowed to use up to 0.8 times the ctx allowance, the author's note (anote) up to 0.6 times
|
||||
let max_mem_len = Math.floor(max_allowed_characters*0.8);
|
||||
let max_anote_len = Math.floor(max_allowed_characters*0.6);
|
||||
let max_wi_len = Math.floor(max_allowed_characters*0.7);
|
||||
let max_wi_len = Math.floor(max_allowed_characters*0.5);
|
||||
let appendedsysprompt = "";
|
||||
if(localsettings.opmode==4 && localsettings.instruct_sysprompt!="")
|
||||
{
|
||||
max_mem_len = Math.floor(max_allowed_characters*0.7);
|
||||
appendedsysprompt = get_instruct_systag(false) + localsettings.instruct_sysprompt + "\n";
|
||||
}
|
||||
let truncated_memory = appendedsysprompt + substring_to_boundary(current_memory, max_mem_len);
|
||||
let truncated_memory = substring_to_boundary(current_memory, max_mem_len);
|
||||
if (truncated_memory != null && truncated_memory != "") {
|
||||
if(newlineaftermemory)
|
||||
{
|
||||
|
|
@ -12786,12 +12786,15 @@ Current version indicated by LITEVER below.
|
|||
if(wi_insertlocation>0)
|
||||
{
|
||||
truncated_anote = wistr + truncated_anote;
|
||||
truncated_anote = substring_to_boundary(truncated_anote, max_anote_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
truncated_memory += wistr;
|
||||
}
|
||||
|
||||
truncated_memory = appendedsysprompt + substring_to_boundary(truncated_memory, max_mem_len);
|
||||
|
||||
//now we resize the context so that the memory and author's note can fit inside
|
||||
truncated_context = substring_to_boundary(truncated_context, max_allowed_characters);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue