diff --git a/ggml.c b/ggml.c
index 436f8806c..4fccde381 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16367,7 +16367,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 // wait for other threads to finish
                 const int last = node_n;
                 do {
+                    #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_METAL)
+                    //apple does nothing
+                    #else
                     sched_yield();
+                    #endif
                     node_n = atomic_load(&state->shared->node_n);
                 } while (node_n == last);
             }
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index e304f81ca..8dcb852ba 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -367,7 +367,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         else
         {
             //approximate NTK aware ctx
-            rope_freq_base = (params.n_ctx <= 4096 ? 40880.0f : 82684.0f);
+            rope_freq_base = (params.n_ctx <= 3072 ? 26000.0f : (params.n_ctx <= 4096 ? 32000.0f : (params.n_ctx <= 6144 ? 54000.0f : 82684.0f)));
         }
 
         printf("Using automatic RoPE scaling (scale:%.3f, base:%.1f)\n",rope_freq_scale,rope_freq_base);
diff --git a/koboldcpp.py b/koboldcpp.py
index 926be06b6..c396d0770 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -248,7 +248,7 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_
             inputs.sampler_order[i] = sampler
         inputs.sampler_len = len(sampler_order)
         global showsamplerwarning
-        if showsamplerwarning and inputs.sampler_len>0 and (inputs.sampler_order[0]!=6 or inputs.sampler_order[inputs.sampler_len-1]!=5):
+        if showsamplerwarning and inputs.mirostat==0 and inputs.sampler_len>0 and (inputs.sampler_order[0]!=6 or inputs.sampler_order[inputs.sampler_len-1]!=5):
            print("\n(Note: Sub-optimal sampler_order detected. You may have reduced quality. Recommended sampler values are [6,0,1,3,4,2,5]. This message will only show once per session.)")
            showsamplerwarning = False
    except TypeError as e: