diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index f5e4b3c3f..8fa3349d9 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -498,7 +498,7 @@ void sample_top_a(llama_token_data_array * candidates, float a, size_t min_keep) } void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float penalty_base, int allowed_length, const std::unordered_multimap>& restart_sequences, llama_token_data_array * candidates) { - if (penalty_multiplier == 0.0f || penalty_base == 0.0f) { + if (penalty_multiplier <= 0.0f || penalty_base <= 0.0f) { return; } if (penalty_range <= 0) { @@ -1352,7 +1352,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice); } ggml_cuda_set_mul_mat_q(inputs.use_mmq); - if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 && kcpp_params->flash_attn) + if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 && !kcpp_params->flash_attn) { printf("CUBLAS: Warning, you are running Qwen2 without Flash Attention and may observe incoherent output.\n"); } @@ -2837,7 +2837,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict)); float ts2 = (1000.0/pt2); float tokens_per_second = (realnpredict == 0 ? 
0 : realnpredict / (time1 + time2)); - printf("\nCtxLimit: %d/%d, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); + printf("\nCtxLimit:%d/%d, Amt:%d/%d, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_params->n_predict, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); fflush(stdout); output.status = 1; output.stopreason = last_stop_reason; diff --git a/kcpp_docs.embd b/kcpp_docs.embd index 1f402feca..cee36fb6a 100644 --- a/kcpp_docs.embd +++ b/kcpp_docs.embd @@ -225,6 +225,28 @@ "3105": 3.2 }, }, + "dry_multiplier": { + "description": "KoboldCpp ONLY. DRY multiplier value, 0 to disable.", + "minimum": 0, + "type": "number" + }, + "dry_base": { + "description": "KoboldCpp ONLY. DRY base value.", + "exclusiveMinimum": 0, + "type": "number" + }, + "dry_allowed_length": { + "description": "KoboldCpp ONLY. DRY allowed length value.", + "exclusiveMinimum": 0, + "type": "number" + }, + "dry_sequence_breakers": { + "description": "KoboldCpp ONLY. An array of string sequence breakers for DRY.", + "items": { + "type": "string" + }, + "type": "array" + }, }, "required": [ "prompt" diff --git a/klite.embd b/klite.embd index 07b43d8a3..776974119 100644 --- a/klite.embd +++ b/klite.embd @@ -12,7 +12,7 @@ Current version indicated by LITEVER below. -->