From 707f7b37bfc673bb0cfc24d4485ee1610f365413 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Tue, 3 Mar 2026 21:02:51 +0800
Subject: [PATCH] optimize pp

---
NOTE(review): this patch text was reconstructed from a copy whose line breaks
were collapsed and whose "<...>" spans were stripped. Inferred rather than
recovered: all indentation and blank context lines, the vector element types
(int / float), the loop header `for(int p=0;p<parts.size();++p)`, the brace
placement after resample_wav, and the second gpttype_adapter.cpp hunk header.
Regenerate with `git format-patch -1 707f7b37bfc6` before applying.

 gpttype_adapter.cpp |  7 ++++++-
 otherarch/utils.cpp | 17 +++++++++++++++++
 otherarch/utils.h   |  2 ++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 3d131a8ce..2c4071c4c 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -4465,7 +4465,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 {
                     skipdecodelater = true;
                     //decode until nearly done, then snapshot and decode the last 64
-                    std::vector<std::vector<int>> parts = split_big_vector(embd,64);
+                    std::vector<std::vector<int>> parts = split_big_vector_in_two(embd,64);
                     int temp_past = n_past;
                     evalres = true;
                     for(int p=0;p<parts.size();++p)
@@ -4473,6 +4473,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                         std::vector<int> chunk = parts[p];
                         kcpp_embd_batch smallbatch = kcpp_embd_batch(chunk, temp_past, use_mrope, false);
                         decode_status = llama_decode(llama_ctx_v4, smallbatch.batch);
+                        if(p==0 && decode_status==1)
+                        {
+                            skipdecodelater = false;
+                            break; //big pp failed
+                        }
                         evalres = (evalres && (decode_status==0));
                         temp_past += chunk.size();
                     }
diff --git a/otherarch/utils.cpp b/otherarch/utils.cpp
index 1834a631d..372d1c26e 100644
--- a/otherarch/utils.cpp
+++ b/otherarch/utils.cpp
@@ -366,6 +366,23 @@ std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr,
     return small_arrs;
 }
 
+std::vector<std::vector<int>> split_big_vector_in_two(const std::vector<int>& big_arr, size_t chunk_size)
+{
+    std::vector<std::vector<int>> result;
+    if (chunk_size == 0 || big_arr.empty())
+        return result;
+
+    if (big_arr.size() <= chunk_size) {
+        // Only one chunk (all elements)
+        result.emplace_back(big_arr);
+        return result;
+    }
+    size_t split_point = big_arr.size() - chunk_size;
+    result.emplace_back(big_arr.begin(), big_arr.begin() + split_point); // First big chunk
+    result.emplace_back(big_arr.begin() + split_point, big_arr.end()); // Last chunk (size <= chunk_size)
+    return result;
+}
+
 std::vector<float> resample_wav(const std::vector<float> & input, uint32_t input_rate, uint32_t output_rate)
 {
     if (input.empty() || input_rate == 0 || output_rate == 0) return {};
diff --git a/otherarch/utils.h b/otherarch/utils.h
index b5137c08e..4482cc903 100644
--- a/otherarch/utils.h
+++ b/otherarch/utils.h
@@ -62,6 +62,8 @@ std::string kcpp_base64_encode(const std::string &data);
 
 std::string get_timestamp_str();
 std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr, size_t chunk_size);
+std::vector<std::vector<int>> split_big_vector_in_two(const std::vector<int>& big_arr, size_t chunk_size);
+
 std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate);
 std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio);
 