Optimize prompt processing (pp)

This commit is contained in:
Concedo 2026-03-03 21:02:51 +08:00
parent ae67caa2f7
commit 707f7b37bf
3 changed files with 25 additions and 1 deletions

View file

@ -4465,7 +4465,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
{
skipdecodelater = true;
//decode until nearly done, then snapshot and decode the last 64
std::vector<std::vector<gpt_vocab::id>> parts = split_big_vector(embd,64);
std::vector<std::vector<gpt_vocab::id>> parts = split_big_vector_in_two(embd,64);
int temp_past = n_past;
evalres = true;
for(int p=0;p<parts.size();++p)
@ -4477,6 +4477,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
std::vector<gpt_vocab::id> chunk = parts[p];
kcpp_embd_batch smallbatch = kcpp_embd_batch(chunk, temp_past, use_mrope, false);
decode_status = llama_decode(llama_ctx_v4, smallbatch.batch);
if(p==0 && decode_status==1)
{
skipdecodelater = false;
break; //big pp failed
}
evalres = (evalres && (decode_status==0));
temp_past += chunk.size();
}

View file

@ -366,6 +366,23 @@ std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr,
return small_arrs;
}
// Splits big_arr into at most two consecutive pieces:
//   - a leading piece holding everything before the final chunk_size elements
//   - a trailing piece holding exactly the final chunk_size elements
// When big_arr has no more than chunk_size elements, a single piece (a copy of
// big_arr) is returned. When big_arr is empty or chunk_size is 0, the returned
// list is empty.
std::vector<std::vector<int>> split_big_vector_in_two(const std::vector<int>& big_arr, size_t chunk_size)
{
    std::vector<std::vector<int>> pieces;
    const size_t total = big_arr.size();

    if (total != 0 && chunk_size != 0)
    {
        if (total > chunk_size)
        {
            // Boundary between the large head and the fixed-size tail.
            const auto boundary = big_arr.begin() + (total - chunk_size);
            pieces.push_back(std::vector<int>(big_arr.begin(), boundary));
            pieces.push_back(std::vector<int>(boundary, big_arr.end()));
        }
        else
        {
            // Input already fits in one chunk; hand it back whole.
            pieces.push_back(big_arr);
        }
    }

    return pieces;
}
std::vector<float> resample_wav(const std::vector<float> & input, uint32_t input_rate, uint32_t output_rate) {
if (input.empty() || input_rate == 0 || output_rate == 0)
return {};

View file

@ -62,6 +62,8 @@ std::string kcpp_base64_encode(const std::string &data);
std::string get_timestamp_str();
std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr, size_t chunk_size);
// Splits big_arr into at most two parts: everything before the final chunk_size elements, then the final chunk_size elements.
// Yields a single part when big_arr.size() <= chunk_size, and no parts when big_arr is empty or chunk_size is 0.
std::vector<std::vector<int>> split_big_vector_in_two(const std::vector<int>& big_arr, size_t chunk_size);
std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate);
std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio);