mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-18 23:49:46 +00:00
optimize pp
This commit is contained in:
parent
ae67caa2f7
commit
707f7b37bf
3 changed files with 25 additions and 1 deletion
|
|
@ -4465,7 +4465,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
{
|
||||
skipdecodelater = true;
|
||||
//decode until nearly done, then snapshot and decode the last 64
|
||||
std::vector<std::vector<gpt_vocab::id>> parts = split_big_vector(embd,64);
|
||||
std::vector<std::vector<gpt_vocab::id>> parts = split_big_vector_in_two(embd,64);
|
||||
int temp_past = n_past;
|
||||
evalres = true;
|
||||
for(int p=0;p<parts.size();++p)
|
||||
|
|
@ -4477,6 +4477,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
std::vector<gpt_vocab::id> chunk = parts[p];
|
||||
kcpp_embd_batch smallbatch = kcpp_embd_batch(chunk, temp_past, use_mrope, false);
|
||||
decode_status = llama_decode(llama_ctx_v4, smallbatch.batch);
|
||||
if(p==0 && decode_status==1)
|
||||
{
|
||||
skipdecodelater = false;
|
||||
break; //big pp failed
|
||||
}
|
||||
evalres = (evalres && (decode_status==0));
|
||||
temp_past += chunk.size();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -366,6 +366,23 @@ std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr,
|
|||
return small_arrs;
|
||||
}
|
||||
|
||||
// Splits big_arr into at most two consecutive pieces: everything except the
// trailing chunk_size elements, followed by those trailing elements.
//
// Parameters:
//   big_arr    - the sequence to split; it is copied from, never modified.
//   chunk_size - number of elements to place in the trailing piece.
//
// Returns:
//   - an empty vector when chunk_size is 0 or big_arr is empty;
//   - a single piece holding a copy of big_arr when it has at most
//     chunk_size elements;
//   - otherwise exactly two pieces: the leading (size - chunk_size) elements,
//     then the trailing chunk_size elements.
std::vector<std::vector<int>> split_big_vector_in_two(const std::vector<int>& big_arr, size_t chunk_size)
{
    std::vector<std::vector<int>> result;
    if (chunk_size == 0 || big_arr.empty())
        return result;

    if (big_arr.size() <= chunk_size) {
        // Too short to split: hand back the whole input as the only piece.
        result.emplace_back(big_arr);
        return result;
    }

    // size > chunk_size is guaranteed here, so the subtraction cannot wrap.
    const size_t split_point = big_arr.size() - chunk_size;
    result.reserve(2);
    result.emplace_back(big_arr.begin(), big_arr.begin() + split_point); // leading bulk
    result.emplace_back(big_arr.begin() + split_point, big_arr.end());   // trailing chunk_size elements
    return result;
}
|
||||
|
||||
std::vector<float> resample_wav(const std::vector<float> & input, uint32_t input_rate, uint32_t output_rate) {
|
||||
if (input.empty() || input_rate == 0 || output_rate == 0)
|
||||
return {};
|
||||
|
|
|
|||
|
|
@ -62,6 +62,8 @@ std::string kcpp_base64_encode(const std::string &data);
|
|||
|
||||
std::string get_timestamp_str();
|
||||
std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr, size_t chunk_size);
|
||||
// Splits big_arr into at most two consecutive pieces. Returns an empty vector
// when chunk_size is 0 or big_arr is empty, a single piece (a copy of big_arr)
// when big_arr has at most chunk_size elements, and otherwise two pieces: the
// leading (size - chunk_size) elements followed by the trailing chunk_size elements.
std::vector<std::vector<int>> split_big_vector_in_two(const std::vector<int>& big_arr, size_t chunk_size);
|
||||
|
||||
std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate);
|
||||
std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue