Merge commit 'c3a2624339' into concedo_experimental

Concedo 2025-05-24 22:56:02 +08:00
commit 779a41f23e
3 changed files with 6 additions and 4 deletions


@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP


@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP
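The two hunks above apply the same one-line fix to both vector flash-attention kernels: a warp whose lanes unanimously vote to skip the current iteration (the __all_sync(0xFFFFFFFF, skip) vote) must still execute __syncthreads() before continue, so it stays paired with the block-wide barrier that non-skipping warps reach later in the loop body. Otherwise the skipping warp races ahead into the next iteration while other warps are still working, and barrier counts across the block diverge (a race on shared state, observed to misbehave under HIP). Below is a minimal sketch of the pattern; the kernel name, flag layout, and the "work" are invented for illustration and are not the actual flash-attention code.

#include <cuda_runtime.h>

// Hypothetical kernel showing the barrier pattern the patch fixes.
__global__ void skip_iteration_sketch(const int * warp_skip, float * out, int n_iter) {
    const int n_warps = blockDim.x / 32;
    const int warp_id = threadIdx.x / 32;

    for (int it = 0; it < n_iter; ++it) {
        const bool skip = warp_skip[it * n_warps + warp_id] != 0;

        // Warp-level vote: true only if every lane of the warp wants to skip.
        if (__all_sync(0xFFFFFFFF, skip)) {
            // The fix: a skipping warp still participates in one block-wide
            // barrier per iteration, so it cannot run ahead of the warps that
            // do the work below and touch state they are still using.
            __syncthreads();
            continue;
        }

        out[blockIdx.x * blockDim.x + threadIdx.x] += 1.0f; // stand-in for real work
        __syncthreads(); // barrier that every non-skipping warp reaches
    }
}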


@@ -1060,7 +1060,7 @@ struct llm_tokenizer_ugm_session {
         }
         // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
         // at the beginning tokenization score is zero
         tokenization_results[0] = { vocab.token_unk(), 0, 0 };
@@ -1092,7 +1092,7 @@ struct llm_tokenizer_ugm_session {
                 const double challenger_score = current_best.score_sum + token_score;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -1106,7 +1106,7 @@ struct llm_tokenizer_ugm_session {
             prefix_offset = input_offset + n_utf8_code_units;
             struct best_tokenization & current_champ = tokenization_results[prefix_offset];
             if (challenger_score > current_champ.score_sum) {
-                struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                 current_champ = challenger;
             }
         }
@@ -1232,7 +1232,7 @@ private:
     struct best_tokenization {
         llama_token token_id;
         size_t input_offset;
-        float score_sum;
+        double score_sum;
     };
 
     struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
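The tokenizer hunks are all one change: best_tokenization::score_sum is widened from float to double, the (float) down-casts on challenger_score are dropped, and the "unreached" sentinel becomes -DBL_MAX so it stays below any real double score. The motive is precision: the UGM session accumulates token log-probabilities along a dynamic-programming chain, and rounding each running sum through float can flip close challenger_score > current_champ.score_sum comparisons on long inputs. A self-contained toy illustration follows; the score value and loop count are invented, not taken from the tokenizer.

#include <cstdio>

// Accumulate the same log-probability many times in float and in double;
// the float sum drifts, which is what the widened score_sum avoids.
int main() {
    const double token_score = -10.000001; // made-up, typical-magnitude log-prob
    float  sum_f = 0.0f;
    double sum_d = 0.0;
    for (int i = 0; i < 100000; ++i) {
        sum_f += (float) token_score; // old behaviour: stored/compared as float
        sum_d += token_score;         // new behaviour: full double precision
    }
    printf("float : %.6f\n", (double) sum_f);
    printf("double: %.6f\n", sum_d); // the two sums visibly disagree
    return 0;
}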