diff --git a/embd_res/klite.embd b/embd_res/klite.embd index 78b873ce0..2bdc65f66 100644 --- a/embd_res/klite.embd +++ b/embd_res/klite.embd @@ -4153,7 +4153,7 @@ Current version indicated by LITEVER below. "name":"OpenAI Harmony Non-Thinking", "user":"<|start|>user<|message|>", "user_end":"<|end|>", - "assistant":"<|start|>assistant<|channel|>final<|message|>", + "assistant":"<|start|>assistant<|channel|>analysis<|message|>We can answer immediately.<|end|><|start|>assistant<|channel|>final<|message|>", "assistant_end":"<|end|>", "system":"<|start|>developer<|message|>", "system_end":"<|end|>", @@ -6893,6 +6893,7 @@ Current version indicated by LITEVER below. .replace(/\*\*\*([^\s*][\s\S]*?[^\\])\*\*\*/gm, "$1") .replace(/\*\*(.)\*\*/g, "$1") //handle single char bold + .replace(/ \*\*(\w+(?: \w+)?)\*\* /g, " $1 ") //hack: support 1 or 2 nested bolded words (not official) .replace(/\*\*([^\s*][\s\S]*?[^\\])\*\*/gm, "$1") .replace(/(^|[\s.,;:!?<>])\*(.)\*(?=[\s.,;:!?<>]|$)/g, "$1$2") //handle single char italics .replace(/\*([^\s*][\s\S]*?[^\\])\*/gm, "$1") diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu index b06bbabd1..f7981ceb5 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -588,7 +588,7 @@ void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_ten // ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); // break; default: - GGML_ABORT("fatal error"); + GGML_ABORT("fatal error: case %d", (int) Q->ne[0]); /* explicit cast so the variadic argument matches %d */ break; } } diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json index 60627e317..12e3ff0b6 100644 --- a/kcpp_adapters/AutoGuess.json +++ b/kcpp_adapters/AutoGuess.json @@ -218,7 +218,7 @@ "system_end": "\n\n<|end|>", "user_start": "<|start|>user<|message|>", "user_end": "<|end|>", - "assistant_start": "<|start|>assistant<|channel|>final<|message|>", + "assistant_start": 
"<|start|>assistant<|channel|>analysis<|message|>We can answer immediately.<|end|><|start|>assistant<|channel|>final<|message|>", "assistant_end": "<|end|>" } }, { diff --git a/kcpp_adapters/OpenAI-Harmony.json b/kcpp_adapters/OpenAI-Harmony.json index eaa1bee8d..f812659c3 100644 --- a/kcpp_adapters/OpenAI-Harmony.json +++ b/kcpp_adapters/OpenAI-Harmony.json @@ -3,6 +3,6 @@ "system_end": "<|end|>", "user_start": "<|start|>user<|message|>", "user_end": "<|end|>", - "assistant_start": "<|start|>assistant<|channel|>final<|message|>", + "assistant_start": "<|start|>assistant<|channel|>analysis<|message|>We can answer immediately.<|end|><|start|>assistant<|channel|>final<|message|>", "assistant_end": "<|end|>" }