diff --git a/embd_res/klite.embd b/embd_res/klite.embd
index 78b873ce0..2bdc65f66 100644
--- a/embd_res/klite.embd
+++ b/embd_res/klite.embd
@@ -4153,7 +4153,7 @@ Current version indicated by LITEVER below.
"name":"OpenAI Harmony Non-Thinking",
"user":"<|start|>user<|message|>",
"user_end":"<|end|>",
- "assistant":"<|start|>assistant<|channel|>final<|message|>",
+ "assistant":"<|start|>assistant<|channel|>analysis<|message|>We can answer immediately.<|end|><|start|>assistant<|channel|>final<|message|>",
"assistant_end":"<|end|>",
"system":"<|start|>developer<|message|>",
"system_end":"<|end|>",
@@ -6893,6 +6893,7 @@ Current version indicated by LITEVER below.
.replace(/\*\*\*([^\s*][\s\S]*?[^\\])\*\*\*/gm, "$1")
.replace(/\*\*(.)\*\*/g, "$1") //handle single char bold
+ .replace(/ \*\*(\w+(?: \w+)?)\*\* /g, " $1 ") //hack: support 1 or 2 nested bolded words (not official)
.replace(/\*\*([^\s*][\s\S]*?[^\\])\*\*/gm, "$1")
.replace(/(^|[\s.,;:!?<>])\*(.)\*(?=[\s.,;:!?<>]|$)/g, "$1$2") //handle single char italics
.replace(/\*([^\s*][\s\S]*?[^\\])\*/gm, "$1")
diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu
index b06bbabd1..f7981ceb5 100644
--- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu
+++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu
@@ -588,7 +588,7 @@ void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_ten
// ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
// break;
default:
- GGML_ABORT("fatal error");
+ GGML_ABORT("fatal error: case %lld", (long long) Q->ne[0]);
break;
}
}
diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json
index 60627e317..12e3ff0b6 100644
--- a/kcpp_adapters/AutoGuess.json
+++ b/kcpp_adapters/AutoGuess.json
@@ -218,7 +218,7 @@
"system_end": "\n\n<|end|>",
"user_start": "<|start|>user<|message|>",
"user_end": "<|end|>",
- "assistant_start": "<|start|>assistant<|channel|>final<|message|>",
+ "assistant_start": "<|start|>assistant<|channel|>analysis<|message|>We can answer immediately.<|end|><|start|>assistant<|channel|>final<|message|>",
"assistant_end": "<|end|>"
}
}, {
diff --git a/kcpp_adapters/OpenAI-Harmony.json b/kcpp_adapters/OpenAI-Harmony.json
index eaa1bee8d..f812659c3 100644
--- a/kcpp_adapters/OpenAI-Harmony.json
+++ b/kcpp_adapters/OpenAI-Harmony.json
@@ -3,6 +3,6 @@
"system_end": "<|end|>",
"user_start": "<|start|>user<|message|>",
"user_end": "<|end|>",
- "assistant_start": "<|start|>assistant<|channel|>final<|message|>",
+ "assistant_start": "<|start|>assistant<|channel|>analysis<|message|>We can answer immediately.<|end|><|start|>assistant<|channel|>final<|message|>",
"assistant_end": "<|end|>"
}