diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b43ad266..d916ff207 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ add_compile_definitions(LOG_DISABLE_LOGS) add_compile_definitions(GGML_USE_CPU) add_compile_definitions(GGML_USE_CPU_REPACK) add_compile_definitions(NOMINMAX) -add_compile_definitions(_REGEX_MAX_STACK_COUNT=80000) +add_compile_definitions(_REGEX_MAX_STACK_COUNT=32000) if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12) add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 333b5ec0a..61d1f031c 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -2293,15 +2293,21 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in // std::string forced = "per_layer_token_embd.weight=CPU"; //this tensor on gpu is problematic on unsloth q4_0 // tensoroverrides = (tensoroverrides=="" ? forced: (forced+","+tensoroverrides)); // } - if(tensoroverrides=="" && ggml_backend_dev_count()>1 && inputs.moecpu>0) + if(ggml_backend_dev_count()>1 && inputs.moecpu>0) { + std::string toadd = ""; for (int i = 0; i < inputs.moecpu; ++i) { std::string tmp = string_format("blk\\.%d\\.ffn_(up|down|gate)_exps=CPU", i); if(i>0) { tmp = "," + tmp; } - tensoroverrides += tmp; + toadd += tmp; + } + if (tensoroverrides == "") { + tensoroverrides = toadd; + } else { + tensoroverrides += "," + toadd; } printf("Overriding %d MoE layers to CPU...\n",inputs.moecpu); } diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json index 5713350a6..464aa4589 100644 --- a/kcpp_adapters/AutoGuess.json +++ b/kcpp_adapters/AutoGuess.json @@ -186,6 +186,17 @@ "assistant_start": "<|response|>", "assistant_end": "<|endofresponse|>" } +}, { + "search": ["<|start|>user<|message|>", "<|channel|>", "<|end|>"], + "name": "OpenAI Harmony", + "adapter": { + "system_start": "<|start|>system<|message|>", + "system_end": "<|end|>\n", + "user_start": "<|start|>user<|message|>", + "user_end": "<|end|>\n", + "assistant_start": "<|start|>assistant<|channel|>final<|message|>", + "assistant_end": "<|return|>\n" + } }, { "search": ["rwkv_", "'User: '"], "name": "RWKV World", diff --git a/kcpp_adapters/OpenAI-Harmony.json b/kcpp_adapters/OpenAI-Harmony.json new file mode 100644 index 000000000..6e8c374a5 --- /dev/null +++ b/kcpp_adapters/OpenAI-Harmony.json @@ -0,0 +1,8 @@ +{ + "system_start": "<|start|>system<|message|>", + "system_end": "<|end|>\n", + "user_start": "<|start|>user<|message|>", + "user_end": "<|end|>\n", + "assistant_start": "<|start|>assistant<|channel|>final<|message|>", + "assistant_end": "<|return|>\n" +} diff --git a/klite.embd b/klite.embd index 3b8053b66..80f528246 100644 --- a/klite.embd +++ b/klite.embd @@ -3629,6 +3629,15 @@ Current version indicated by LITEVER below. "assistant_end":"", "system":"", "system_end":"", + }, + { + "name":"OpenAI Harmony", + "user":"<|start|>user<|message|>", + "user_end":"<|end|>\\n", + "assistant":"<|start|>assistant<|channel|>final<|message|>", + "assistant_end":"<|return|>\\n", + "system":"<|start|>system<|message|>", + "system_end":"<|end|>\\n", } ];