gpt oss harmony template

Concedo 2025-08-06 11:39:40 +08:00
parent 6eea7b88d2
commit 34487d3c02
5 changed files with 37 additions and 3 deletions


@@ -61,7 +61,7 @@ add_compile_definitions(LOG_DISABLE_LOGS)
 add_compile_definitions(GGML_USE_CPU)
 add_compile_definitions(GGML_USE_CPU_REPACK)
 add_compile_definitions(NOMINMAX)
-add_compile_definitions(_REGEX_MAX_STACK_COUNT=80000)
+add_compile_definitions(_REGEX_MAX_STACK_COUNT=32000)
 if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12)
     add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)


@@ -2293,15 +2293,21 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     // std::string forced = "per_layer_token_embd.weight=CPU"; //this tensor on gpu is problematic on unsloth q4_0
     // tensoroverrides = (tensoroverrides=="" ? forced: (forced+","+tensoroverrides));
     // }
-    if(tensoroverrides=="" && ggml_backend_dev_count()>1 && inputs.moecpu>0)
+    if(ggml_backend_dev_count()>1 && inputs.moecpu>0)
     {
+        std::string toadd = "";
         for (int i = 0; i < inputs.moecpu; ++i) {
             std::string tmp = string_format("blk\\.%d\\.ffn_(up|down|gate)_exps=CPU", i);
             if(i>0)
             {
                 tmp = "," + tmp;
             }
-            tensoroverrides += tmp;
+            toadd += tmp;
+        }
+        if (tensoroverrides == "") {
+            tensoroverrides = toadd;
+        } else {
+            tensoroverrides += "," + toadd;
         }
         printf("Overriding %d MoE layers to CPU...\n",inputs.moecpu);
     }
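
For illustration, the loop above produces one regex pattern per expert layer, e.g. blk\.0\.ffn_(up|down|gate)_exps=CPU for layer 0. The behavioural change is that the generated list is now merged into any user-supplied tensoroverrides instead of being skipped whenever one is present. A minimal standalone sketch of the new merge path follows; string_format here is a local stand-in for the project's helper, and the starting override value is hypothetical.

// Illustrative sketch of the merge behaviour after this change.
#include <cstdio>
#include <string>

// Local stand-in for the string_format helper used in the diff.
static std::string string_format(const char *fmt, int i) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), fmt, i);
    return std::string(buf);
}

int main() {
    std::string tensoroverrides = "token_embd.weight=CPU"; // hypothetical user-supplied override
    int moecpu = 2;                                        // hypothetical value of inputs.moecpu

    std::string toadd = "";
    for (int i = 0; i < moecpu; ++i) {
        std::string tmp = string_format("blk\\.%d\\.ffn_(up|down|gate)_exps=CPU", i);
        if (i > 0) {
            tmp = "," + tmp;
        }
        toadd += tmp;
    }
    // New behaviour: merge with existing overrides instead of requiring an empty string.
    if (tensoroverrides == "") {
        tensoroverrides = toadd;
    } else {
        tensoroverrides += "," + toadd;
    }
    std::printf("%s\n", tensoroverrides.c_str());
    // Prints: token_embd.weight=CPU,blk\.0\.ffn_(up|down|gate)_exps=CPU,blk\.1\.ffn_(up|down|gate)_exps=CPU
    return 0;
}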


@@ -186,6 +186,17 @@
             "assistant_start": "<|response|>",
             "assistant_end": "<|endofresponse|>"
         }
+    }, {
+        "search": ["<|start|>user<|message|>", "<|channel|>", "<|end|>"],
+        "name": "OpenAI Harmony",
+        "adapter": {
+            "system_start": "<|start|>system<|message|>",
+            "system_end": "<|end|>\n",
+            "user_start": "<|start|>user<|message|>",
+            "user_end": "<|end|>\n",
+            "assistant_start": "<|start|>assistant<|channel|>final<|message|>",
+            "assistant_end": "<|return|>\n"
+        }
     }, {
         "search": ["rwkv_", "'User: '"],
         "name": "RWKV World",


@@ -0,0 +1,8 @@
+{
+    "system_start": "<|start|>system<|message|>",
+    "system_end": "<|end|>\n",
+    "user_start": "<|start|>user<|message|>",
+    "user_end": "<|end|>\n",
+    "assistant_start": "<|start|>assistant<|channel|>final<|message|>",
+    "assistant_end": "<|return|>\n"
+}
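
Wrapping a short exchange with the fields above yields a prompt shaped like the sketch below; the message contents are placeholders and only the wrapping tokens come from the adapter.

// Sketch: a two-turn prompt assembled from the adapter fields above.
#include <cstdio>
#include <string>

int main() {
    std::string prompt =
        std::string("<|start|>system<|message|>") + "You are a helpful assistant." + "<|end|>\n" +
        "<|start|>user<|message|>" + "Hello!" + "<|end|>\n" +
        "<|start|>assistant<|channel|>final<|message|>";
    // Generation continues from assistant_start; per assistant_end, the
    // final-channel reply is expected to terminate with <|return|>.
    std::printf("%s\n", prompt.c_str());
    return 0;
}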


@@ -3629,6 +3629,15 @@ Current version indicated by LITEVER below.
     "assistant_end":"",
     "system":"",
     "system_end":"",
+    },
+    {
+    "name":"OpenAI Harmony",
+    "user":"<|start|>user<|message|>",
+    "user_end":"<|end|>\\n",
+    "assistant":"<|start|>assistant<|channel|>final<|message|>",
+    "assistant_end":"<|return|>\\n",
+    "system":"<|start|>system<|message|>",
+    "system_end":"<|end|>\\n",
     }
     ];