Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	ggml/src/ggml-sycl/gemm.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/set_rows.cpp
Concedo 2025-07-14 23:16:44 +08:00
commit 4db8ba6228
5 changed files with 47 additions and 4 deletions

@@ -79,6 +79,9 @@ SIMPLERCFLAGS =
 FULLCFLAGS =
 NONECFLAGS =
+# prefer bundled glslc
+LLAMA_USE_BUNDLED_GLSLC := 1
 CLBLAST_FLAGS = -DGGML_USE_CLBLAST
 FAILSAFE_FLAGS = -DUSE_FAILSAFE
 VULKAN_FLAGS = -DGGML_USE_VULKAN -DSD_USE_VULKAN

@@ -1082,7 +1082,14 @@ class TextModel(ModelBase):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
-        special_vocab.chat_template = "rwkv-world"
+        if special_vocab.chat_template is None:
+            template_path = Path(__file__).parent / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
+            if template_path.is_file():
+                with open(template_path, "r", encoding="utf-8") as f:
+                    template = f.read()
+            else:
+                template = "rwkv-world"
+            special_vocab.chat_template = template
         # hack: Add '\n\n' as the EOT token to make it chat normally
         special_vocab._set_special_token("eot", 261)
         # hack: Override these as they have already been set (incorrectly)
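
In effect, the converter now prefers the full bundled Jinja template and only falls back to the plain "rwkv-world" alias (which llama.cpp resolves by name at load time) when the file is missing. A minimal standalone sketch of that fallback, assuming a checkout where models/templates/ exists; load_rwkv_chat_template is a hypothetical helper, not part of the converter:

from pathlib import Path

def load_rwkv_chat_template(repo_root: Path) -> str:
    # Prefer the bundled Jinja file; otherwise return the "rwkv-world"
    # alias and let llama.cpp resolve it by name.
    template_path = repo_root / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
    if template_path.is_file():
        return template_path.read_text(encoding="utf-8")
    return "rwkv-world"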

@@ -0,0 +1,34 @@
+{%- if not add_generation_prompt is defined -%}
+{%- set add_generation_prompt = true -%}
+{%- endif -%}
+{%- set ns = namespace(system_prompt='') -%}
+{%- for message in messages -%}
+{%- if message['role'] == 'system' -%}
+{%- set ns.system_prompt = message['content'] -%}
+{%- endif -%}
+{%- endfor -%}
+{{bos_token}}
+{%- if ns.system_prompt != '' -%}
+{{- 'System: ' + ns.system_prompt + '\n\n' -}}
+{%- endif -%}
+{%- for message in messages -%}
+{%- if message['role'] == 'user' -%}
+{{- 'User: ' + message['content']|trim + '\n\n' -}}
+{%- endif -%}
+{%- if message['role'] == 'assistant' and message['content'] is not none -%}
+{%- set content = message['content'] -%}
+{%- if '</think>' in content -%}
+{%- set content = content.split('</think>')[-1] -%}
+{%- endif -%}
+{{- 'Assistant: ' + content|trim + '\n\n' -}}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+{{- 'Assistant:' -}}
+{%- if enable_thinking is defined and enable_thinking is false %}
+{{- ' <think>\n</think>' }}
+{%- endif %}
+{%- if enable_thinking is defined and enable_thinking is true %}
+{{- ' <think>' }}
+{%- endif %}
+{%- endif -%}
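
To preview the prompt this template produces, it can be rendered with the jinja2 package. A sketch, assuming it runs from the repository root; the message list and expected output are illustrative only:

import jinja2

with open("models/templates/llama-cpp-rwkv-world.jinja", encoding="utf-8") as f:
    template = jinja2.Template(f.read())

prompt = template.render(
    bos_token="",
    messages=[
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "<think>hmm</think>Hi there."},
        {"role": "user", "content": "How are you?"},
    ],
    add_generation_prompt=True,
)
print(prompt)
# System: You are helpful.
#
# User: Hello
#
# Assistant: Hi there.
#
# User: How are you?
#
# Assistant:

Note how the '</think>' split strips any reasoning block from prior assistant turns before they are re-serialized into the prompt.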

@@ -170,7 +170,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
         // EXAONE-3.0-7.8B-Instruct
         return LLM_CHAT_TEMPLATE_EXAONE_3;
-    } else if (tmpl_contains("rwkv-world")) {
+    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
         return LLM_CHAT_TEMPLATE_RWKV_WORLD;
     } else if (tmpl_contains("<|start_of_role|>")) {
         return LLM_CHAT_TEMPLATE_GRANITE;
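
The second tmpl_contains pattern matches a distinctive line of the new template verbatim. The double backslashes matter: the template file contains the literal two-character sequence \n, so the C++ string must escape each backslash once more. A rough Python equivalent of the check (tmpl_contains is a plain substring search):

def is_rwkv_world_template(tmpl: str) -> bool:
    # "\\n\\n" in source is a literal backslash-n pair, matching the raw
    # template text rather than real newlines -- hence the double escaping.
    return ("rwkv-world" in tmpl
            or "{{- 'User: ' + message['content']|trim + '\\n\\n' -}}" in tmpl)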

@@ -887,8 +887,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
             if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
                 if (qtype != new_type) {
                     LLAMA_LOG_DEBUG("(overriding %s) ", ggml_type_name(new_type));
-                    new_type = qtype;
-                    break; // if two or more types are specified for the tensor, first match wins
+                    new_type = qtype; // if two or more types are specified for the same tensor, the last match wins
                 }
             }
         }
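
With the break removed, every pattern in the override list is tested and the last matching entry determines the final type. A small sketch of the resulting semantics, with illustrative pattern/type pairs rather than real quantization settings:

import re

def resolve_tensor_type(tensor_name: str, overrides: list[tuple[str, str]], default: str) -> str:
    # Mirror of the loop above: keep overwriting on each match, so the
    # last matching pattern wins.
    new_type = default
    for pattern, qtype in overrides:
        if re.search(pattern, tensor_name):
            new_type = qtype  # no break: a later match may override this
    return new_type

# Both patterns match "blk.0.attn_k.weight"; the later entry wins.
assert resolve_tensor_type(
    "blk.0.attn_k.weight",
    [(r"attn", "q4_k"), (r"blk\.0\.", "q8_0")],
    default="q6_k",
) == "q8_0"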