Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	README.md
#	tests/test-chat-template.cpp
Concedo 2024-05-24 16:22:38 +08:00
commit 653050135b
3 changed files with 12 additions and 12 deletions


@@ -13,10 +13,10 @@ if %errorlevel% neq 0 goto ERROR
 :: for FP16
 :: faster for long-prompt inference
-:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
 :: for FP32
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
 if %errorlevel% neq 0 goto ERROR
 :: build example/main only
 :: make main


@@ -12145,7 +12145,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
                 printf("\n");
                 GGML_ASSERT(false);
             }
-            q2[2*ib+0] |= (grid_index << 8*k);
+            q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
             q2[2*ib+1] |= (block_signs[k] << 7*k);
         }
         GGML_ASSERT(scale >= 0);
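
The only change in this hunk is a widening cast before the shift. Assuming grid_index is a plain signed int that can reach 255, as the surrounding ggml-quants.c code suggests, the old expression shifts by 24 bits when k == 3 and overflows INT_MAX, which is undefined behaviour in C; the cast keeps both the shift and the OR in unsigned 32-bit arithmetic. A minimal standalone sketch of the difference (not the actual ggml code):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t q2[2] = {0, 0};
    int grid_index = 255;  // hypothetical worst-case value
    int k = 3;

    // Old form: the shift is evaluated in (signed) int, so 255 << 24 overflows
    // INT_MAX -- undefined behaviour in the C source this diff touches.
    //     q2[0] |= (grid_index << 8*k);
    //
    // New form: the operand is widened to uint32_t first, so the shift and the
    // OR are ordinary unsigned 32-bit operations.
    q2[0] |= ((uint32_t) grid_index << 8*k);

    printf("q2[0] = 0x%08" PRIX32 "\n", q2[0]);  // prints 0xFF000000
    return 0;
}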


@@ -18170,6 +18170,15 @@ static int32_t llama_chat_apply_template_internal(
             }
         }
         // llama2 templates seem to not care about "add_generation_prompt"
+    } else if (tmpl == "phi3" || (tmpl.find("<|assistant|>") != std::string::npos && tmpl.find("<|end|>") != std::string::npos)) {
+        // Phi 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == "zephyr" || tmpl.find("<|user|>") != std::string::npos) {
         // zephyr template
         for (auto message : chat) {
@@ -18302,15 +18311,6 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
         }
-    } else if (tmpl == "phi3" || (tmpl.find("<|assistant|>") != std::string::npos && tmpl.find("<|end|>") != std::string::npos )) {
-        // Phi 3
-        for (auto message : chat) {
-            std::string role(message->role);
-            ss << "<|" << role << "|>\n" << trim(message->content) << "<|end|>\n";
-        }
-        if (add_ass) {
-            ss << "<|assistant|>\n";
-        }
     } else {
         // template not supported
         return -1;
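
Taken together, the two hunks above move the Phi 3 branch so that it is checked before the zephyr branch, and the relocated code no longer wraps the content in trim(). The reordering presumably matters because the zephyr check only looks for the substring "<|user|>", which Phi 3 templates also contain, so a Phi 3 template could previously fall into the zephyr branch first. A self-contained sketch of what the added branch produces for a short chat (the struct and messages are invented for illustration; this is not the llama.cpp API):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Standalone illustration of the Phi 3 formatting branch added above.
struct msg { std::string role, content; };

int main() {
    std::vector<msg> chat = {
        {"system", "You are a helpful assistant."},
        {"user",   "Hello!"},
    };
    bool add_ass = true;  // mirrors the add_ass / add_generation_prompt flag

    std::ostringstream ss;
    for (const auto & message : chat) {
        ss << "<|" << message.role << "|>\n" << message.content << "<|end|>\n";
    }
    if (add_ass) {
        ss << "<|assistant|>\n";  // leave the prompt open for the model's reply
    }

    std::cout << ss.str();
    // <|system|>
    // You are a helpful assistant.<|end|>
    // <|user|>
    // Hello!<|end|>
    // <|assistant|>
    return 0;
}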