Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/labeler.yml
#	.github/workflows/server.yml
#	.gitignore
#	CMakeLists.txt
#	Makefile
#	README-sycl.md
#	README.md
#	llama.cpp
#	requirements/requirements-convert-hf-to-gguf-update.txt
#	requirements/requirements-convert-hf-to-gguf.txt
#	requirements/requirements-convert-legacy-llama.txt
#	scripts/sync-ggml.last
#	tests/test-tokenizer-random.py
Concedo committed 2024-06-22 01:33:44 +08:00
commit 92afdfcae4
44 changed files with 10304 additions and 8631 deletions


@@ -1595,7 +1595,7 @@ struct server_context {
         } else {
             std::string prompt;
             if (task.data.contains("prompt") && task.data.at("prompt").is_string()) {
-                json_value(task.data, "prompt", std::string());
+                prompt = json_value(task.data, "prompt", std::string());
             }
             slot = get_available_slot(prompt);
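
Note on the first hunk: the server's json_value() helper returns the looked-up value (or the supplied default) rather than writing through an out-parameter, so the original line discarded the result and left prompt empty even when the request supplied one. A minimal, self-contained sketch of that pattern, using a simplified stand-in for json_value() rather than the server's actual implementation:

#include <iostream>
#include <map>
#include <string>

// Simplified stand-in for the server's json_value() helper: it *returns* the
// looked-up value (or the default) and does not mutate its arguments.
static std::string json_value(const std::map<std::string, std::string> & data,
                              const std::string & key,
                              const std::string & default_value) {
    const auto it = data.find(key);
    return it != data.end() ? it->second : default_value;
}

int main() {
    const std::map<std::string, std::string> task_data = {{"prompt", "Hello"}};

    std::string prompt;
    json_value(task_data, "prompt", std::string());          // bug: return value discarded, prompt stays ""
    prompt = json_value(task_data, "prompt", std::string()); // fix: capture the returned value

    std::cout << "prompt = \"" << prompt << "\"\n";          // prints: prompt = "Hello"
}
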
@@ -2039,7 +2039,12 @@ struct server_context {
                 prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
                 prefix_tokens.insert(prefix_tokens.end(),   llama_token_suffix(model));
                 prefix_tokens.insert(prefix_tokens.end(),   suffix_tokens.begin(), suffix_tokens.end());
-                prefix_tokens.push_back(llama_token_middle(model));
+
+                const llama_token middle_token = llama_token_middle(model);
+                if (middle_token >= 0) {
+                    prefix_tokens.push_back(middle_token);
+                }
+
                 prompt_tokens = prefix_tokens;
             } else {
                 prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
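
Note on the second hunk: not every model's vocabulary defines a fill-in-the-middle MIDDLE token, and llama_token_middle() reports a missing token as a negative id, so pushing it unconditionally would inject an invalid token into the infill prompt. A minimal sketch of the same guard, with a made-up stand-in for the token lookup (the real call takes a loaded llama_model, and the token id below is invented for illustration):

#include <cstdint>
#include <iostream>
#include <vector>

using llama_token = int32_t;

// Stand-in for llama_token_middle(model): vocabularies that lack a
// fill-in-the-middle MIDDLE token report it as a negative id.
static llama_token fake_token_middle(bool model_has_fim_middle) {
    return model_has_fim_middle ? 32009 : -1; // 32009 is an arbitrary example id
}

int main() {
    // Pretend BOS, prefix and suffix tokens have already been inserted.
    std::vector<llama_token> prefix_tokens = {1, 100, 101};

    const llama_token middle_token = fake_token_middle(/*model_has_fim_middle=*/false);
    if (middle_token >= 0) {
        prefix_tokens.push_back(middle_token); // only append when the vocab actually defines it
    }

    std::cout << "prompt length: " << prefix_tokens.size() << '\n'; // 3: the invalid id was not appended
}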