Merge branch 'master' into concedo_experimental

# Conflicts:
#	CMakeLists.txt
#	Makefile
This commit is contained in:
Concedo 2023-12-03 21:56:29 +08:00
commit ac36aee001
10 changed files with 441 additions and 44 deletions

View file

@ -230,18 +230,15 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
var result = [CChar](repeating: 0, count: 8)
let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count))
if nTokens < 0 {
if result.count >= -Int(nTokens) {
result.removeLast(-Int(nTokens))
} else {
result.removeAll()
}
let actualTokensCount = -Int(nTokens)
result = .init(repeating: 0, count: actualTokensCount)
let check = llama_token_to_piece(
model,
token,
&result,
Int32(result.count)
)
assert(check == nTokens)
assert(check == actualTokensCount)
} else {
result.removeLast(result.count - Int(nTokens))
}
@ -259,5 +256,4 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
buffer = []
return bufferString
}
return nil
}

View file

@ -164,13 +164,21 @@ actor LlamaContext {
private func token_to_piece(token: llama_token) -> String {
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
result.initialize(repeating: Int8(0), count: 8)
defer {
result.deallocate()
}
let nTokens = llama_token_to_piece(model, token, result, 8)
let _ = llama_token_to_piece(model, token, result, 8)
let resultStr = String(cString: result)
result.deallocate()
return resultStr
if nTokens < 0 {
let newResult = UnsafeMutablePointer<Int8>.allocate(capacity: Int(-nTokens))
newResult.initialize(repeating: Int8(0), count: Int(-nTokens))
defer {
newResult.deallocate()
}
_ = llama_token_to_piece(model, token, newResult, -nTokens)
return String(cString: newResult)
} else {
return String(cString: result)
}
}
}

View file

@ -70,6 +70,7 @@ def make_postData(body, chat=False, stream=False):
if(is_present(body, "mirostat_tau")): postData["mirostat_tau"] = body["mirostat_tau"]
if(is_present(body, "mirostat_eta")): postData["mirostat_eta"] = body["mirostat_eta"]
if(is_present(body, "seed")): postData["seed"] = body["seed"]
if(is_present(body, "grammar")): postData["grammar"] = body["grammar"]
if(is_present(body, "logit_bias")): postData["logit_bias"] = [[int(token), body["logit_bias"][token]] for token in body["logit_bias"].keys()]
if (args.stop != ""):
postData["stop"] = [args.stop]

View file

@ -1470,7 +1470,7 @@ struct llama_server_context
int split_multiprompt_task(task_server& multiprompt_task)
{
auto prompt_count = multiprompt_task.data.at("prompt").size();
int prompt_count = multiprompt_task.data.at("prompt").size();
assert(prompt_count > 1);
int multitask_id = id_gen++;
@ -2411,9 +2411,7 @@ json oaicompat_completion_params_parse(
}
// Handle 'stop' field
if (body["stop"].is_null()) {
llama_params["stop"] = json::array({});
} else if (body["stop"].is_string()) {
if (body.contains("stop") && body["stop"].is_string()) {
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
} else {
llama_params["stop"] = json_value(body, "stop", json::array());