mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 01:54:37 +00:00
Merge branch 'master' into concedo_experimental
# Conflicts: # CMakeLists.txt # Makefile
This commit is contained in:
commit
ac36aee001
10 changed files with 441 additions and 44 deletions
|
@ -230,18 +230,15 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
|
|||
var result = [CChar](repeating: 0, count: 8)
|
||||
let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count))
|
||||
if nTokens < 0 {
|
||||
if result.count >= -Int(nTokens) {
|
||||
result.removeLast(-Int(nTokens))
|
||||
} else {
|
||||
result.removeAll()
|
||||
}
|
||||
let actualTokensCount = -Int(nTokens)
|
||||
result = .init(repeating: 0, count: actualTokensCount)
|
||||
let check = llama_token_to_piece(
|
||||
model,
|
||||
token,
|
||||
&result,
|
||||
Int32(result.count)
|
||||
)
|
||||
assert(check == nTokens)
|
||||
assert(check == actualTokensCount)
|
||||
} else {
|
||||
result.removeLast(result.count - Int(nTokens))
|
||||
}
|
||||
|
@ -259,5 +256,4 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
|
|||
buffer = []
|
||||
return bufferString
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -164,13 +164,21 @@ actor LlamaContext {
|
|||
private func token_to_piece(token: llama_token) -> String {
|
||||
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
|
||||
result.initialize(repeating: Int8(0), count: 8)
|
||||
defer {
|
||||
result.deallocate()
|
||||
}
|
||||
let nTokens = llama_token_to_piece(model, token, result, 8)
|
||||
|
||||
let _ = llama_token_to_piece(model, token, result, 8)
|
||||
|
||||
let resultStr = String(cString: result)
|
||||
|
||||
result.deallocate()
|
||||
|
||||
return resultStr
|
||||
if nTokens < 0 {
|
||||
let newResult = UnsafeMutablePointer<Int8>.allocate(capacity: Int(-nTokens))
|
||||
newResult.initialize(repeating: Int8(0), count: Int(-nTokens))
|
||||
defer {
|
||||
newResult.deallocate()
|
||||
}
|
||||
_ = llama_token_to_piece(model, token, newResult, -nTokens)
|
||||
return String(cString: newResult)
|
||||
} else {
|
||||
return String(cString: result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,6 +70,7 @@ def make_postData(body, chat=False, stream=False):
|
|||
if(is_present(body, "mirostat_tau")): postData["mirostat_tau"] = body["mirostat_tau"]
|
||||
if(is_present(body, "mirostat_eta")): postData["mirostat_eta"] = body["mirostat_eta"]
|
||||
if(is_present(body, "seed")): postData["seed"] = body["seed"]
|
||||
if(is_present(body, "grammar")): postData["grammar"] = body["grammar"]
|
||||
if(is_present(body, "logit_bias")): postData["logit_bias"] = [[int(token), body["logit_bias"][token]] for token in body["logit_bias"].keys()]
|
||||
if (args.stop != ""):
|
||||
postData["stop"] = [args.stop]
|
||||
|
|
|
@ -1470,7 +1470,7 @@ struct llama_server_context
|
|||
|
||||
int split_multiprompt_task(task_server& multiprompt_task)
|
||||
{
|
||||
auto prompt_count = multiprompt_task.data.at("prompt").size();
|
||||
int prompt_count = multiprompt_task.data.at("prompt").size();
|
||||
assert(prompt_count > 1);
|
||||
|
||||
int multitask_id = id_gen++;
|
||||
|
@ -2411,9 +2411,7 @@ json oaicompat_completion_params_parse(
|
|||
}
|
||||
|
||||
// Handle 'stop' field
|
||||
if (body["stop"].is_null()) {
|
||||
llama_params["stop"] = json::array({});
|
||||
} else if (body["stop"].is_string()) {
|
||||
if (body.contains("stop") && body["stop"].is_string()) {
|
||||
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
|
||||
} else {
|
||||
llama_params["stop"] = json_value(body, "stop", json::array());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue