mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-21 18:52:02 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # README.md # docs/build.md # ggml/src/ggml-cpu/CMakeLists.txt # ggml/src/ggml-cpu/kleidiai/kernels.cpp # ggml/src/ggml-cpu/kleidiai/kernels.h # ggml/src/ggml-cpu/kleidiai/kleidiai.cpp # tests/test-backend-ops.cpp # tools/server/README.md
This commit is contained in:
commit
4abea4b5c9
4 changed files with 15 additions and 13 deletions
|
|
@ -10,7 +10,7 @@ static __global__ void im2col_kernel(
|
|||
return;
|
||||
}
|
||||
|
||||
const int64_t ksize = OW * (KH > 1 ? KW : 1);
|
||||
const int64_t ksize = OW * KH;
|
||||
const int64_t kx = i / ksize;
|
||||
const int64_t kd = kx * ksize;
|
||||
const int64_t ky = (i - kd) / OW;
|
||||
|
|
|
|||
|
|
@ -40,12 +40,10 @@ void main() {
|
|||
const uint src_base = ic * p.offset_delta + batch * p.batch_offset;
|
||||
const uint dst_base = ((batch * p.OH + oh) * p.OW) * p.CHW + ic * (p.KW * p.KH);
|
||||
const int oh_s1 = int(oh) * p.s1;
|
||||
const uint ksize = p.OW * (p.KH > 1 ? p.KW : 1);
|
||||
const uint ksize = p.OW * p.KH;
|
||||
|
||||
const uint base_linear_idx = gidx * NUM_ITER;
|
||||
|
||||
const uint max_ky = ksize / p.OW;
|
||||
|
||||
uint current_kx = base_linear_idx / ksize;
|
||||
const uint rem = base_linear_idx - (current_kx * ksize);
|
||||
uint current_ky = rem / p.OW;
|
||||
|
|
@ -76,7 +74,7 @@ void main() {
|
|||
|
||||
if (++current_ix == p.OW) {
|
||||
current_ix = 0;
|
||||
if (++current_ky == max_ky) {
|
||||
if (++current_ky == p.KH) {
|
||||
current_ky = 0;
|
||||
current_kx++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -786,14 +786,17 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// check for reverse prompt using special tokens
|
||||
llama_token last_token = common_sampler_last(smpl);
|
||||
for (auto token : antiprompt_token) {
|
||||
if (token == last_token) {
|
||||
if (params.interactive) {
|
||||
is_interacting = true;
|
||||
// avoid calling common_sampler_last() if last_output is empty
|
||||
if (!last_output.empty()) {
|
||||
llama_token last_token = common_sampler_last(smpl);
|
||||
for (auto token : antiprompt_token) {
|
||||
if (token == last_token) {
|
||||
if (params.interactive) {
|
||||
is_interacting = true;
|
||||
}
|
||||
is_antiprompt = true;
|
||||
break;
|
||||
}
|
||||
is_antiprompt = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4516,9 +4516,10 @@ int main(int argc, char ** argv) {
|
|||
json tokens_response = json::array();
|
||||
if (body.count("content") != 0) {
|
||||
const bool add_special = json_value(body, "add_special", false);
|
||||
const bool parse_special = json_value(body, "parse_special", true);
|
||||
const bool with_pieces = json_value(body, "with_pieces", false);
|
||||
|
||||
llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, true);
|
||||
llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, parse_special);
|
||||
|
||||
if (with_pieces) {
|
||||
for (const auto& token : tokens) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue