diff --git a/ggml/src/ggml-cuda/im2col.cu b/ggml/src/ggml-cuda/im2col.cu index 86a54e42b..5bb85b480 100644 --- a/ggml/src/ggml-cuda/im2col.cu +++ b/ggml/src/ggml-cuda/im2col.cu @@ -10,7 +10,7 @@ static __global__ void im2col_kernel( return; } - const int64_t ksize = OW * (KH > 1 ? KW : 1); + const int64_t ksize = OW * KH; const int64_t kx = i / ksize; const int64_t kd = kx * ksize; const int64_t ky = (i - kd) / OW; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp b/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp index 17c7ccb90..fdbcf7eba 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp @@ -40,12 +40,10 @@ void main() { const uint src_base = ic * p.offset_delta + batch * p.batch_offset; const uint dst_base = ((batch * p.OH + oh) * p.OW) * p.CHW + ic * (p.KW * p.KH); const int oh_s1 = int(oh) * p.s1; - const uint ksize = p.OW * (p.KH > 1 ? p.KW : 1); + const uint ksize = p.OW * p.KH; const uint base_linear_idx = gidx * NUM_ITER; - const uint max_ky = ksize / p.OW; - uint current_kx = base_linear_idx / ksize; const uint rem = base_linear_idx - (current_kx * ksize); uint current_ky = rem / p.OW; @@ -76,7 +74,7 @@ void main() { if (++current_ix == p.OW) { current_ix = 0; - if (++current_ky == max_ky) { + if (++current_ky == p.KH) { current_ky = 0; current_kx++; } diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 54fb9c8cd..b3c97f923 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -786,14 +786,17 @@ int main(int argc, char ** argv) { } // check for reverse prompt using special tokens - llama_token last_token = common_sampler_last(smpl); - for (auto token : antiprompt_token) { - if (token == last_token) { - if (params.interactive) { - is_interacting = true; + // avoid calling common_sampler_last() if last_output is empty + if (!last_output.empty()) { + llama_token last_token = common_sampler_last(smpl); + for (auto token : antiprompt_token) { + if (token == last_token) { + if (params.interactive) { + is_interacting = true; + } + is_antiprompt = true; + break; } - is_antiprompt = true; - break; } } diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 0afe213af..256a2928b 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -4516,9 +4516,10 @@ int main(int argc, char ** argv) { json tokens_response = json::array(); if (body.count("content") != 0) { const bool add_special = json_value(body, "add_special", false); + const bool parse_special = json_value(body, "parse_special", true); const bool with_pieces = json_value(body, "with_pieces", false); - llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, true); + llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, parse_special); if (with_pieces) { for (const auto& token : tokens) {