mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge commit 'c917b67f06' into concedo_experimental
# Conflicts:
#	.devops/tools.sh
#	Makefile
#	ggml/src/ggml-cuda/mmq.cuh
#	tests/test-double-float.cpp
#	tests/test-quantize-fns.cpp
#	tests/test-quantize-perf.cpp
This commit is contained in:
commit
602661ba49
25 changed files with 1339 additions and 1504 deletions
|
@@ -5955,13 +5955,6 @@ static bool llm_load_tensors(
 
     auto & hparams = model.hparams;
 
-#ifdef GGML_USE_SYCL
-    // disable MoE with SYCL until mul_mat_id is updated
-    if (hparams.n_expert > 0) {
-        n_gpu_layers = 0;
-    }
-#endif
-
     model.split_mode = split_mode;
     model.main_gpu = main_gpu;
     model.n_gpu_layers = n_gpu_layers;
@@ -21500,7 +21493,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
         size--;
     }
     if (length < (int32_t)size) {
-        return (int32_t) -size;
+        return -(int32_t) size;
     }
     memcpy(buf, token, size);
     return (int32_t) size;
|
Loading…
Add table
Add a link
Reference in a new issue