Merge commit 'c917b67f06' into concedo_experimental

# Conflicts:
#	.devops/tools.sh
#	Makefile
#	ggml/src/ggml-cuda/mmq.cuh
#	tests/test-double-float.cpp
#	tests/test-quantize-fns.cpp
#	tests/test-quantize-perf.cpp
This commit is contained in:
Concedo 2024-07-14 11:38:20 +08:00
commit 602661ba49
25 changed files with 1339 additions and 1504 deletions

View file

@@ -5955,13 +5955,6 @@ static bool llm_load_tensors(
auto & hparams = model.hparams;
-#ifdef GGML_USE_SYCL
-// disable MoE with SYCL until mul_mat_id is updated
-if (hparams.n_expert > 0) {
-n_gpu_layers = 0;
-}
-#endif
model.split_mode = split_mode;
model.main_gpu = main_gpu;
model.n_gpu_layers = n_gpu_layers;
@@ -21500,7 +21493,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
size--;
}
if (length < (int32_t)size) {
-return (int32_t) -size;
+return -(int32_t) size;
}
memcpy(buf, token, size);
return (int32_t) size;