Merge branch 'master' into concedo_experimental

# Conflicts:
#	.devops/nix/sif.nix
#	.github/workflows/build.yml
#	.github/workflows/python-check-requirements.yml
#	README-sycl.md
#	README.md
#	flake.lock
#	flake.nix
#	requirements/requirements-convert-hf-to-gguf.txt
#	scripts/compare-llama-bench.py
Author: Concedo
Date:   2024-03-04 15:33:33 +08:00
Commit: 7c64845dea
41 changed files with 3325 additions and 2053 deletions


@@ -941,7 +941,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     llama_ctx_params.seed = -1;
     llama_ctx_params.offload_kqv = !inputs.low_vram;
-    llama_ctx_params.mul_mat_q = inputs.use_mmq;
     llama_ctx_params.logits_all = false;
     model_params.use_mmap = inputs.use_mmap;
     model_params.use_mlock = inputs.use_mlock;
@@ -967,6 +966,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     {
         printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice);
     }
+    ggml_cuda_set_mul_mat_q(inputs.use_mmq);
     #endif
     model_params.main_gpu = cu_parseinfo_maindevice;
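
Taken together, the two hunks move the MMQ toggle out of llama_context_params (the mul_mat_q field was dropped upstream) and apply it through the fork's CUDA setter ggml_cuda_set_mul_mat_q instead, before the model is loaded. Below is a minimal sketch of the resulting parameter wiring in gpttype_load_model; the field names and the setter come from the diff, while the standalone helper function, the llama.h include, and the GGML_USE_CUBLAS guard are illustrative assumptions, not the exact surrounding code.

    // Sketch only: how the loader inputs map onto llama.cpp params after this merge.
    // Assumes the early-2024 llama.cpp C API and koboldcpp's load_model_inputs struct;
    // the helper function and the GGML_USE_CUBLAS guard are assumptions for illustration.
    #include "llama.h"

    static void apply_load_params(const load_model_inputs &inputs,
                                  llama_context_params &llama_ctx_params,
                                  llama_model_params   &model_params,
                                  int cu_parseinfo_maindevice)
    {
        llama_ctx_params = llama_context_default_params();
        model_params     = llama_model_default_params();

        llama_ctx_params.seed        = -1;                 // let llama.cpp pick a random seed
        llama_ctx_params.offload_kqv = !inputs.low_vram;   // keep KV cache on GPU unless low-VRAM mode
        llama_ctx_params.logits_all  = false;              // only the last token's logits are needed

        model_params.use_mmap  = inputs.use_mmap;
        model_params.use_mlock = inputs.use_mlock;

    #if defined(GGML_USE_CUBLAS)
        // mul_mat_q is no longer a per-context field; the fork now sets it
        // globally on the CUDA backend before loading the model.
        ggml_cuda_set_mul_mat_q(inputs.use_mmq);
        model_params.main_gpu = cu_parseinfo_maindevice;
    #endif
    }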