Merge commit '280345968d' into concedo_experimental

# Conflicts:
#	.devops/full-cuda.Dockerfile
#	.devops/llama-cpp-cuda.srpm.spec
#	.devops/main-cuda.Dockerfile
#	.devops/nix/package.nix
#	.devops/server-cuda.Dockerfile
#	.github/workflows/build.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	ci/run.sh
#	docs/token_generation_performance_tips.md
#	flake.lock
#	llama.cpp
#	scripts/LlamaConfig.cmake.in
#	scripts/compare-commits.sh
#	scripts/server-llm.sh
#	tests/test-quantize-fns.cpp
This commit is contained in:
Concedo 2024-04-07 20:27:17 +08:00
commit a530afa1e4
33 changed files with 124 additions and 1280 deletions

View file

@ -2511,15 +2511,15 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
invalid_param = true;
break;
}
#ifndef GGML_USE_CUBLAS
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Setting the split mode has no effect.\n");
#endif // GGML_USE_CUBLAS
#ifndef GGML_USE_CUDA
fprintf(stderr, "warning: llama.cpp was compiled without CUDA. Setting the split mode has no effect.\n");
#endif // GGML_USE_CUDA
} else if (arg == "--tensor-split" || arg == "-ts") {
if (++i >= argc) {
invalid_param = true;
break;
}
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
std::string arg_next = argv[i];
// split string by , and /
@ -2536,17 +2536,17 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
}
}
#else
LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n", {});
#endif // GGML_USE_CUBLAS
LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n", {});
#endif // GGML_USE_CUDA
} else if (arg == "--main-gpu" || arg == "-mg") {
if (++i >= argc) {
invalid_param = true;
break;
}
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
params.main_gpu = std::stoi(argv[i]);
#else
LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.", {});
LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a main GPU.", {});
#endif
} else if (arg == "--lora") {
if (++i >= argc) {