Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	Makefile
#	flake.lock
#	ggml-cuda.cu
#	ggml-cuda.h
Concedo 2024-03-19 18:57:22 +08:00
commit a3fa919c67
33 changed files with 1777 additions and 1484 deletions

common/common.h

@@ -105,6 +105,7 @@ struct gpt_params {
struct llama_sampling_params sparams;
std::string model = "models/7B/ggml-model-f16.gguf"; // model path
std::string model_url = ""; // model url to download
std::string model_draft = ""; // draft model for speculative decoding
std::string model_alias = "unknown"; // model alias
std::string prompt = "";
@@ -207,6 +208,9 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model,
struct llama_model_params params);
// Batch utils
void llama_batch_clear(struct llama_batch & batch);