From 6a709be50ace092710144a39f033114b619d459b Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 27 Mar 2025 10:27:20 +0800
Subject: [PATCH] replace deprecated

Replace the deprecated llama_new_context_with_model() with its successor
llama_init_from_model() in all adapters, and drop the now-unused
file_format argument from the GGUF identification message since the
version is no longer printed.
---
 expose.cpp                       | 2 +-
 gpttype_adapter.cpp              | 4 ++--
 otherarch/embeddings_adapter.cpp | 2 +-
 otherarch/tts_adapter.cpp        | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/expose.cpp b/expose.cpp
index f817241fc..5cbfe44c2 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -188,7 +188,7 @@ extern "C"
     }
     else if(file_format==FileFormat::GGUF_GENERIC)
     {
-        printf("\n---\nIdentified as GGUF model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+        printf("\n---\nIdentified as GGUF model.\nAttempting to Load...\n---\n");
     }
     else if(file_format==FileFormat::GGML || file_format==FileFormat::GGHF || file_format==FileFormat::GGJT || file_format==FileFormat::GGJT_2 || file_format==FileFormat::GGJT_3)
     {
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 487d5d573..cedca3fa0 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -583,7 +583,7 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
     draft_ctx_params.type_v = base_ctx_params.type_v;

     llama_model * draftmodel = llama_model_load_from_file(spec_model_filename.c_str(), draft_model_params);
-    draft_ctx = llama_new_context_with_model(draftmodel, draft_ctx_params);
+    draft_ctx = llama_init_from_model(draftmodel, draft_ctx_params);
     if(draft_ctx == NULL)
     {
         printf("Error: failed to load speculative decoding draft model '%s'\n", spec_model_filename.c_str());
@@ -2227,7 +2227,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         llama_ctx_params.flash_attn = kcpp_data->flash_attn;
         llama_ctx_params.type_k = (inputs.quant_k>1?GGML_TYPE_Q4_0:(inputs.quant_k==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
         llama_ctx_params.type_v = (inputs.quant_v>1?GGML_TYPE_Q4_0:(inputs.quant_v==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
-        llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
+        llama_ctx_v4 = llama_init_from_model(llamamodel, llama_ctx_params);

         if (llama_ctx_v4 == NULL)
         {
diff --git a/otherarch/embeddings_adapter.cpp b/otherarch/embeddings_adapter.cpp
index 982ecea1f..753ced241 100644
--- a/otherarch/embeddings_adapter.cpp
+++ b/otherarch/embeddings_adapter.cpp
@@ -142,7 +142,7 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
     ctx_params.n_threads_batch = nthreads;
     ctx_params.flash_attn = inputs.flash_attention;

-    embeddings_ctx = llama_new_context_with_model(embeddingsmodel, ctx_params);
+    embeddings_ctx = llama_init_from_model(embeddingsmodel, ctx_params);
     if (embeddings_ctx == nullptr)
     {
         printf("\nEmbeddings Model Load Error: Failed to initialize context!\n");
diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp
index 3d1e5b987..336d70e9f 100644
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -542,7 +542,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
     tts_ctx_params.flash_attn = inputs.flash_attention;

     llama_model * ttcmodel = llama_model_load_from_file(modelfile_ttc.c_str(), tts_model_params);
-    ttc_ctx = llama_new_context_with_model(ttcmodel, tts_ctx_params);
+    ttc_ctx = llama_init_from_model(ttcmodel, tts_ctx_params);
     if (ttc_ctx == nullptr)
     {
         printf("\nTTS Load Error: Failed to initialize ttc context!\n");
@@ -552,7 +552,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)

     llama_model * ctsmodel = llama_model_load_from_file(modelfile_cts.c_str(), tts_model_params);
     tts_ctx_params.embeddings = true; //this requires embeddings instead
-    cts_ctx = llama_new_context_with_model(ctsmodel, tts_ctx_params);
+    cts_ctx = llama_init_from_model(ctsmodel, tts_ctx_params);
     if (cts_ctx == nullptr)
     {
         printf("\nTTS Load Error: Failed to initialize cts context!\n");
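
Note: for anyone applying the same migration elsewhere, the replacement API pairs up as in the sketch below. This is a minimal standalone example assuming the llama.cpp C API around the time of this patch (llama_model_load_from_file, llama_init_from_model, llama_model_free); "model.gguf" and the default parameters are placeholders, not values taken from this patch.

    // migrate_sketch.cpp - load a model and create a context with the
    // non-deprecated llama.cpp entry points used throughout this patch.
    #include "llama.h"
    #include <cstdio>

    int main()
    {
        llama_backend_init();

        // llama_model_load_from_file replaces the deprecated
        // llama_load_model_from_file. "model.gguf" is a placeholder path.
        llama_model_params model_params = llama_model_default_params();
        llama_model * model = llama_model_load_from_file("model.gguf", model_params);
        if (model == nullptr)
        {
            printf("Error: failed to load model\n");
            llama_backend_free();
            return 1;
        }

        // llama_init_from_model replaces the deprecated
        // llama_new_context_with_model, mirroring the edits in this patch.
        llama_context_params ctx_params = llama_context_default_params();
        llama_context * ctx = llama_init_from_model(model, ctx_params);
        if (ctx == nullptr)
        {
            printf("Error: failed to initialize context\n");
            llama_model_free(model);
            llama_backend_free();
            return 1;
        }

        llama_free(ctx);
        llama_model_free(model);  // replaces the deprecated llama_free_model
        llama_backend_free();
        return 0;
    }

The call order is unchanged by the migration: load the model first, then hand it to the context constructor, and free the context before the model.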