From e77d5662685edb12d3ba41f8676bcbf4d5a788f5 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 13 Jan 2025 23:50:54 +0800
Subject: [PATCH] some tweaks and cleanup

---
 otherarch/tts_adapter.cpp | 8 ++++++--
 src/llama-context.cpp     | 2 +-
 src/llama.cpp             | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp
index 7db7dbffb..92fc3ce79 100644
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -697,7 +697,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
 
     if(!inputs.quiet)
     {
-        printf("\nTTS Generating (%d input tokens)...", prompt_inp.size());
+        printf("\nTTS Processing (%d input tokens)...\n", prompt_inp.size());
     }
 
     prompt_add(prompt_inp, model_ttc, "<|text_end|>\n<|audio_start|>\n", false, true);
@@ -771,6 +771,10 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
             output.status = 0;
             return output;
         }
+        if(!inputs.quiet)
+        {
+            printf("\rTTS Generating (%d AudioTokens)", n_decode);
+        }
     }
 
     if(!inputs.quiet && ttsdebugmode==1)
@@ -818,7 +822,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
         audio[i] = 0.0f;
     }
     //add some silence at the end
-    for (int i = 0; i < 24000/20; ++i) {
+    for (int i = 0; i < 24000/10; ++i) {
         audio.push_back(0.0f);
     }
 
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 38a55fb2c..f507150f4 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -87,7 +87,7 @@ void llama_set_inputs(llama_context & lctx, const llama_ubatch & ubatch) {
 
     //GGML_ASSERT(lctx.inp_out_ids && "every model that can must skip unused outputs");
     if (!lctx.inp_out_ids) {
-        LLAMA_LOG_WARN("%s: 'lctx.inp_out_ids' is not created\n", __func__);
+        //LLAMA_LOG_WARN("%s: 'lctx.inp_out_ids' is not created\n", __func__);
     } else {
         const int64_t n_tokens = ubatch.n_tokens;
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 09a483238..0f02c314d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -11124,7 +11124,7 @@ static int llama_decode_impl(
 
     GGML_ASSERT_CONTINUE(n_tokens_all <= cparams.n_batch);
 
-    GGML_ASSERT_CONTINUE((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens");
+    //GGML_ASSERT_CONTINUE((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens");
 
     if (lctx.t_compute_start_us == 0) {
         lctx.t_compute_start_us = ggml_time_us();