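some tweaks and cleanup: clearer TTS progress output, longer trailing silence, and a noisy warning and a non-causal n_ubatch assert disabled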

2025-09-11 17:44:38 +00:00 · 2025-01-13 23:50:54 +08:00 · 2025-01-13 23:50:54 +08:00 · e77d566268
commit e77d566268
parent 636beac6d2
3 changed files with 8 additions and 4 deletions
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -697,7 +697,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)

    if(!inputs.quiet)
    {
-        printf("\nTTS Generating (%d input tokens)...", prompt_inp.size());
+        printf("\nTTS Processing (%d input tokens)...\n", prompt_inp.size());
    }

    prompt_add(prompt_inp, model_ttc, "<|text_end|>\n<|audio_start|>\n", false, true);
@@ -771,6 +771,10 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
            output.status = 0;
            return output;
        }
+        if(!inputs.quiet)
+        {
+            printf("\rTTS Generating (%d AudioTokens)", n_decode);
+        }
    }

    if(!inputs.quiet && ttsdebugmode==1)
@@ -818,7 +822,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
            audio[i] = 0.0f;
        }
        //add some silence at the end
-        for (int i = 0; i < 24000/20; ++i) {
+        for (int i = 0; i < 24000/10; ++i) {
            audio.push_back(0.0f);
        }

--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -87,7 +87,7 @@ void llama_set_inputs(llama_context & lctx, const llama_ubatch & ubatch) {
        //GGML_ASSERT(lctx.inp_out_ids && "every model that can must skip unused outputs");

        if (!lctx.inp_out_ids) {
-            LLAMA_LOG_WARN("%s: 'lctx.inp_out_ids' is not created\n", __func__);
+            //LLAMA_LOG_WARN("%s: 'lctx.inp_out_ids' is not created\n", __func__);
        } else {
            const int64_t n_tokens = ubatch.n_tokens;

--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -11124,7 +11124,7 @@ static int llama_decode_impl(

    GGML_ASSERT_CONTINUE(n_tokens_all <= cparams.n_batch);

-    GGML_ASSERT_CONTINUE((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens");
+    //GGML_ASSERT_CONTINUE((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens");

    if (lctx.t_compute_start_us == 0) {
        lctx.t_compute_start_us = ggml_time_us();