tts.cpp models are now loadable in kcpp

Concedo 2025-08-17 15:47:22 +08:00
parent 9935ac093f
commit 52606e9b1d
14 changed files with 131 additions and 223 deletions

@@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
 	$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
+ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/ttscpp.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 ggml/src/ggml-vulkan-shaders.cpp:

@@ -1,5 +0,0 @@
-set(TARGET llama-diffusion-cli)
-add_executable(${TARGET} diffusion-cli.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)

@@ -1,54 +0,0 @@
-cmake_minimum_required(VERSION 3.13)
-
-find_package(Python3 REQUIRED)
-
-# Shader locations
-set(SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders")
-set(SHADER_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
-set(SHADER_HEADER "${SHADER_OUTPUT_DIR}/ggml-wgsl-shaders.hpp")
-file(MAKE_DIRECTORY ${SHADER_OUTPUT_DIR})
-message(STATUS "Shader output dir: ${SHADER_OUTPUT_DIR}")
-
-# Find all WGSL files
-file(GLOB WGSL_SHADER_FILES "${SHADER_DIR}/*.wgsl")
-
-# Generate the header using a Python script
-add_custom_command(
-    OUTPUT ${SHADER_HEADER}
-    COMMAND ${CMAKE_COMMAND} -E echo "Embedding WGSL shaders to ggml-wgsl-shaders.hpp"
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${SHADER_OUTPUT_DIR}
-    COMMAND ${CMAKE_COMMAND} -E env PYTHONIOENCODING=utf-8
-        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py
-        --input "${SHADER_DIR}"
-        --output "${SHADER_HEADER}"
-    DEPENDS ${WGSL_SHADER_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py
-    VERBATIM
-)
-
-add_custom_target(generate_shaders DEPENDS ${SHADER_HEADER})
-
-ggml_add_backend_library(ggml-webgpu
-    ggml-webgpu.cpp
-    ${SHADER_HEADER}
-    ../../include/ggml-webgpu.h
-)
-
-add_dependencies(ggml-webgpu generate_shaders)
-
-if(EMSCRIPTEN)
-    set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")
-
-    target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-    target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-else()
-    find_package(Dawn REQUIRED)
-    set(DawnWebGPU_TARGET dawn::webgpu_dawn)
-endif()
-
-if (GGML_WEBGPU_DEBUG)
-    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
-endif()
-
-target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
-target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})

@@ -1,36 +0,0 @@
-if (DEFINED ZDNN_ROOT)
-    message(STATUS "zdnn: using ZDNN_ROOT override: ${ZDNN_ROOT}")
-    set(ZDNN_HINT "${ZDNN_ROOT}")
-else()
-    set(ZDNN_HINT "")
-endif()
-
-find_path(ZDNN_INCLUDE
-    NAMES zdnn.h
-    HINTS ${ZDNN_HINT} /usr /usr/local
-    PATH_SUFFIXES include)
-
-if (ZDNN_INCLUDE)
-    message(STATUS "zdnn: found include: ${ZDNN_INCLUDE}")
-else()
-    message(FATAL_ERROR "zdnn: include directory not found, please set ZDNN_ROOT to the proper path if necessary")
-endif()
-
-find_library(ZDNN_LIB
-    NAMES zdnn
-    HINTS ${ZDNN_HINT} /usr /usr/local
-    PATH_SUFFIXES lib lib64)
-
-if (ZDNN_LIB)
-    message(STATUS "zdnn: found library: ${ZDNN_LIB}")
-else()
-    message(FATAL_ERROR "zdnn: library not found, please set ZDNN_ROOT to the proper path if necessary")
-endif()
-
-file(GLOB GGML_SOURCES_ZDNN "*.c" "*.cpp")
-file(GLOB GGML_HEADERS_ZDNN "*.h" "*.hpp")
-
-ggml_add_backend_library(ggml-zdnn ${GGML_HEADERS_ZDNN} ${GGML_SOURCES_ZDNN})
-target_link_libraries(ggml-zdnn PRIVATE ${ZDNN_LIB})
-target_include_directories(ggml-zdnn PRIVATE ${ZDNN_INCLUDE})
-target_link_directories(ggml-zdnn PRIVATE ${ZDNN_LIB})
-target_compile_definitions(ggml-zdnn PRIVATE GGML_USE_ZDNN)

@@ -1826,8 +1826,8 @@ def whisper_generate(genparams):
 def tts_load_model(ttc_model_filename,cts_model_filename):
     global args
     inputs = tts_load_model_inputs()
-    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8")
-    inputs.cts_model_filename = cts_model_filename.encode("UTF-8")
+    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8") if ttc_model_filename else "".encode("UTF-8")
+    inputs.cts_model_filename = cts_model_filename.encode("UTF-8") if cts_model_filename else "".encode("UTF-8")
     inputs.gpulayers = (999 if args.ttsgpu else 0)
     inputs.flash_attention = args.flashattention
     thds = args.threads
@@ -5602,7 +5602,7 @@ def show_gui():
         args.embeddingsmaxctx = (0 if embeddings_ctx_var.get()=="" else int(embeddings_ctx_var.get()))
         args.embeddingsgpu = (embeddings_gpu_var.get()==1)
-        if tts_model_var.get() != "" and wavtokenizer_var.get() != "":
+        if tts_model_var.get() != "":
             args.ttsthreads = (0 if tts_threads_var.get()=="" else int(tts_threads_var.get()))
             args.ttsmodel = tts_model_var.get()
             args.ttswavtokenizer = wavtokenizer_var.get()
@@ -7201,8 +7201,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
                 exit_with_error(3,"Could not load whisper model: " + whispermodel)
     #handle tts model
-    if args.ttsmodel and args.ttsmodel!="" and args.ttswavtokenizer and args.ttswavtokenizer!="":
-        if not os.path.exists(args.ttsmodel) or not os.path.exists(args.ttswavtokenizer):
+    if args.ttsmodel and args.ttsmodel!="":
+        if not os.path.exists(args.ttsmodel) or (args.ttswavtokenizer and args.ttswavtokenizer!="" and not os.path.exists(args.ttswavtokenizer)):
            if args.ignoremissing:
                print("Ignoring missing TTS model files!")
                args.ttsmodel = None
@@ -7214,6 +7214,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
         ttsmodelpath = args.ttsmodel
         ttsmodelpath = os.path.abspath(ttsmodelpath)
         wavtokpath = args.ttswavtokenizer
-        wavtokpath = os.path.abspath(wavtokpath)
+        if wavtokpath:
+            wavtokpath = os.path.abspath(wavtokpath)
         loadok = tts_load_model(ttsmodelpath,wavtokpath)
         print("Load TTS Model OK: " + str(loadok))
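
With these changes the WavTokenizer model is optional end to end: the GUI no longer requires both fields, the launcher only validates paths that were actually supplied, and tts_load_model encodes a missing name as an empty string instead of calling .encode() on None. The C side may therefore receive an empty cts_model_filename. A minimal sketch of that handoff, using only the field names visible in this diff (the struct has further members, and the literal values here are placeholders):

    tts_load_model_inputs inputs;              // struct shared with the Python ctypes layer
    inputs.ttc_model_filename = "kokoro.gguf"; // a single-file TTS.cpp-style model
    inputs.cts_model_filename = "";            // WavTokenizer omitted, now permitted
    inputs.debugmode = 0;
    bool ok = ttstype_load_model(inputs);      // arch detection below decides the load path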

@@ -115,6 +115,20 @@ bool gguf_tensor_exists(const std::string & gguf_filename, std::string tensor_name
     return found;
 }
+std::string gguf_get_model_arch(const std::string & gguf_filename)
+{
+    struct gguf_init_params ggufparams;
+    ggufparams.no_alloc = true;
+    ggufparams.ctx = NULL;
+    struct gguf_context * ctx = gguf_init_from_file(gguf_filename.c_str(), ggufparams);
+    if (!ctx) return "";
+    auto keyidx = gguf_find_key(ctx, "general.architecture");
+    std::string modelarch = "";
+    if (keyidx != -1) { modelarch = gguf_get_val_str(ctx, keyidx); }
+    gguf_free(ctx);
+    return modelarch;
+}
+
 //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
 FileFormat check_file_format(const std::string & fname, FileFormatExtraMeta * fileformatmeta)
 {
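
The new gguf_get_model_arch helper opens the file with no_alloc set, so only GGUF metadata is parsed and no tensor data is loaded; it returns an empty string if the file is unreadable or carries no general.architecture key. A minimal usage sketch (path and messages are illustrative, not from this commit):

    std::string arch = gguf_get_model_arch("some-model.gguf");
    if (arch.empty()) {
        printf("not a readable GGUF file, or no architecture key set\n");
    } else {
        printf("detected architecture: %s\n", arch.c_str());
    }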

@@ -132,6 +132,7 @@ void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<int>
     int &n_past, std::vector<int> &last_n_tokens, const int nctx, std::vector<int> &smartcontext,
     const bool useSmartContext, const bool requireFullSubset);
 bool gguf_tensor_exists(const std::string & filename, std::string tensor_name, bool exactmatch);
+std::string gguf_get_model_arch(const std::string & filename);
 size_t gpttype_calc_new_state_kv();
 size_t gpttype_calc_new_state_tokencount();

@@ -357,7 +357,7 @@ void pretty_progress(int step, int steps, float time) {
         }
     }
     progress += "|";
-    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",
+    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s ",
            progress.c_str(), step, steps,
            time > 1.0f || time == 0 ? time : (1.0f / time));
     fflush(stdout);  // for linux
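
The only change here is a trailing space on the it/s format string. The progress line is redrawn in place with \r, and when the speed crosses 1.0 the output switches from the s/it form to the it/s form; a shorter number then leaves the last character of the previous, longer line on screen, and the extra space blanks it out. A standalone sketch of the artifact (illustrative values, not from the codebase):

    #include <cstdio>
    int main() {
        printf("\r%.2fs/it", 12.34f);  // draws "12.34s/it" (9 chars)
        printf("\r%.2fit/s ", 1.25f);  // 8-char body; the trailing space erases the stale 't'
        printf("\n");
        return 0;
    }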

@@ -26,7 +26,8 @@
 #endif
 //imports required for tts.cpp to work
-#include "tts.cpp"
+#include "ttscommon.h"
+#include "ttscpp.cpp"
 #include "ttstokenizer.cpp"
 #include "ttssampler.cpp"
 #include "parler_model.cpp"
@@ -497,6 +498,10 @@ static int code_terminate_id = 151670;
 static int nthreads = 4;
 static int tts_max_len = 4096;
+//ttscpp specific
+static generation_configuration * ttscpp_config = nullptr;
+static struct tts_runner * ttscpp_runner = nullptr;
+
 int total_tts_gens = 0;
 bool ttstype_load_model(const tts_load_model_inputs inputs)
@@ -532,11 +537,32 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
     std::string modelfile_ttc = inputs.ttc_model_filename;
     std::string modelfile_cts = inputs.cts_model_filename;
-    printf("\nLoading TTS Model, OuteTTS: %s \nWavTokenizer: %s \n",modelfile_ttc.c_str(),modelfile_cts.c_str());
+    std::string detectedarch = gguf_get_model_arch(modelfile_ttc);
+    bool is_ttscpp_file = false;
+    if (detectedarch!="" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end()) {
+        is_ttscpp_file = true;
+        printf("\nLoading TTS.CPP Model Arch: %s \n", detectedarch.c_str());
+    }else{
+        printf("\nLoading OuteTTS Model, OuteTTS: %s \nWavTokenizer: %s \n",modelfile_ttc.c_str(),modelfile_cts.c_str());
+        if(modelfile_ttc=="" || modelfile_cts=="")
+        {
+            printf("\nWarning: KCPP OuteTTS missing a file! Make sure both TTS and WavTokenizer models are loaded.\n");
+            return false;
+        }
+    }
     ttsdebugmode = inputs.debugmode;
     // tts init
+    if (is_ttscpp_file) {
+        ttscpp_config = new generation_configuration("af_alloy", 50, 1.0, 1.0, true, "", 0, 1.0);
+        ttscpp_runner = runner_from_file(modelfile_ttc, inputs.threads, ttscpp_config, true);
+        if (ttscpp_runner == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize TTSCPP!\n");
+            return false;
+        }
+    } else { //outetts only
     llama_model_params tts_model_params = llama_model_default_params();
     llama_context_params tts_ctx_params = llama_context_default_params();
@@ -608,6 +634,7 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
         if (testoks.size() == 1) {
             cts_offset = testoks[0];
         }
+    }
     printf("\nTTS Load Complete.\n");
     return true;
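
For the TTS.cpp path the adapter constructs a generation_configuration up front ("af_alloy" is a Kokoro voice name; the remaining positional constants are sampling defaults whose meanings this commit does not document) and lets runner_from_file dispatch on the detected architecture. A load-and-check sketch using only the entry points visible in this diff (model path and thread count are placeholders):

    generation_configuration * cfg = new generation_configuration("af_alloy", 50, 1.0, 1.0, true, "", 0, 1.0);
    struct tts_runner * runner = runner_from_file("kokoro.gguf", 8, cfg, true); // final bool mirrors the adapter's call
    if (runner == nullptr) {
        // runner_from_file now reports the failure and returns nullptr instead of aborting
        delete cfg;
    }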

@@ -1,4 +1,4 @@
-#include "tts.h"
+#include "ttscpp.h"
 #include "ttsargs.h"
 #include "ttscommon.h"
 #include "playback.h"

@@ -1,4 +1,4 @@
-#include "tts.h"
+#include "ttscpp.h"
 #include <mutex>
 
 // A list of all of the top level GGUF names under kokoro.duration_predictor that have quantization compatible tensors.
@@ -133,15 +133,18 @@ struct tts_runner * runner_from_file(const std::string & fname, int n_threads, generation_configuration * config, bool cpu_only)
     };
     gguf_context * meta_ctx = gguf_init_from_file(fname.c_str(), params);
     if (!meta_ctx) {
-        TTS_ABORT("%s failed for file %s\n", __func__, fname.c_str());
+        fprintf(stdout,"%s failed for file %s\n", __func__, fname.c_str());
+        return nullptr;
     }
     int arch_key = gguf_find_key(meta_ctx, "general.architecture");
     if (arch_key == -1) {
-        TTS_ABORT("%s failed for file %s. No architecture is set.\n", __func__, fname.c_str());
+        fprintf(stdout,"%s failed for file %s. No architecture is set.\n", __func__, fname.c_str());
+        return nullptr;
     }
     std::string arch = std::string(gguf_get_val_str(meta_ctx, arch_key));
     if (SUPPORTED_ARCHITECTURES.find(arch) == SUPPORTED_ARCHITECTURES.end()) {
-        TTS_ABORT("%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+        fprintf(stdout,"%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+        return nullptr;
     }
     tts_arch arch_type = SUPPORTED_ARCHITECTURES.at(arch);
     switch(arch_type) {
@@ -154,7 +157,8 @@
         case ORPHEUS_ARCH:
             return orpheus_from_file(meta_ctx, weight_ctx, n_threads, config, arch_type, cpu_only);
         default:
-            TTS_ABORT("%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+            fprintf(stdout,"%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+            return nullptr;
     }
 }

@@ -1,39 +0,0 @@
-# dependencies
-
-find_package(Threads REQUIRED)
-
-# third-party
-
-# ...
-
-# flags
-
-llama_add_compile_flags()
-
-# tools
-
-if (EMSCRIPTEN)
-else()
-    add_subdirectory(batched-bench)
-    add_subdirectory(gguf-split)
-    add_subdirectory(imatrix)
-    add_subdirectory(llama-bench)
-    add_subdirectory(main)
-    add_subdirectory(perplexity)
-    add_subdirectory(quantize)
-    if (LLAMA_BUILD_SERVER)
-        add_subdirectory(server)
-    endif()
-    add_subdirectory(run)
-    add_subdirectory(tokenize)
-    add_subdirectory(tts)
-    add_subdirectory(mtmd)
-    if (GGML_RPC)
-        add_subdirectory(rpc)
-    endif()
-    if (NOT GGML_BACKEND_DL)
-        # these examples use the backends directly and cannot be built with dynamic loading
-        add_subdirectory(cvector-generator)
-        add_subdirectory(export-lora)
-    endif()
-endif()

@@ -1,5 +0,0 @@
-set(TARGET llama-tts)
-add_executable(${TARGET} tts.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)