mirror of https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00

Commit 52606e9b1d (parent 9935ac093f): tts cpp model is now loadable in kcpp

14 changed files with 131 additions and 223 deletions
Makefile (2 changes)

@@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
 	$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
+ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/ttscpp.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 ggml/src/ggml-vulkan-shaders.cpp:
Deleted file (5 lines)

@@ -1,5 +0,0 @@
-set(TARGET llama-diffusion-cli)
-add_executable(${TARGET} diffusion-cli.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)
Deleted file (54 lines)

@@ -1,54 +0,0 @@
-cmake_minimum_required(VERSION 3.13)
-
-find_package(Python3 REQUIRED)
-
-# Shader locations
-set(SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders")
-set(SHADER_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
-set(SHADER_HEADER "${SHADER_OUTPUT_DIR}/ggml-wgsl-shaders.hpp")
-file(MAKE_DIRECTORY ${SHADER_OUTPUT_DIR})
-
-message(STATUS "Shader output dir: ${SHADER_OUTPUT_DIR}")
-
-# Find all WGSL files
-file(GLOB WGSL_SHADER_FILES "${SHADER_DIR}/*.wgsl")
-
-# Generate the header using a Python script
-add_custom_command(
-    OUTPUT ${SHADER_HEADER}
-    COMMAND ${CMAKE_COMMAND} -E echo "Embedding WGSL shaders to ggml-wgsl-shaders.hpp"
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${SHADER_OUTPUT_DIR}
-    COMMAND ${CMAKE_COMMAND} -E env PYTHONIOENCODING=utf-8
-        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py
-        --input "${SHADER_DIR}"
-        --output "${SHADER_HEADER}"
-    DEPENDS ${WGSL_SHADER_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py
-    VERBATIM
-)
-
-add_custom_target(generate_shaders DEPENDS ${SHADER_HEADER})
-
-ggml_add_backend_library(ggml-webgpu
-    ggml-webgpu.cpp
-    ${SHADER_HEADER}
-    ../../include/ggml-webgpu.h
-)
-
-add_dependencies(ggml-webgpu generate_shaders)
-
-if(EMSCRIPTEN)
-    set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")
-
-    target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-    target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-else()
-    find_package(Dawn REQUIRED)
-    set(DawnWebGPU_TARGET dawn::webgpu_dawn)
-endif()
-
-if (GGML_WEBGPU_DEBUG)
-    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
-endif()
-
-target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
-target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
Deleted file (36 lines)

@@ -1,36 +0,0 @@
-if (DEFINED ZDNN_ROOT)
-    message(STATUS "zdnn: using ZDNN_ROOT override: ${ZDNN_ROOT}")
-    set(ZDNN_HINT "${ZDNN_ROOT}")
-else()
-    set(ZDNN_HINT "")
-endif()
-
-find_path(ZDNN_INCLUDE
-    NAMES zdnn.h
-    HINTS ${ZDNN_HINT} /usr /usr/local
-    PATH_SUFFIXES include)
-if (ZDNN_INCLUDE)
-    message(STATUS "zdnn: found include: ${ZDNN_INCLUDE}")
-else()
-    message(FATAL_ERROR "zdnn: include directory not found, please set ZDNN_ROOT to the proper path if necessary")
-endif()
-
-find_library(ZDNN_LIB
-    NAMES zdnn
-    HINTS ${ZDNN_HINT} /usr /usr/local
-    PATH_SUFFIXES lib lib64)
-if (ZDNN_LIB)
-    message(STATUS "zdnn: found library: ${ZDNN_LIB}")
-else()
-    message(FATAL_ERROR "zdnn: library not found, please set ZDNN_ROOT to the proper path if necessary")
-endif()
-
-file(GLOB GGML_SOURCES_ZDNN "*.c" "*.cpp")
-file(GLOB GGML_HEADERS_ZDNN "*.h" "*.hpp")
-
-ggml_add_backend_library(ggml-zdnn ${GGML_HEADERS_ZDNN} ${GGML_SOURCES_ZDNN})
-target_link_libraries(ggml-zdnn PRIVATE ${ZDNN_LIB})
-target_include_directories(ggml-zdnn PRIVATE ${ZDNN_INCLUDE})
-target_link_directories(ggml-zdnn PRIVATE ${ZDNN_LIB})
-
-target_compile_definitions(ggml-zdnn PRIVATE GGML_USE_ZDNN)
koboldcpp.py (13 changes)

@@ -1826,8 +1826,8 @@ def whisper_generate(genparams):
 def tts_load_model(ttc_model_filename,cts_model_filename):
     global args
     inputs = tts_load_model_inputs()
-    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8")
-    inputs.cts_model_filename = cts_model_filename.encode("UTF-8")
+    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8") if ttc_model_filename else "".encode("UTF-8")
+    inputs.cts_model_filename = cts_model_filename.encode("UTF-8") if cts_model_filename else "".encode("UTF-8")
     inputs.gpulayers = (999 if args.ttsgpu else 0)
     inputs.flash_attention = args.flashattention
     thds = args.threads
@@ -5602,7 +5602,7 @@ def show_gui():
         args.embeddingsmaxctx = (0 if embeddings_ctx_var.get()=="" else int(embeddings_ctx_var.get()))
         args.embeddingsgpu = (embeddings_gpu_var.get()==1)
 
-        if tts_model_var.get() != "" and wavtokenizer_var.get() != "":
+        if tts_model_var.get() != "":
             args.ttsthreads = (0 if tts_threads_var.get()=="" else int(tts_threads_var.get()))
             args.ttsmodel = tts_model_var.get()
             args.ttswavtokenizer = wavtokenizer_var.get()
@@ -7201,8 +7201,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
             exit_with_error(3,"Could not load whisper model: " + whispermodel)
 
     #handle tts model
-    if args.ttsmodel and args.ttsmodel!="" and args.ttswavtokenizer and args.ttswavtokenizer!="":
-        if not os.path.exists(args.ttsmodel) or not os.path.exists(args.ttswavtokenizer):
+    if args.ttsmodel and args.ttsmodel!="":
+        if not os.path.exists(args.ttsmodel) or (args.ttswavtokenizer and args.ttswavtokenizer!="" and not os.path.exists(args.ttswavtokenizer)):
            if args.ignoremissing:
                print("Ignoring missing TTS model files!")
                args.ttsmodel = None
@@ -7214,7 +7214,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
            ttsmodelpath = args.ttsmodel
           ttsmodelpath = os.path.abspath(ttsmodelpath)
           wavtokpath = args.ttswavtokenizer
-          wavtokpath = os.path.abspath(wavtokpath)
+          if wavtokpath:
+              wavtokpath = os.path.abspath(wavtokpath)
           loadok = tts_load_model(ttsmodelpath,wavtokpath)
           print("Load TTS Model OK: " + str(loadok))
           if not loadok:
@@ -115,6 +115,20 @@ bool gguf_tensor_exists(const std::string & gguf_filename, std::string tensor_na
     return found;
 }
 
+std::string gguf_get_model_arch(const std::string & gguf_filename)
+{
+    struct gguf_init_params ggufparams;
+    ggufparams.no_alloc = true;
+    ggufparams.ctx = NULL;
+    struct gguf_context * ctx = gguf_init_from_file(gguf_filename.c_str(), ggufparams);
+    if (!ctx) return "";
+    auto keyidx = gguf_find_key(ctx, "general.architecture");
+    std::string modelarch = "";
+    if (keyidx != -1) { modelarch = gguf_get_val_str(ctx, keyidx); }
+    gguf_free(ctx);
+    return modelarch;
+}
+
 //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
 FileFormat check_file_format(const std::string & fname, FileFormatExtraMeta * fileformatmeta)
 {
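Worth noting about the new helper: `gguf_init_params` with `no_alloc = true` means only GGUF metadata is parsed, so the architecture probe stays cheap even for multi-gigabyte files, and every failure path collapses to an empty string. A minimal caller-side sketch (a hypothetical snippet, not code from this commit; the file path is made up, and SUPPORTED_ARCHITECTURES is the tts.cpp architecture map this commit checks against later):

// sketch: probe a GGUF file's architecture before deciding how to load it
std::string arch = gguf_get_model_arch("/path/to/model.gguf"); // hypothetical path
if (arch == "") {
    printf("unreadable GGUF or missing general.architecture key\n");
} else if (SUPPORTED_ARCHITECTURES.find(arch) != SUPPORTED_ARCHITECTURES.end()) {
    printf("TTS.CPP can handle arch '%s'\n", arch.c_str());
} else {
    printf("arch '%s' goes down the regular path\n", arch.c_str());
}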
@@ -132,6 +132,7 @@ void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<in
     int &n_past, std::vector<int> &last_n_tokens, const int nctx, std::vector<int> &smartcontext,
     const bool useSmartContext, const bool requireFullSubset);
 bool gguf_tensor_exists(const std::string & filename, std::string tensor_name, bool exactmatch);
+std::string gguf_get_model_arch(const std::string & filename);
 
 size_t gpttype_calc_new_state_kv();
 size_t gpttype_calc_new_state_tokencount();
@@ -357,7 +357,7 @@ void pretty_progress(int step, int steps, float time) {
         }
     }
     progress += "|";
-    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",
+    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s ",
         progress.c_str(), step, steps,
         time > 1.0f || time == 0 ? time : (1.0f / time));
     fflush(stdout); // for linux
@@ -26,7 +26,8 @@
 #endif
 
 //imports required for tts.cpp to work
-#include "tts.cpp"
+#include "ttscommon.h"
+#include "ttscpp.cpp"
 #include "ttstokenizer.cpp"
 #include "ttssampler.cpp"
 #include "parler_model.cpp"
@@ -497,6 +498,10 @@ static int code_terminate_id = 151670;
 static int nthreads = 4;
 static int tts_max_len = 4096;
 
+//ttscpp specific
+static generation_configuration * ttscpp_config = nullptr;
+static struct tts_runner * ttscpp_runner = nullptr;
+
 int total_tts_gens = 0;
 
 bool ttstype_load_model(const tts_load_model_inputs inputs)
@@ -532,81 +537,103 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
 
     std::string modelfile_ttc = inputs.ttc_model_filename;
     std::string modelfile_cts = inputs.cts_model_filename;
-    printf("\nLoading TTS Model, OuteTTS: %s \nWavTokenizer: %s \n",modelfile_ttc.c_str(),modelfile_cts.c_str());
+    std::string detectedarch = gguf_get_model_arch(modelfile_ttc);
 
+    bool is_ttscpp_file = false;
+    if (detectedarch!="" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end()) {
+        is_ttscpp_file = true;
+        printf("\nLoading TTS.CPP Model Arch: %s \n", detectedarch.c_str());
+    }else{
+        printf("\nLoading OuteTTS Model, OuteTTS: %s \nWavTokenizer: %s \n",modelfile_ttc.c_str(),modelfile_cts.c_str());
+        if(modelfile_ttc=="" || modelfile_cts=="")
+        {
+            printf("\nWarning: KCPP OuteTTS missing a file! Make sure both TTS and WavTokenizer models are loaded.\n");
+            return false;
+        }
+    }
+
     ttsdebugmode = inputs.debugmode;
 
     // tts init
-    llama_model_params tts_model_params = llama_model_default_params();
-    llama_context_params tts_ctx_params = llama_context_default_params();
-    nthreads = inputs.threads;
-    tts_max_len = inputs.ttsmaxlen;
-    tts_model_params.use_mmap = false;
-    tts_model_params.use_mlock = false;
-    tts_model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
-    tts_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
-    int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
-    tts_model_params.main_gpu = kcpp_parseinfo_maindevice;
-    tts_ctx_params.n_ctx = 8192;
-    tts_ctx_params.offload_kqv = true;
-    tts_ctx_params.n_batch = 8192;
-    tts_ctx_params.n_ubatch = 512;
-    tts_ctx_params.n_threads = nthreads;
-    tts_ctx_params.n_threads_batch = nthreads;
-    tts_ctx_params.flash_attn = inputs.flash_attention;
-    tts_ctx_params.kv_unified = true;
-
-    llama_model * ttcmodel = llama_model_load_from_file(modelfile_ttc.c_str(), tts_model_params);
-    ttc_ctx = llama_init_from_model(ttcmodel, tts_ctx_params);
-
-    if (ttc_ctx == nullptr) {
-        printf("\nTTS Load Error: Failed to initialize ttc context!\n");
-        return false;
-    }
-
-    llama_model * ctsmodel = llama_model_load_from_file(modelfile_cts.c_str(), tts_model_params);
-
-    tts_ctx_params.embeddings = true; //this requires embeddings instead
-    tts_ctx_params.n_ubatch = tts_ctx_params.n_batch;
-    cts_ctx = llama_init_from_model(ctsmodel, tts_ctx_params);
-
-    if (cts_ctx == nullptr) {
-        printf("\nTTS Load Error: Failed to initialize cts context!\n");
-        return false;
-    }
-
-    std::vector<int> tmp = {1, 2, 3, 4};
-    llama_memory_clear(llama_get_memory(ttc_ctx),true);
-    auto er = llama_decode(ttc_ctx, llama_batch_get_one(tmp.data(), tmp.size()));
-    if(er!=0)
-    {
-        printf("\nTTS Eval returned nonzero: %d\n",er);
-        return false;
-    }
-
-    const llama_vocab * ttcvocab = llama_model_get_vocab(ttcmodel);
-    llama_tokens testoks = common_tokenize(ttcvocab,"<|space|>",false,true);
-    if (testoks.size() == 1) {
-        ttsver = TTS_VER_3;
-        printf("\nUsing v0.3 mode");
-        //note that the final word does NOT have a space at the end.
-        space_id = testoks[0];
-        testoks = common_tokenize(ttcvocab,"<|audio_end|>",false,true);
-        if (testoks.size() == 1) {
-            code_terminate_id = testoks[0];
-        }
-    } else {
-        ttsver = TTS_VER_2;
-        printf("\nUsing v0.2 mode");
-    }
-
-    //determine offset of <|0|>
-    testoks = common_tokenize(ttcvocab,"<|0|>",false,true);
-    if (testoks.size() == 1) {
-        cts_offset = testoks[0];
-    }
+    if (is_ttscpp_file) {
+        ttscpp_config = new generation_configuration("af_alloy", 50, 1.0, 1.0, true, "", 0, 1.0);
+        ttscpp_runner = runner_from_file(modelfile_ttc, inputs.threads, ttscpp_config, true);
+        if (ttscpp_runner == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize TTSCPP!\n");
+            return false;
+        }
+    } else { //outetts only
+        llama_model_params tts_model_params = llama_model_default_params();
+        llama_context_params tts_ctx_params = llama_context_default_params();
+
+        nthreads = inputs.threads;
+        tts_max_len = inputs.ttsmaxlen;
+        tts_model_params.use_mmap = false;
+        tts_model_params.use_mlock = false;
+        tts_model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
+        tts_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
+        int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
+        tts_model_params.main_gpu = kcpp_parseinfo_maindevice;
+        tts_ctx_params.n_ctx = 8192;
+        tts_ctx_params.offload_kqv = true;
+        tts_ctx_params.n_batch = 8192;
+        tts_ctx_params.n_ubatch = 512;
+        tts_ctx_params.n_threads = nthreads;
+        tts_ctx_params.n_threads_batch = nthreads;
+        tts_ctx_params.flash_attn = inputs.flash_attention;
+        tts_ctx_params.kv_unified = true;
+
+        llama_model * ttcmodel = llama_model_load_from_file(modelfile_ttc.c_str(), tts_model_params);
+        ttc_ctx = llama_init_from_model(ttcmodel, tts_ctx_params);
+
+        if (ttc_ctx == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize ttc context!\n");
+            return false;
+        }
+
+        llama_model * ctsmodel = llama_model_load_from_file(modelfile_cts.c_str(), tts_model_params);
+
+        tts_ctx_params.embeddings = true; //this requires embeddings instead
+        tts_ctx_params.n_ubatch = tts_ctx_params.n_batch;
+        cts_ctx = llama_init_from_model(ctsmodel, tts_ctx_params);
+
+        if (cts_ctx == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize cts context!\n");
+            return false;
+        }
+
+        std::vector<int> tmp = {1, 2, 3, 4};
+        llama_memory_clear(llama_get_memory(ttc_ctx),true);
+        auto er = llama_decode(ttc_ctx, llama_batch_get_one(tmp.data(), tmp.size()));
+        if(er!=0)
+        {
+            printf("\nTTS Eval returned nonzero: %d\n",er);
+            return false;
+        }
+
+        const llama_vocab * ttcvocab = llama_model_get_vocab(ttcmodel);
+        llama_tokens testoks = common_tokenize(ttcvocab,"<|space|>",false,true);
+        if (testoks.size() == 1) {
+            ttsver = TTS_VER_3;
+            printf("\nUsing v0.3 mode");
+            //note that the final word does NOT have a space at the end.
+            space_id = testoks[0];
+            testoks = common_tokenize(ttcvocab,"<|audio_end|>",false,true);
+            if (testoks.size() == 1) {
+                code_terminate_id = testoks[0];
+            }
+        } else {
+            ttsver = TTS_VER_2;
+            printf("\nUsing v0.2 mode");
+        }
+
+        //determine offset of <|0|>
+        testoks = common_tokenize(ttcvocab,"<|0|>",false,true);
+        if (testoks.size() == 1) {
+            cts_offset = testoks[0];
+        }
+    }
 
     printf("\nTTS Load Complete.\n");
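Taken together, the hunk above turns ttstype_load_model into a two-path loader keyed on the probed architecture. A condensed sketch of the resulting control flow (not verbatim code; the generation_configuration arguments are copied from the commit and their semantics belong to tts.cpp):

// condensed control flow of the new loader (sketch)
std::string detectedarch = gguf_get_model_arch(modelfile_ttc);
bool is_ttscpp_file = (detectedarch != "" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end());
if (is_ttscpp_file) {
    // single-GGUF TTS.CPP path: tts.cpp builds its own pipeline, no WavTokenizer needed
    ttscpp_config = new generation_configuration("af_alloy", 50, 1.0, 1.0, true, "", 0, 1.0);
    ttscpp_runner = runner_from_file(modelfile_ttc, inputs.threads, ttscpp_config, true);
    if (ttscpp_runner == nullptr) { return false; } // load failure is now recoverable
} else {
    // legacy OuteTTS path: requires both the TTC model and the WavTokenizer (cts)
    // model, loaded through llama.cpp into ttc_ctx / cts_ctx exactly as before
}

This also explains the koboldcpp.py changes earlier in the commit: the wavtokenizer argument becomes optional because the TTS.CPP path never uses it.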
@@ -1,4 +1,4 @@
-#include "tts.h"
+#include "ttscpp.h"
 #include "ttsargs.h"
 #include "ttscommon.h"
 #include "playback.h"
@@ -1,4 +1,4 @@
-#include "tts.h"
+#include "ttscpp.h"
 #include <mutex>
 
 // A list of all of the top level GGUF names under kokoro.duration_predictor that have quantization compatible tensors.
@@ -133,15 +133,18 @@ struct tts_runner * runner_from_file(const std::string & fname, int n_threads, g
     };
     gguf_context * meta_ctx = gguf_init_from_file(fname.c_str(), params);
     if (!meta_ctx) {
-        TTS_ABORT("%s failed for file %s\n", __func__, fname.c_str());
+        fprintf(stdout,"%s failed for file %s\n", __func__, fname.c_str());
+        return nullptr;
     }
     int arch_key = gguf_find_key(meta_ctx, "general.architecture");
     if (arch_key == -1) {
-        TTS_ABORT("%s failed for file %s. No architecture is set.\n", __func__, fname.c_str());
+        fprintf(stdout,"%s failed for file %s. No architecture is set.\n", __func__, fname.c_str());
+        return nullptr;
     }
     std::string arch = std::string(gguf_get_val_str(meta_ctx, arch_key));
     if (SUPPORTED_ARCHITECTURES.find(arch) == SUPPORTED_ARCHITECTURES.end()) {
-        TTS_ABORT("%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+        fprintf(stdout,"%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+        return nullptr;
     }
     tts_arch arch_type = SUPPORTED_ARCHITECTURES.at(arch);
     switch(arch_type) {
@@ -154,7 +157,8 @@ struct tts_runner * runner_from_file(const std::string & fname, int n_threads, g
         case ORPHEUS_ARCH:
             return orpheus_from_file(meta_ctx, weight_ctx, n_threads, config, arch_type, cpu_only);
         default:
-            TTS_ABORT("%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+            fprintf(stdout,"%s failed for file %s. The architecture '%s' is not supported.", __func__, fname.c_str(), arch.c_str());
+            return nullptr;
     }
 }
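The pattern is the same in all four spots: TTS_ABORT, which kills the whole process, is replaced by a log line plus return nullptr, presumably because runner_from_file now runs inside the KoboldCpp server process rather than the standalone tts.cpp CLI. Callers then only need a null check; a sketch under that assumption (hypothetical caller, with fname and the other arguments as in the signature above):

// sketch: a load failure is now reported to the caller instead of aborting
struct tts_runner * runner = runner_from_file(fname, n_threads, config, cpu_only);
if (runner == nullptr) {
    // keep serving; TTS is simply unavailable for this model file
    fprintf(stdout, "TTS model load failed: %s\n", fname.c_str());
}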
Deleted file (39 lines)

@@ -1,39 +0,0 @@
-# dependencies
-
-find_package(Threads REQUIRED)
-
-# third-party
-
-# ...
-
-# flags
-
-llama_add_compile_flags()
-
-# tools
-
-if (EMSCRIPTEN)
-else()
-    add_subdirectory(batched-bench)
-    add_subdirectory(gguf-split)
-    add_subdirectory(imatrix)
-    add_subdirectory(llama-bench)
-    add_subdirectory(main)
-    add_subdirectory(perplexity)
-    add_subdirectory(quantize)
-    if (LLAMA_BUILD_SERVER)
-        add_subdirectory(server)
-    endif()
-    add_subdirectory(run)
-    add_subdirectory(tokenize)
-    add_subdirectory(tts)
-    add_subdirectory(mtmd)
-    if (GGML_RPC)
-        add_subdirectory(rpc)
-    endif()
-    if (NOT GGML_BACKEND_DL)
-        # these examples use the backends directly and cannot be built with dynamic loading
-        add_subdirectory(cvector-generator)
-        add_subdirectory(export-lora)
-    endif()
-endif()
Deleted file (5 lines)

@@ -1,5 +0,0 @@
-set(TARGET llama-tts)
-add_executable(${TARGET} tts.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)