standardize tts linting and formatting

2025-09-11 09:34:37 +00:00 · 2025-08-17 14:11:30 +08:00 · 2025-08-17 14:11:30 +08:00 · 9935ac093f
commit 9935ac093f
parent cfc1a0d4ef
24 changed files with 371 additions and 355 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -474,7 +474,7 @@ set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(tts_adapter
            otherarch/tts_adapter.cpp)
-target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./tools ./common)
+target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./otherarch/ttscpp/include ./otherarch/ttscpp/src ./tools ./common)
 target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
--- a/Makefile
+++ b/Makefile
@@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
 	$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/tokenizer.cpp otherarch/ttscpp/src/sampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/args.cpp otherarch/ttscpp/src/t5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
+ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

 ggml/src/ggml-vulkan-shaders.cpp:
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -25,6 +25,22 @@
 #define M_PI		3.14159265358979323846
 #endif

+//imports required for tts.cpp to work
+#include "tts.cpp"
+#include "ttstokenizer.cpp"
+#include "ttssampler.cpp"
+#include "parler_model.cpp"
+#include "dac_model.cpp"
+#include "ttsutil.cpp"
+#include "ttst5_encoder_model.cpp"
+#include "phonemizer.cpp"
+#include "tts_model.cpp"
+#include "kokoro_model.cpp"
+#include "dia_model.cpp"
+#include "orpheus_model.cpp"
+#include "snac_model.cpp"
+#include "general_neural_audio_codec.cpp"
+
 enum TTS_VER
 {
    TTS_VER_2,
--- a/otherarch/ttscpp/include/phonemizer.h
+++ b/otherarch/ttscpp/include/phonemizer.h
@@ -12,7 +12,7 @@
 #include <unordered_map>
 #include <map>
 #include <unordered_set>
-#include "tokenizer.h"
+#include "ttstokenizer.h"
 #include <algorithm>
 #include <mutex>

@@ -323,7 +323,7 @@ public:
 #endif

 enum lookup_code {
-	SUCCESS = 100,
+	SUCCESS_TOTAL = 100,
 	SUCCESS_PARTIAL = 101,
 	FAILURE_UNFOUND = 200,
 	FAILURE_PHONETIC = 201,
--- a/otherarch/ttscpp/src/dia_model.h
+++ b/otherarch/ttscpp/src/dia_model.h
@@ -1,7 +1,7 @@
 #pragma once

 #include "dac_model.h"
-#include "sampler.h"
+#include "ttssampler.h"

 struct dia_encoder_layer {
    struct ggml_tensor * k;
--- a/otherarch/ttscpp/src/kokoro_model.h
+++ b/otherarch/ttscpp/src/kokoro_model.h
@@ -3,7 +3,7 @@

 #include <stdlib.h>
 #include "tts_model.h"
-#include "tokenizer.h"
+#include "ttstokenizer.h"
 #include "phonemizer.h"

 // Rather than using ISO 639-2 language codes, Kokoro voice pack specify their corresponding language via their first letter.
--- a/otherarch/ttscpp/src/orpheus_model.h
+++ b/otherarch/ttscpp/src/orpheus_model.h
@@ -1,7 +1,7 @@
 #pragma once

-#include "sampler.h"
-#include "tokenizer.h"
+#include "ttssampler.h"
+#include "ttstokenizer.h"
 #include "snac_model.h"

 // Orpheus uses vLLM with a llama-3 architecture. The only critical difference from the normal llama architecture is the use of kv heads.
--- a/otherarch/ttscpp/src/parler_model.h
+++ b/otherarch/ttscpp/src/parler_model.h
@@ -2,8 +2,8 @@
 #define parler_model_h

 #include "dac_model.h"
-#include "t5_encoder_model.h"
-#include "sampler.h"
+#include "ttst5_encoder_model.h"
+#include "ttssampler.h"

 enum parler_tensor {
    PARLER_EMBD,
--- a/otherarch/ttscpp/src/phonemizer.cpp
+++ b/otherarch/ttscpp/src/phonemizer.cpp
@@ -543,7 +543,7 @@ dictionary_response * phoneme_dictionary::lookup(corpus * text, std::string valu
 	}
 	std::vector<dictionary_response*> possibilities = lookup_map.at(value);
 	for (auto possible : possibilities) {
-		if (possible->code == SUCCESS || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
+		if (possible->code == SUCCESS_TOTAL || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
 			return possible;
 		}
 	}
@@ -818,7 +818,7 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
 			output->append(" ");
 		}
 		flags->update_for_word(word);
-		if (response->code != SUCCESS) {
+		if (response->code != SUCCESS_TOTAL) {
 			word += response->after_match;
 			output->append(response->value);
 			text->size_pop(word.size()+unaccented_size_difference);
@@ -1072,7 +1072,7 @@ dictionary_response * response_from_string(std::string value, std::string key) {
 	bool not_at_start = key[0] == '#';
 	bool not_at_end = key.back() == '#';
    if (!has_spacing) {
-    	dictionary_response * resp = new dictionary_response(SUCCESS, value);
+    	dictionary_response * resp = new dictionary_response(SUCCESS_TOTAL, value);
    	resp->expects_to_be_proceeded_by_number = expects_to_be_proceeded_by_number;
    	resp->not_at_clause_end = not_at_end;
    	resp->not_at_clause_start = not_at_start;
--- a/otherarch/ttscpp/src/ttsargs.cpp
+++ b/otherarch/ttscpp/src/ttsargs.cpp
--- a/otherarch/ttscpp/src/ttssampler.cpp
+++ b/otherarch/ttscpp/src/ttssampler.cpp
@@ -1,4 +1,4 @@
-#include "sampler.h"
+#include "ttssampler.h"

 void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
    // assume that we are pointing to the start of the first token output;
--- a/otherarch/ttscpp/src/ttssampler.h
+++ b/otherarch/ttscpp/src/ttssampler.h
--- a/otherarch/ttscpp/src/ttst5_encoder_model.cpp
+++ b/otherarch/ttscpp/src/ttst5_encoder_model.cpp
@@ -1,4 +1,4 @@
-#include "t5_encoder_model.h"
+#include "ttst5_encoder_model.h"

 static const std::map<std::string, t5_tensor> T5_TENSOR_GGUF_LOOKUP = {
    {"t5encoder.token_embd", T5_EMBD},
--- a/otherarch/ttscpp/src/ttst5_encoder_model.h
+++ b/otherarch/ttscpp/src/ttst5_encoder_model.h
@@ -2,7 +2,7 @@
 #define t5_encoder_model_h

 #include "tts_model.h"
-#include "tokenizer.h"
+#include "ttstokenizer.h"


 enum t5_tensor {
--- a/otherarch/ttscpp/src/ttstokenizer.cpp
+++ b/otherarch/ttscpp/src/ttstokenizer.cpp
@@ -1,4 +1,4 @@
-#include "tokenizer.h"
+#include "ttstokenizer.h"

 void token_trie::add(const std::string & gram, uint32_t token) {
    _add(gram, token, 0);
--- a/otherarch/ttscpp/src/ttstokenizer.h
+++ b/otherarch/ttscpp/src/ttstokenizer.h