standardize tts linting and formatting

This commit is contained in:
Concedo 2025-08-17 14:11:30 +08:00
parent cfc1a0d4ef
commit 9935ac093f
24 changed files with 371 additions and 355 deletions

View file

@ -474,7 +474,7 @@ set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(tts_adapter add_library(tts_adapter
otherarch/tts_adapter.cpp) otherarch/tts_adapter.cpp)
target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./tools ./common) target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./otherarch/ttscpp/include ./otherarch/ttscpp/src ./tools ./common)
target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

View file

@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS) embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/tokenizer.cpp otherarch/ttscpp/src/sampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/args.cpp otherarch/ttscpp/src/t5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS) ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
ggml/src/ggml-vulkan-shaders.cpp: ggml/src/ggml-vulkan-shaders.cpp:

View file

@ -25,6 +25,22 @@
#define M_PI 3.14159265358979323846 #define M_PI 3.14159265358979323846
#endif #endif
//imports required for tts.cpp to work
#include "tts.cpp"
#include "ttstokenizer.cpp"
#include "ttssampler.cpp"
#include "parler_model.cpp"
#include "dac_model.cpp"
#include "ttsutil.cpp"
#include "ttst5_encoder_model.cpp"
#include "phonemizer.cpp"
#include "tts_model.cpp"
#include "kokoro_model.cpp"
#include "dia_model.cpp"
#include "orpheus_model.cpp"
#include "snac_model.cpp"
#include "general_neural_audio_codec.cpp"
enum TTS_VER enum TTS_VER
{ {
TTS_VER_2, TTS_VER_2,

View file

@ -12,7 +12,7 @@
#include <unordered_map> #include <unordered_map>
#include <map> #include <map>
#include <unordered_set> #include <unordered_set>
#include "tokenizer.h" #include "ttstokenizer.h"
#include <algorithm> #include <algorithm>
#include <mutex> #include <mutex>
@ -323,7 +323,7 @@ public:
#endif #endif
enum lookup_code { enum lookup_code {
SUCCESS = 100, SUCCESS_TOTAL = 100,
SUCCESS_PARTIAL = 101, SUCCESS_PARTIAL = 101,
FAILURE_UNFOUND = 200, FAILURE_UNFOUND = 200,
FAILURE_PHONETIC = 201, FAILURE_PHONETIC = 201,

View file

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "dac_model.h" #include "dac_model.h"
#include "sampler.h" #include "ttssampler.h"
struct dia_encoder_layer { struct dia_encoder_layer {
struct ggml_tensor * k; struct ggml_tensor * k;

View file

@ -3,7 +3,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "tts_model.h" #include "tts_model.h"
#include "tokenizer.h" #include "ttstokenizer.h"
#include "phonemizer.h" #include "phonemizer.h"
// Rather than using ISO 639-2 language codes, Kokoro voice pack specify their corresponding language via their first letter. // Rather than using ISO 639-2 language codes, Kokoro voice pack specify their corresponding language via their first letter.

View file

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "sampler.h" #include "ttssampler.h"
#include "tokenizer.h" #include "ttstokenizer.h"
#include "snac_model.h" #include "snac_model.h"
// Orpheus uses vLLM with a llama-3 architecture. The only critical difference from the normal llama architecture is the use of kv heads. // Orpheus uses vLLM with a llama-3 architecture. The only critical difference from the normal llama architecture is the use of kv heads.

View file

@ -2,8 +2,8 @@
#define parler_model_h #define parler_model_h
#include "dac_model.h" #include "dac_model.h"
#include "t5_encoder_model.h" #include "ttst5_encoder_model.h"
#include "sampler.h" #include "ttssampler.h"
enum parler_tensor { enum parler_tensor {
PARLER_EMBD, PARLER_EMBD,

View file

@ -543,7 +543,7 @@ dictionary_response * phoneme_dictionary::lookup(corpus * text, std::string valu
} }
std::vector<dictionary_response*> possibilities = lookup_map.at(value); std::vector<dictionary_response*> possibilities = lookup_map.at(value);
for (auto possible : possibilities) { for (auto possible : possibilities) {
if (possible->code == SUCCESS || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) { if (possible->code == SUCCESS_TOTAL || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
return possible; return possible;
} }
} }
@ -818,7 +818,7 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
output->append(" "); output->append(" ");
} }
flags->update_for_word(word); flags->update_for_word(word);
if (response->code != SUCCESS) { if (response->code != SUCCESS_TOTAL) {
word += response->after_match; word += response->after_match;
output->append(response->value); output->append(response->value);
text->size_pop(word.size()+unaccented_size_difference); text->size_pop(word.size()+unaccented_size_difference);
@ -1072,7 +1072,7 @@ dictionary_response * response_from_string(std::string value, std::string key) {
bool not_at_start = key[0] == '#'; bool not_at_start = key[0] == '#';
bool not_at_end = key.back() == '#'; bool not_at_end = key.back() == '#';
if (!has_spacing) { if (!has_spacing) {
dictionary_response * resp = new dictionary_response(SUCCESS, value); dictionary_response * resp = new dictionary_response(SUCCESS_TOTAL, value);
resp->expects_to_be_proceeded_by_number = expects_to_be_proceeded_by_number; resp->expects_to_be_proceeded_by_number = expects_to_be_proceeded_by_number;
resp->not_at_clause_end = not_at_end; resp->not_at_clause_end = not_at_end;
resp->not_at_clause_start = not_at_start; resp->not_at_clause_start = not_at_start;

View file

@ -1,4 +1,4 @@
#include "sampler.h" #include "ttssampler.h"
void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) { void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
// assume that we are pointing to the start of the first token output; // assume that we are pointing to the start of the first token output;

View file

@ -1,4 +1,4 @@
#include "t5_encoder_model.h" #include "ttst5_encoder_model.h"
static const std::map<std::string, t5_tensor> T5_TENSOR_GGUF_LOOKUP = { static const std::map<std::string, t5_tensor> T5_TENSOR_GGUF_LOOKUP = {
{"t5encoder.token_embd", T5_EMBD}, {"t5encoder.token_embd", T5_EMBD},

View file

@ -2,7 +2,7 @@
#define t5_encoder_model_h #define t5_encoder_model_h
#include "tts_model.h" #include "tts_model.h"
#include "tokenizer.h" #include "ttstokenizer.h"
enum t5_tensor { enum t5_tensor {

View file

@ -1,4 +1,4 @@
#include "tokenizer.h" #include "ttstokenizer.h"
void token_trie::add(const std::string & gram, uint32_t token) { void token_trie::add(const std::string & gram, uint32_t token) {
_add(gram, token, 0); _add(gram, token, 0);