mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
standardize tts linting and formatting
This commit is contained in:
parent
cfc1a0d4ef
commit
9935ac093f
24 changed files with 371 additions and 355 deletions
|
@ -474,7 +474,7 @@ set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
add_library(tts_adapter
|
add_library(tts_adapter
|
||||||
otherarch/tts_adapter.cpp)
|
otherarch/tts_adapter.cpp)
|
||||||
target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./tools ./common)
|
target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./otherarch/ttscpp/include ./otherarch/ttscpp/src ./tools ./common)
|
||||||
target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
|
target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
|
||||||
target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
|
target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
|
||||||
set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
|
||||||
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/tokenizer.cpp otherarch/ttscpp/src/sampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/args.cpp otherarch/ttscpp/src/t5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
ggml/src/ggml-vulkan-shaders.cpp:
|
ggml/src/ggml-vulkan-shaders.cpp:
|
||||||
|
|
|
@ -25,6 +25,22 @@
|
||||||
#define M_PI 3.14159265358979323846
|
#define M_PI 3.14159265358979323846
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//imports required for tts.cpp to work
|
||||||
|
#include "tts.cpp"
|
||||||
|
#include "ttstokenizer.cpp"
|
||||||
|
#include "ttssampler.cpp"
|
||||||
|
#include "parler_model.cpp"
|
||||||
|
#include "dac_model.cpp"
|
||||||
|
#include "ttsutil.cpp"
|
||||||
|
#include "ttst5_encoder_model.cpp"
|
||||||
|
#include "phonemizer.cpp"
|
||||||
|
#include "tts_model.cpp"
|
||||||
|
#include "kokoro_model.cpp"
|
||||||
|
#include "dia_model.cpp"
|
||||||
|
#include "orpheus_model.cpp"
|
||||||
|
#include "snac_model.cpp"
|
||||||
|
#include "general_neural_audio_codec.cpp"
|
||||||
|
|
||||||
enum TTS_VER
|
enum TTS_VER
|
||||||
{
|
{
|
||||||
TTS_VER_2,
|
TTS_VER_2,
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include "tokenizer.h"
|
#include "ttstokenizer.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
|
@ -323,7 +323,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
enum lookup_code {
|
enum lookup_code {
|
||||||
SUCCESS = 100,
|
SUCCESS_TOTAL = 100,
|
||||||
SUCCESS_PARTIAL = 101,
|
SUCCESS_PARTIAL = 101,
|
||||||
FAILURE_UNFOUND = 200,
|
FAILURE_UNFOUND = 200,
|
||||||
FAILURE_PHONETIC = 201,
|
FAILURE_PHONETIC = 201,
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "dac_model.h"
|
#include "dac_model.h"
|
||||||
#include "sampler.h"
|
#include "ttssampler.h"
|
||||||
|
|
||||||
struct dia_encoder_layer {
|
struct dia_encoder_layer {
|
||||||
struct ggml_tensor * k;
|
struct ggml_tensor * k;
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "tts_model.h"
|
#include "tts_model.h"
|
||||||
#include "tokenizer.h"
|
#include "ttstokenizer.h"
|
||||||
#include "phonemizer.h"
|
#include "phonemizer.h"
|
||||||
|
|
||||||
// Rather than using ISO 639-2 language codes, Kokoro voice pack specify their corresponding language via their first letter.
|
// Rather than using ISO 639-2 language codes, Kokoro voice pack specify their corresponding language via their first letter.
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "sampler.h"
|
#include "ttssampler.h"
|
||||||
#include "tokenizer.h"
|
#include "ttstokenizer.h"
|
||||||
#include "snac_model.h"
|
#include "snac_model.h"
|
||||||
|
|
||||||
// Orpheus uses vLLM with a llama-3 architecture. The only critical difference from the normal llama architecture is the use of kv heads.
|
// Orpheus uses vLLM with a llama-3 architecture. The only critical difference from the normal llama architecture is the use of kv heads.
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
#define parler_model_h
|
#define parler_model_h
|
||||||
|
|
||||||
#include "dac_model.h"
|
#include "dac_model.h"
|
||||||
#include "t5_encoder_model.h"
|
#include "ttst5_encoder_model.h"
|
||||||
#include "sampler.h"
|
#include "ttssampler.h"
|
||||||
|
|
||||||
enum parler_tensor {
|
enum parler_tensor {
|
||||||
PARLER_EMBD,
|
PARLER_EMBD,
|
||||||
|
|
|
@ -543,7 +543,7 @@ dictionary_response * phoneme_dictionary::lookup(corpus * text, std::string valu
|
||||||
}
|
}
|
||||||
std::vector<dictionary_response*> possibilities = lookup_map.at(value);
|
std::vector<dictionary_response*> possibilities = lookup_map.at(value);
|
||||||
for (auto possible : possibilities) {
|
for (auto possible : possibilities) {
|
||||||
if (possible->code == SUCCESS || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
|
if (possible->code == SUCCESS_TOTAL || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
|
||||||
return possible;
|
return possible;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -818,7 +818,7 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
|
||||||
output->append(" ");
|
output->append(" ");
|
||||||
}
|
}
|
||||||
flags->update_for_word(word);
|
flags->update_for_word(word);
|
||||||
if (response->code != SUCCESS) {
|
if (response->code != SUCCESS_TOTAL) {
|
||||||
word += response->after_match;
|
word += response->after_match;
|
||||||
output->append(response->value);
|
output->append(response->value);
|
||||||
text->size_pop(word.size()+unaccented_size_difference);
|
text->size_pop(word.size()+unaccented_size_difference);
|
||||||
|
@ -1072,7 +1072,7 @@ dictionary_response * response_from_string(std::string value, std::string key) {
|
||||||
bool not_at_start = key[0] == '#';
|
bool not_at_start = key[0] == '#';
|
||||||
bool not_at_end = key.back() == '#';
|
bool not_at_end = key.back() == '#';
|
||||||
if (!has_spacing) {
|
if (!has_spacing) {
|
||||||
dictionary_response * resp = new dictionary_response(SUCCESS, value);
|
dictionary_response * resp = new dictionary_response(SUCCESS_TOTAL, value);
|
||||||
resp->expects_to_be_proceeded_by_number = expects_to_be_proceeded_by_number;
|
resp->expects_to_be_proceeded_by_number = expects_to_be_proceeded_by_number;
|
||||||
resp->not_at_clause_end = not_at_end;
|
resp->not_at_clause_end = not_at_end;
|
||||||
resp->not_at_clause_start = not_at_start;
|
resp->not_at_clause_start = not_at_start;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#include "sampler.h"
|
#include "ttssampler.h"
|
||||||
|
|
||||||
void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
|
void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
|
||||||
// assume that we are pointing to the start of the first token output;
|
// assume that we are pointing to the start of the first token output;
|
|
@ -1,4 +1,4 @@
|
||||||
#include "t5_encoder_model.h"
|
#include "ttst5_encoder_model.h"
|
||||||
|
|
||||||
static const std::map<std::string, t5_tensor> T5_TENSOR_GGUF_LOOKUP = {
|
static const std::map<std::string, t5_tensor> T5_TENSOR_GGUF_LOOKUP = {
|
||||||
{"t5encoder.token_embd", T5_EMBD},
|
{"t5encoder.token_embd", T5_EMBD},
|
|
@ -2,7 +2,7 @@
|
||||||
#define t5_encoder_model_h
|
#define t5_encoder_model_h
|
||||||
|
|
||||||
#include "tts_model.h"
|
#include "tts_model.h"
|
||||||
#include "tokenizer.h"
|
#include "ttstokenizer.h"
|
||||||
|
|
||||||
|
|
||||||
enum t5_tensor {
|
enum t5_tensor {
|
|
@ -1,4 +1,4 @@
|
||||||
#include "tokenizer.h"
|
#include "ttstokenizer.h"
|
||||||
|
|
||||||
void token_trie::add(const std::string & gram, uint32_t token) {
|
void token_trie::add(const std::string & gram, uint32_t token) {
|
||||||
_add(gram, token, 0);
|
_add(gram, token, 0);
|
Loading…
Add table
Add a link
Reference in a new issue