diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50ff864f0..9aec2b4e8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -474,7 +474,7 @@ set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 add_library(tts_adapter
             otherarch/tts_adapter.cpp)
-target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./tools ./common)
+target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./otherarch/ttscpp/include ./otherarch/ttscpp/src ./tools ./common)
 target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/Makefile b/Makefile
index 2400dffff..0edece97d 100644
--- a/Makefile
+++ b/Makefile
@@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
 	$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/tokenizer.cpp otherarch/ttscpp/src/sampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/args.cpp otherarch/ttscpp/src/t5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
+ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 ggml/src/ggml-vulkan-shaders.cpp:
diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp
index 86caf67e5..81fb47b70 100644
--- a/otherarch/tts_adapter.cpp
+++ b/otherarch/tts_adapter.cpp
@@ -25,6 +25,22 @@
 #define M_PI		3.14159265358979323846
 #endif
 
+// Source files required for tts.cpp to work; they are #included directly so they compile as part of this translation unit.
+#include "tts.cpp"
+#include "ttstokenizer.cpp"
+#include "ttssampler.cpp"
+#include "parler_model.cpp"
+#include "dac_model.cpp"
+#include "ttsutil.cpp"
+#include "ttst5_encoder_model.cpp"
+#include "phonemizer.cpp"
+#include "tts_model.cpp"
+#include "kokoro_model.cpp"
+#include "dia_model.cpp"
+#include "orpheus_model.cpp"
+#include "snac_model.cpp"
+#include "general_neural_audio_codec.cpp"
+
 enum TTS_VER
 {
     TTS_VER_2,
diff --git a/otherarch/ttscpp/cli/vad.cpp b/otherarch/ttscpp/cli/vad.cpp
index 9468ef2fa..ab966c3be 100644
--- a/otherarch/ttscpp/cli/vad.cpp
+++ b/otherarch/ttscpp/cli/vad.cpp
@@ -9,8 +9,8 @@ float energy(float * chunk, int count) {
 }
 
 void apply_energy_voice_inactivity_detection(
-	tts_response & data, 
-	float sample_rate, 
+	tts_response & data,
+	float sample_rate,
 	int ms_per_frame,
 	int frame_threshold,
 	float normalized_energy_threshold,
diff --git a/otherarch/ttscpp/include/audio_file.h b/otherarch/ttscpp/include/audio_file.h
index dd1f50fb3..cf79de446 100644
--- a/otherarch/ttscpp/include/audio_file.h
+++ b/otherarch/ttscpp/include/audio_file.h
@@ -81,23 +81,23 @@ template <class T>
 class AudioFile
 {
 public:
-    
+
     //=============================================================
     typedef std::vector<std::vector<T> > AudioBuffer;
-    
+
     //=============================================================
     /** Constructor */
     AudioFile();
-    
+
     /** Constructor, using a given file path to load a file */
     AudioFile (std::string filePath);
-        
+
     //=============================================================
     /** Loads an audio file from a given file path.
      * @Returns true if the file was successfully loaded
      */
     bool load (std::string filePath);
-    
+
     /** Saves an audio file to a given file path.
      * @Returns true if the file was successfully saved
      */
@@ -107,66 +107,66 @@ public:
      * @Returns true if the write was successful
      */
     bool writeData (std::vector<uint8_t>& fileData, AudioFileFormat format = AudioFileFormat::Wave);
-        
+
     //=============================================================
     /** Loads an audio file from data in memory */
     bool loadFromMemory (std::vector<uint8_t>& fileData);
-    
+
     //=============================================================
     /** @Returns the sample rate */
     uint32_t getSampleRate() const;
-    
+
     /** @Returns the number of audio channels in the buffer */
     int getNumChannels() const;
 
     /** @Returns true if the audio file is mono */
     bool isMono() const;
-    
+
     /** @Returns true if the audio file is stereo */
     bool isStereo() const;
-    
+
     /** @Returns the bit depth of each sample */
     int getBitDepth() const;
-    
+
     /** @Returns the number of samples per channel */
     int getNumSamplesPerChannel() const;
-    
+
     /** @Returns the length in seconds of the audio file based on the number of samples and sample rate */
     double getLengthInSeconds() const;
-    
+
     /** Prints a summary of the audio file to the console */
     void printSummary() const;
-    
+
     //=============================================================
-    
+
     /** Set the audio buffer for this AudioFile by copying samples from another buffer.
      * @Returns true if the buffer was copied successfully.
      */
     bool setAudioBuffer (AudioBuffer& newBuffer);
-    
+
     /** Sets the audio buffer to a given number of channels and number of samples per channel. This will try to preserve
      * the existing audio, adding zeros to any new channels or new samples in a given channel.
      */
     void setAudioBufferSize (int numChannels, int numSamples);
-    
+
     /** Sets the number of samples per channel in the audio buffer. This will try to preserve
      * the existing audio, adding zeros to new samples in a given channel if the number of samples is increased.
      */
     void setNumSamplesPerChannel (int numSamples);
-    
+
     /** Sets the number of channels. New channels will have the correct number of samples and be initialised to zero */
     void setNumChannels (int numChannels);
-    
+
     /** Sets the bit depth for the audio file. If you use the save() function, this bit depth rate will be used */
     void setBitDepth (int numBitsPerSample);
-    
+
     /** Sets the sample rate for the audio file. If you use the save() function, this sample rate will be used */
     void setSampleRate (uint32_t newSampleRate);
-    
+
     //=============================================================
     /** Sets whether the library should log error messages to the console. By default this is true */
     void shouldLogErrorsToConsole (bool logErrors);
-    
+
     //=============================================================
     /** A vector of vectors holding the audio samples for the AudioFile. You can
      * access the samples by channel and then by sample index, i.e:
@@ -174,21 +174,21 @@ public:
      *      samples[channel][sampleIndex]
      */
     AudioBuffer samples;
-    
+
     //=============================================================
     /** An optional iXML chunk that can be added to the AudioFile.
      */
     std::string iXMLChunk;
-    
+
 private:
-    
+
     //=============================================================
     enum class Endianness
     {
         LittleEndian,
         BigEndian
     };
-    
+
     //=============================================================
     AudioFileFormat determineAudioFileFormat (std::vector<uint8_t>& fileData);
     bool decodeWaveFile (std::vector<uint8_t>& fileData);
@@ -202,10 +202,10 @@ private:
     //=============================================================
     bool saveToWaveFile (std::string filePath);
     bool saveToAiffFile (std::string filePath);
-    
+
     //=============================================================
     void clearAudioBuffer();
-    
+
     //=============================================================
     int32_t fourBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian);
     int16_t twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness = Endianness::LittleEndian);
@@ -216,18 +216,18 @@ private:
     uint32_t getAiffSampleRate (std::vector<uint8_t>& fileData, int sampleRateStartIndex);
     bool tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std::vector<uint8_t>& v2, int startIndex2);
     void addSampleRateToAiffData (std::vector<uint8_t>& fileData, uint32_t sampleRate);
-    
+
     //=============================================================
     void addStringToFileData (std::vector<uint8_t>& fileData, std::string s);
     void addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness = Endianness::LittleEndian);
     void addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness = Endianness::LittleEndian);
-    
+
     //=============================================================
     bool writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath);
-    
+
     //=============================================================
     void reportError (std::string errorMessage);
-    
+
     //=============================================================
     AudioFileFormat audioFileFormat;
     uint32_t sampleRate;
@@ -242,38 +242,38 @@ struct AudioSampleConverter
     //=============================================================
     /** Convert a signed 8-bit integer to an audio sample */
     static T signedByteToSample (int8_t sample);
-    
+
     /** Convert an audio sample to an signed 8-bit representation */
     static int8_t sampleToSignedByte (T sample);
-    
+
     //=============================================================
     /** Convert an unsigned 8-bit integer to an audio sample */
     static T unsignedByteToSample (uint8_t sample);
-    
+
     /** Convert an audio sample to an unsigned 8-bit representation */
     static uint8_t sampleToUnsignedByte (T sample);
-    
+
     //=============================================================
     /** Convert a 16-bit integer to an audio sample */
     static T sixteenBitIntToSample (int16_t sample);
-    
+
     /** Convert a an audio sample to a 16-bit integer */
     static int16_t sampleToSixteenBitInt (T sample);
-    
+
     //=============================================================
     /** Convert a 24-bit value (int a 32-bit int) to an audio sample */
     static T twentyFourBitIntToSample (int32_t sample);
-    
+
     /** Convert a an audio sample to a 24-bit value (in a 32-bit integer) */
     static int32_t sampleToTwentyFourBitInt (T sample);
-    
+
     //=============================================================
     /** Convert a 32-bit signed integer to an audio sample */
     static T thirtyTwoBitIntToSample (int32_t sample);
-    
+
     /** Convert a an audio sample to a 32-bit signed integer */
     static int32_t sampleToThirtyTwoBitInt (T sample);
-    
+
     //=============================================================
     /** Helper clamp function to enforce ranges */
     static T clamp (T v1, T minValue, T maxValue);
@@ -414,30 +414,30 @@ template <class T>
 bool AudioFile<T>::setAudioBuffer (AudioBuffer& newBuffer)
 {
     int numChannels = (int)newBuffer.size();
-    
+
     if (numChannels <= 0)
     {
         assert (false && "The buffer you are trying to use has no channels");
         return false;
     }
-    
+
     size_t numSamples = newBuffer[0].size();
-    
+
     // set the number of channels
     samples.resize (newBuffer.size());
-    
+
     for (int k = 0; k < getNumChannels(); k++)
     {
         assert (newBuffer[k].size() == numSamples);
-        
+
         samples[k].resize (numSamples);
-        
+
         for (size_t i = 0; i < numSamples; i++)
         {
             samples[k][i] = newBuffer[k][i];
         }
     }
-    
+
     return true;
 }
 
@@ -454,11 +454,11 @@ template <class T>
 void AudioFile<T>::setNumSamplesPerChannel (int numSamples)
 {
     int originalSize = getNumSamplesPerChannel();
-    
+
     for (int i = 0; i < getNumChannels();i++)
     {
         samples[i].resize (numSamples);
-        
+
         // set any new samples to zero
         if (numSamples > originalSize)
             std::fill (samples[i].begin() + originalSize, samples[i].end(), (T)0.);
@@ -471,9 +471,9 @@ void AudioFile<T>::setNumChannels (int numChannels)
 {
     int originalNumChannels = getNumChannels();
     int originalNumSamplesPerChannel = getNumSamplesPerChannel();
-    
+
     samples.resize (numChannels);
-    
+
     // make sure any new channels are set to the right size
     // and filled with zeros
     if (numChannels > originalNumChannels)
@@ -512,14 +512,14 @@ template <class T>
 bool AudioFile<T>::load (std::string filePath)
 {
     std::ifstream file (filePath, std::ios::binary);
-    
+
     // check the file exists
     if (! file.good())
     {
         reportError ("ERROR: File doesn't exist or otherwise can't load file\n"  + filePath);
         return false;
     }
-    
+
     std::vector<uint8_t> fileData;
 
     file.unsetf (std::ios::skipws);
@@ -539,7 +539,7 @@ bool AudioFile<T>::load (std::string filePath)
         reportError ("ERROR: Couldn't read entire file\n" + filePath);
         return false;
     }
-    
+
     // Handle very small files that will break our attempt to read the
     // first header info from them
     if (fileData.size() < 12)
@@ -559,7 +559,7 @@ bool AudioFile<T>::loadFromMemory (std::vector<uint8_t>& fileData)
 {
     // get audio file format
     audioFileFormat = determineAudioFileFormat (fileData);
-    
+
     if (audioFileFormat == AudioFileFormat::Wave)
     {
         return decodeWaveFile (fileData);
@@ -584,13 +584,13 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
     std::string headerChunkID (fileData.begin(), fileData.begin() + 4);
     //int32_t fileSizeInBytes = fourBytesToInt (fileData, 4) + 8;
     std::string format (fileData.begin() + 8, fileData.begin() + 12);
-    
+
     // -----------------------------------------------------------
     // try and find the start points of key chunks
     int indexOfDataChunk = getIndexOfChunk (fileData, "data", 12);
     int indexOfFormatChunk = getIndexOfChunk (fileData, "fmt ", 12);
     int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12);
-    
+
     // if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected
     // then it is unlikely we'll able to read this file, so abort
     if (indexOfDataChunk == -1 || indexOfFormatChunk == -1 || headerChunkID != "RIFF" || format != "WAVE")
@@ -598,7 +598,7 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
         reportError ("ERROR: this doesn't seem to be a valid .WAV file");
         return false;
     }
-    
+
     // -----------------------------------------------------------
     // FORMAT CHUNK
     int f = indexOfFormatChunk;
@@ -610,7 +610,7 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
     uint32_t numBytesPerSecond = fourBytesToInt (fileData, f + 16);
     uint16_t numBytesPerBlock = twoBytesToInt (fileData, f + 20);
     bitDepth = (int) twoBytesToInt (fileData, f + 22);
-    
+
     if (bitDepth > sizeof (T) * 8)
     {
         std::string message = "ERROR: you are trying to read a ";
@@ -621,61 +621,61 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
         reportError (message);
         return false;
     }
-    
+
     uint16_t numBytesPerSample = static_cast<uint16_t> (bitDepth) / 8;
-    
+
     // check that the audio format is PCM or Float or extensible
     if (audioFormat != WavAudioFormat::PCM && audioFormat != WavAudioFormat::IEEEFloat && audioFormat != WavAudioFormat::Extensible)
     {
         reportError ("ERROR: this .WAV file is encoded in a format that this library does not support at present");
         return false;
     }
-    
+
     // check the number of channels is mono or stereo
     if (numChannels < 1 || numChannels > 128)
     {
         reportError ("ERROR: this WAV file seems to be an invalid number of channels (or corrupted?)");
         return false;
     }
-    
+
     // check header data is consistent
     if (numBytesPerSecond != static_cast<uint32_t> ((numChannels * sampleRate * bitDepth) / 8) || numBytesPerBlock != (numChannels * numBytesPerSample))
     {
         reportError ("ERROR: the header data in this WAV file seems to be inconsistent");
         return false;
     }
-    
+
     // check bit depth is either 8, 16, 24 or 32 bit
     if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32)
     {
         reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits");
         return false;
     }
-    
+
     // -----------------------------------------------------------
     // DATA CHUNK
     int d = indexOfDataChunk;
     std::string dataChunkID (fileData.begin() + d, fileData.begin() + d + 4);
     int32_t dataChunkSize = fourBytesToInt (fileData, d + 4);
-    
+
     int numSamples = dataChunkSize / (numChannels * bitDepth / 8);
     int samplesStartIndex = indexOfDataChunk + 8;
-    
+
     clearAudioBuffer();
     samples.resize (numChannels);
-    
+
     for (int i = 0; i < numSamples; i++)
     {
         for (int channel = 0; channel < numChannels; channel++)
         {
             int sampleIndex = samplesStartIndex + (numBytesPerBlock * i) + channel * numBytesPerSample;
-            
+
             if ((sampleIndex + (bitDepth / 8) - 1) >= fileData.size())
             {
                 reportError ("ERROR: read file error as the metadata indicates more samples than there are in the file data");
                 return false;
             }
-            
+
             if (bitDepth == 8)
             {
                 T sample = AudioSampleConverter<T>::unsignedByteToSample (fileData[sampleIndex]);
@@ -691,7 +691,7 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
             {
                 int32_t sampleAsInt = 0;
                 sampleAsInt = (fileData[sampleIndex + 2] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex];
-                
+
                 if (sampleAsInt & 0x800000) //  if the 24th bit is set, this is a negative number in 24-bit world
                     sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float
 
@@ -702,7 +702,7 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
             {
                 int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex);
                 T sample;
-                
+
                 if (audioFormat == WavAudioFormat::IEEEFloat && std::is_floating_point_v<T>)
                 {
                     float f;
@@ -713,7 +713,7 @@ bool AudioFile<T>::decodeWaveFile (std::vector<uint8_t>& fileData)
                 {
                     sample = AudioSampleConverter<T>::thirtyTwoBitIntToSample (sampleAsInt);
                 }
-                
+
                 samples[channel].push_back (sample);
             }
             else
@@ -743,15 +743,15 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
     std::string headerChunkID (fileData.begin(), fileData.begin() + 4);
     //int32_t fileSizeInBytes = fourBytesToInt (fileData, 4, Endianness::BigEndian) + 8;
     std::string format (fileData.begin() + 8, fileData.begin() + 12);
-    
+
     int audioFormat = format == "AIFF" ? AIFFAudioFormat::Uncompressed : format == "AIFC" ? AIFFAudioFormat::Compressed : AIFFAudioFormat::Error;
-    
+
     // -----------------------------------------------------------
     // try and find the start points of key chunks
     int indexOfCommChunk = getIndexOfChunk (fileData, "COMM", 12, Endianness::BigEndian);
     int indexOfSoundDataChunk = getIndexOfChunk (fileData, "SSND", 12, Endianness::BigEndian);
     int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12, Endianness::BigEndian);
-    
+
     // if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected
     // then it is unlikely we'll able to read this file, so abort
     if (indexOfSoundDataChunk == -1 || indexOfCommChunk == -1 || headerChunkID != "FORM" || audioFormat == AIFFAudioFormat::Error)
@@ -769,7 +769,7 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
     int32_t numSamplesPerChannel = fourBytesToInt (fileData, p + 10, Endianness::BigEndian);
     bitDepth = (int) twoBytesToInt (fileData, p + 14, Endianness::BigEndian);
     sampleRate = getAiffSampleRate (fileData, p + 16);
-    
+
     if (bitDepth > sizeof (T) * 8)
     {
         std::string message = "ERROR: you are trying to read a ";
@@ -780,28 +780,28 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
         reportError (message);
         return false;
     }
-    
+
     // check the sample rate was properly decoded
     if (sampleRate == 0)
     {
         reportError ("ERROR: this AIFF file has an unsupported sample rate");
         return false;
     }
-    
+
     // check the number of channels is mono or stereo
     if (numChannels < 1 ||numChannels > 2)
     {
         reportError ("ERROR: this AIFF file seems to be neither mono nor stereo (perhaps multi-track, or corrupted?)");
         return false;
     }
-    
+
     // check bit depth is either 8, 16, 24 or 32-bit
     if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32)
     {
         reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits");
         return false;
     }
-    
+
     // -----------------------------------------------------------
     // SSND CHUNK
     int s = indexOfSoundDataChunk;
@@ -809,34 +809,34 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
     int32_t soundDataChunkSize = fourBytesToInt (fileData, s + 4, Endianness::BigEndian);
     int32_t offset = fourBytesToInt (fileData, s + 8, Endianness::BigEndian);
     //int32_t blockSize = fourBytesToInt (fileData, s + 12, Endianness::BigEndian);
-    
+
     int numBytesPerSample = bitDepth / 8;
     int numBytesPerFrame = numBytesPerSample * numChannels;
     int totalNumAudioSampleBytes = numSamplesPerChannel * numBytesPerFrame;
     int samplesStartIndex = s + 16 + (int)offset;
-        
+
     // sanity check the data
     if ((soundDataChunkSize - 8) != totalNumAudioSampleBytes || totalNumAudioSampleBytes > static_cast<long>(fileData.size() - samplesStartIndex))
     {
         reportError ("ERROR: the metadatafor this file doesn't seem right");
         return false;
     }
-    
+
     clearAudioBuffer();
     samples.resize (numChannels);
-    
+
     for (int i = 0; i < numSamplesPerChannel; i++)
     {
         for (int channel = 0; channel < numChannels; channel++)
         {
             int sampleIndex = samplesStartIndex + (numBytesPerFrame * i) + channel * numBytesPerSample;
-            
+
             if ((sampleIndex + (bitDepth / 8) - 1) >= fileData.size())
             {
                 reportError ("ERROR: read file error as the metadata indicates more samples than there are in the file data");
                 return false;
             }
-            
+
             if (bitDepth == 8)
             {
                 T sample = AudioSampleConverter<T>::signedByteToSample (static_cast<int8_t> (fileData[sampleIndex]));
@@ -852,7 +852,7 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
             {
                 int32_t sampleAsInt = 0;
                 sampleAsInt = (fileData[sampleIndex] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex + 2];
-                
+
                 if (sampleAsInt & 0x800000) //  if the 24th bit is set, this is a negative number in 24-bit world
                     sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float
 
@@ -863,12 +863,12 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
             {
                 int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex, Endianness::BigEndian);
                 T sample;
-                
+
                 if (audioFormat == AIFFAudioFormat::Compressed)
                     sample = (T)reinterpret_cast<float&> (sampleAsInt);
                 else // assume PCM
                     sample = AudioSampleConverter<T>::thirtyTwoBitIntToSample (sampleAsInt);
-                
+
                 samples[channel].push_back (sample);
             }
             else
@@ -885,7 +885,7 @@ bool AudioFile<T>::decodeAiffFile (std::vector<uint8_t>& fileData)
         int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4);
         iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize);
     }
-    
+
     return true;
 }
 
@@ -898,7 +898,7 @@ uint32_t AudioFile<T>::getAiffSampleRate (std::vector<uint8_t>& fileData, int sa
         if (tenByteMatch (fileData, sampleRateStartIndex, it.second, 0))
             return it.first;
     }
-    
+
     return 0;
 }
 
@@ -911,7 +911,7 @@ bool AudioFile<T>::tenByteMatch (std::vector<uint8_t>& v1, int startIndex1, std:
         if (v1[startIndex1 + i] != v2[startIndex2 + i])
             return false;
     }
-    
+
     return true;
 }
 
@@ -938,7 +938,7 @@ bool AudioFile<T>::save (std::string filePath, AudioFileFormat format)
     {
         return saveToAiffFile (filePath);
     }
-    
+
     return false;
 }
 
@@ -954,7 +954,7 @@ bool AudioFile<T>::writeData (std::vector<uint8_t> & fileData, AudioFileFormat f
     {
         return writeToAiffData (fileData);
     }
-    
+
     return false;
 }
 
@@ -962,16 +962,16 @@ bool AudioFile<T>::writeData (std::vector<uint8_t> & fileData, AudioFileFormat f
 //=============================================================
 template <class T>
 bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
-{    
+{
     int32_t dataChunkSize = getNumSamplesPerChannel() * (getNumChannels() * bitDepth / 8);
     int16_t audioFormat = bitDepth == 32 && std::is_floating_point_v<T> ? WavAudioFormat::IEEEFloat : WavAudioFormat::PCM;
     int32_t formatChunkSize = audioFormat == WavAudioFormat::PCM ? 16 : 18;
     int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());
-    
+
     // -----------------------------------------------------------
     // HEADER CHUNK
     addStringToFileData (fileData, "RIFF");
-    
+
     // The file size in bytes is the header chunk size (4, not counting RIFF and WAVE) + the format
     // chunk size (24) + the metadata part of the data chunk plus the actual data chunk size
     int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + 8 + dataChunkSize;
@@ -981,9 +981,9 @@ bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
     }
 
     addInt32ToFileData (fileData, fileSizeInBytes);
-    
+
     addStringToFileData (fileData, "WAVE");
-    
+
     // -----------------------------------------------------------
     // FORMAT CHUNK
     addStringToFileData (fileData, "fmt ");
@@ -991,23 +991,23 @@ bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
     addInt16ToFileData (fileData, audioFormat); // audio format
     addInt16ToFileData (fileData, (int16_t)getNumChannels()); // num channels
     addInt32ToFileData (fileData, (int32_t)sampleRate); // sample rate
-    
+
     int32_t numBytesPerSecond = (int32_t) ((getNumChannels() * sampleRate * bitDepth) / 8);
     addInt32ToFileData (fileData, numBytesPerSecond);
-    
+
     int16_t numBytesPerBlock = getNumChannels() * (bitDepth / 8);
     addInt16ToFileData (fileData, numBytesPerBlock);
-    
+
     addInt16ToFileData (fileData, (int16_t)bitDepth);
-    
+
     if (audioFormat == WavAudioFormat::IEEEFloat)
         addInt16ToFileData (fileData, 0); // extension size
-    
+
     // -----------------------------------------------------------
     // DATA CHUNK
     addStringToFileData (fileData, "data");
     addInt32ToFileData (fileData, dataChunkSize);
-    
+
     for (int i = 0; i < getNumSamplesPerChannel(); i++)
     {
         for (int channel = 0; channel < getNumChannels(); channel++)
@@ -1025,12 +1025,12 @@ bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
             else if (bitDepth == 24)
             {
                 int32_t sampleAsIntAgain = AudioSampleConverter<T>::sampleToTwentyFourBitInt (samples[channel][i]);
-                
+
                 uint8_t bytes[3];
                 bytes[2] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;
                 bytes[1] = (uint8_t) (sampleAsIntAgain >>  8) & 0xFF;
                 bytes[0] = (uint8_t) sampleAsIntAgain & 0xFF;
-                
+
                 fileData.push_back (bytes[0]);
                 fileData.push_back (bytes[1]);
                 fileData.push_back (bytes[2]);
@@ -1038,12 +1038,12 @@ bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
             else if (bitDepth == 32)
             {
                 int32_t sampleAsInt;
-                
+
                 if (audioFormat == WavAudioFormat::IEEEFloat)
                     sampleAsInt = (int32_t) reinterpret_cast<int32_t&> (samples[channel][i]);
                 else // assume PCM
                     sampleAsInt = AudioSampleConverter<T>::sampleToThirtyTwoBitInt (samples[channel][i]);
-                
+
                 addInt32ToFileData (fileData, sampleAsInt, Endianness::LittleEndian);
             }
             else
@@ -1053,7 +1053,7 @@ bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
             }
         }
     }
-    
+
     // -----------------------------------------------------------
     // iXML CHUNK
     if (iXMLChunkSize > 0)
@@ -1062,24 +1062,24 @@ bool AudioFile<T>::writeToWaveData (std::vector<uint8_t> & fileData)
         addInt32ToFileData (fileData, iXMLChunkSize);
         addStringToFileData (fileData, iXMLChunk);
     }
-    
+
     return true;
 }
 
 //=============================================================
 template <class T>
 bool AudioFile<T>::writeToAiffData (std::vector<uint8_t> & fileData)
-{    
+{
     int32_t numBytesPerSample = bitDepth / 8;
     int32_t numBytesPerFrame = numBytesPerSample * getNumChannels();
     int32_t totalNumAudioSampleBytes = getNumSamplesPerChannel() * numBytesPerFrame;
     int32_t soundDataChunkSize = totalNumAudioSampleBytes + 8;
     int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());
-    
+
     // -----------------------------------------------------------
     // HEADER CHUNK
     addStringToFileData (fileData, "FORM");
-    
+
     // The file size in bytes is the header chunk size (4, not counting FORM and AIFF) + the COMM
     // chunk size (26) + the metadata part of the SSND chunk plus the actual data chunk size
     int32_t fileSizeInBytes = 4 + 26 + 16 + totalNumAudioSampleBytes;
@@ -1089,9 +1089,9 @@ bool AudioFile<T>::writeToAiffData (std::vector<uint8_t> & fileData)
     }
 
     addInt32ToFileData (fileData, fileSizeInBytes, Endianness::BigEndian);
-    
+
     addStringToFileData (fileData, "AIFF");
-    
+
     // -----------------------------------------------------------
     // COMM CHUNK
     addStringToFileData (fileData, "COMM");
@@ -1100,14 +1100,14 @@ bool AudioFile<T>::writeToAiffData (std::vector<uint8_t> & fileData)
     addInt32ToFileData (fileData, getNumSamplesPerChannel(), Endianness::BigEndian); // num samples per channel
     addInt16ToFileData (fileData, bitDepth, Endianness::BigEndian); // bit depth
     addSampleRateToAiffData (fileData, sampleRate);
-    
+
     // -----------------------------------------------------------
     // SSND CHUNK
     addStringToFileData (fileData, "SSND");
     addInt32ToFileData (fileData, soundDataChunkSize, Endianness::BigEndian);
     addInt32ToFileData (fileData, 0, Endianness::BigEndian); // offset
     addInt32ToFileData (fileData, 0, Endianness::BigEndian); // block size
-    
+
     for (int i = 0; i < getNumSamplesPerChannel(); i++)
     {
         for (int channel = 0; channel < getNumChannels(); channel++)
@@ -1125,12 +1125,12 @@ bool AudioFile<T>::writeToAiffData (std::vector<uint8_t> & fileData)
             else if (bitDepth == 24)
             {
                 int32_t sampleAsIntAgain = AudioSampleConverter<T>::sampleToTwentyFourBitInt (samples[channel][i]);
-                
+
                 uint8_t bytes[3];
                 bytes[0] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;
                 bytes[1] = (uint8_t) (sampleAsIntAgain >>  8) & 0xFF;
                 bytes[2] = (uint8_t) sampleAsIntAgain & 0xFF;
-                
+
                 fileData.push_back (bytes[0]);
                 fileData.push_back (bytes[1]);
                 fileData.push_back (bytes[2]);
@@ -1165,16 +1165,16 @@ template <class T>
 bool AudioFile<T>::saveToWaveFile (std::string filePath)
 {
     std::vector<uint8_t> fileData;
-    
+
     int32_t dataChunkSize = getNumSamplesPerChannel() * (getNumChannels() * bitDepth / 8);
     int16_t audioFormat = bitDepth == 32 && std::is_floating_point_v<T> ? WavAudioFormat::IEEEFloat : WavAudioFormat::PCM;
     int32_t formatChunkSize = audioFormat == WavAudioFormat::PCM ? 16 : 18;
     int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());
-    
+
     // -----------------------------------------------------------
     // HEADER CHUNK
     addStringToFileData (fileData, "RIFF");
-    
+
     // The file size in bytes is the header chunk size (4, not counting RIFF and WAVE) + the format
     // chunk size (24) + the metadata part of the data chunk plus the actual data chunk size
     int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + 8 + dataChunkSize;
@@ -1184,9 +1184,9 @@ bool AudioFile<T>::saveToWaveFile (std::string filePath)
     }
 
     addInt32ToFileData (fileData, fileSizeInBytes);
-    
+
     addStringToFileData (fileData, "WAVE");
-    
+
     // -----------------------------------------------------------
     // FORMAT CHUNK
     addStringToFileData (fileData, "fmt ");
@@ -1194,23 +1194,23 @@ bool AudioFile<T>::saveToWaveFile (std::string filePath)
     addInt16ToFileData (fileData, audioFormat); // audio format
     addInt16ToFileData (fileData, (int16_t)getNumChannels()); // num channels
     addInt32ToFileData (fileData, (int32_t)sampleRate); // sample rate
-    
+
     int32_t numBytesPerSecond = (int32_t) ((getNumChannels() * sampleRate * bitDepth) / 8);
     addInt32ToFileData (fileData, numBytesPerSecond);
-    
+
     int16_t numBytesPerBlock = getNumChannels() * (bitDepth / 8);
     addInt16ToFileData (fileData, numBytesPerBlock);
-    
+
     addInt16ToFileData (fileData, (int16_t)bitDepth);
-    
+
     if (audioFormat == WavAudioFormat::IEEEFloat)
         addInt16ToFileData (fileData, 0); // extension size
-    
+
     // -----------------------------------------------------------
     // DATA CHUNK
     addStringToFileData (fileData, "data");
     addInt32ToFileData (fileData, dataChunkSize);
-    
+
     for (int i = 0; i < getNumSamplesPerChannel(); i++)
     {
         for (int channel = 0; channel < getNumChannels(); channel++)
@@ -1228,12 +1228,12 @@ bool AudioFile<T>::saveToWaveFile (std::string filePath)
             else if (bitDepth == 24)
             {
                 int32_t sampleAsIntAgain = AudioSampleConverter<T>::sampleToTwentyFourBitInt (samples[channel][i]);
-                
+
                 uint8_t bytes[3];
                 bytes[2] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;
                 bytes[1] = (uint8_t) (sampleAsIntAgain >>  8) & 0xFF;
                 bytes[0] = (uint8_t) sampleAsIntAgain & 0xFF;
-                
+
                 fileData.push_back (bytes[0]);
                 fileData.push_back (bytes[1]);
                 fileData.push_back (bytes[2]);
@@ -1241,12 +1241,12 @@ bool AudioFile<T>::saveToWaveFile (std::string filePath)
             else if (bitDepth == 32)
             {
                 int32_t sampleAsInt;
-                
+
                 if (audioFormat == WavAudioFormat::IEEEFloat)
                     sampleAsInt = (int32_t) reinterpret_cast<int32_t&> (samples[channel][i]);
                 else // assume PCM
                     sampleAsInt = AudioSampleConverter<T>::sampleToThirtyTwoBitInt (samples[channel][i]);
-                
+
                 addInt32ToFileData (fileData, sampleAsInt, Endianness::LittleEndian);
             }
             else
@@ -1256,7 +1256,7 @@ bool AudioFile<T>::saveToWaveFile (std::string filePath)
             }
         }
     }
-    
+
     // -----------------------------------------------------------
     // iXML CHUNK
     if (iXMLChunkSize > 0)
@@ -1265,14 +1265,14 @@ bool AudioFile<T>::saveToWaveFile (std::string filePath)
         addInt32ToFileData (fileData, iXMLChunkSize);
         addStringToFileData (fileData, iXMLChunk);
     }
-    
+
     // check that the various sizes we put in the metadata are correct
     if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || dataChunkSize != (getNumSamplesPerChannel() * getNumChannels() * (bitDepth / 8)))
     {
         reportError ("ERROR: couldn't save file to " + filePath);
         return false;
     }
-    
+
     // try to write the file
     return writeDataToFile (fileData, filePath);
 }
@@ -1282,17 +1282,17 @@ template <class T>
 bool AudioFile<T>::saveToAiffFile (std::string filePath)
 {
     std::vector<uint8_t> fileData;
-    
+
     int32_t numBytesPerSample = bitDepth / 8;
     int32_t numBytesPerFrame = numBytesPerSample * getNumChannels();
     int32_t totalNumAudioSampleBytes = getNumSamplesPerChannel() * numBytesPerFrame;
     int32_t soundDataChunkSize = totalNumAudioSampleBytes + 8;
     int32_t iXMLChunkSize = static_cast<int32_t> (iXMLChunk.size());
-    
+
     // -----------------------------------------------------------
     // HEADER CHUNK
     addStringToFileData (fileData, "FORM");
-    
+
     // The file size in bytes is the header chunk size (4, not counting FORM and AIFF) + the COMM
     // chunk size (26) + the metadata part of the SSND chunk plus the actual data chunk size
     int32_t fileSizeInBytes = 4 + 26 + 16 + totalNumAudioSampleBytes;
@@ -1302,9 +1302,9 @@ bool AudioFile<T>::saveToAiffFile (std::string filePath)
     }
 
     addInt32ToFileData (fileData, fileSizeInBytes, Endianness::BigEndian);
-    
+
     addStringToFileData (fileData, "AIFF");
-    
+
     // -----------------------------------------------------------
     // COMM CHUNK
     addStringToFileData (fileData, "COMM");
@@ -1313,14 +1313,14 @@ bool AudioFile<T>::saveToAiffFile (std::string filePath)
     addInt32ToFileData (fileData, getNumSamplesPerChannel(), Endianness::BigEndian); // num samples per channel
     addInt16ToFileData (fileData, bitDepth, Endianness::BigEndian); // bit depth
     addSampleRateToAiffData (fileData, sampleRate);
-    
+
     // -----------------------------------------------------------
     // SSND CHUNK
     addStringToFileData (fileData, "SSND");
     addInt32ToFileData (fileData, soundDataChunkSize, Endianness::BigEndian);
     addInt32ToFileData (fileData, 0, Endianness::BigEndian); // offset
     addInt32ToFileData (fileData, 0, Endianness::BigEndian); // block size
-    
+
     for (int i = 0; i < getNumSamplesPerChannel(); i++)
     {
         for (int channel = 0; channel < getNumChannels(); channel++)
@@ -1338,12 +1338,12 @@ bool AudioFile<T>::saveToAiffFile (std::string filePath)
             else if (bitDepth == 24)
             {
                 int32_t sampleAsIntAgain = AudioSampleConverter<T>::sampleToTwentyFourBitInt (samples[channel][i]);
-                
+
                 uint8_t bytes[3];
                 bytes[0] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF;
                 bytes[1] = (uint8_t) (sampleAsIntAgain >>  8) & 0xFF;
                 bytes[2] = (uint8_t) sampleAsIntAgain & 0xFF;
-                
+
                 fileData.push_back (bytes[0]);
                 fileData.push_back (bytes[1]);
                 fileData.push_back (bytes[2]);
@@ -1370,14 +1370,14 @@ bool AudioFile<T>::saveToAiffFile (std::string filePath)
         addInt32ToFileData (fileData, iXMLChunkSize, Endianness::BigEndian);
         addStringToFileData (fileData, iXMLChunk);
     }
-    
+
     // check that the various sizes we put in the metadata are correct
     if (fileSizeInBytes != static_cast<int32_t> (fileData.size() - 8) || soundDataChunkSize != getNumSamplesPerChannel() *  numBytesPerFrame + 8)
     {
         reportError ("ERROR: couldn't save file to " + filePath);
         return false;
     }
-    
+
     // try to write the file
     return writeDataToFile (fileData, filePath);
 }
@@ -1387,7 +1387,7 @@ template <class T>
 bool AudioFile<T>::writeDataToFile (std::vector<uint8_t>& fileData, std::string filePath)
 {
     std::ofstream outputFile (filePath, std::ios::binary);
-    
+
     if (outputFile.is_open())
     {
         for (size_t i = 0; i < fileData.size(); i++)
@@ -1395,12 +1395,12 @@ bool AudioFile<T>::writeDataToFile (std::vector<uint8_t>& fileData, std::string
             char value = (char) fileData[i];
             outputFile.write (&value, sizeof (char));
         }
-        
+
         outputFile.close();
-        
+
         return true;
     }
-    
+
     return false;
 }
 
@@ -1417,7 +1417,7 @@ template <class T>
 void AudioFile<T>::addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i, Endianness endianness)
 {
     uint8_t bytes[4];
-    
+
     if (endianness == Endianness::LittleEndian)
     {
         bytes[3] = (i >> 24) & 0xFF;
@@ -1432,7 +1432,7 @@ void AudioFile<T>::addInt32ToFileData (std::vector<uint8_t>& fileData, int32_t i
         bytes[2] = (i >> 8) & 0xFF;
         bytes[3] = i & 0xFF;
     }
-    
+
     for (int i = 0; i < 4; i++)
         fileData.push_back (bytes[i]);
 }
@@ -1442,7 +1442,7 @@ template <class T>
 void AudioFile<T>::addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i, Endianness endianness)
 {
     uint8_t bytes[2];
-    
+
     if (endianness == Endianness::LittleEndian)
     {
         bytes[1] = (i >> 8) & 0xFF;
@@ -1453,7 +1453,7 @@ void AudioFile<T>::addInt16ToFileData (std::vector<uint8_t>& fileData, int16_t i
         bytes[0] = (i >> 8) & 0xFF;
         bytes[1] = i & 0xFF;
     }
-    
+
     fileData.push_back (bytes[0]);
     fileData.push_back (bytes[1]);
 }
@@ -1466,7 +1466,7 @@ void AudioFile<T>::clearAudioBuffer()
     {
         samples[i].clear();
     }
-    
+
     samples.clear();
 }
 
@@ -1475,7 +1475,7 @@ template <class T>
 AudioFileFormat AudioFile<T>::determineAudioFileFormat (std::vector<uint8_t>& fileData)
 {
     std::string header (fileData.begin(), fileData.begin() + 4);
-    
+
     if (header == "RIFF")
         return AudioFileFormat::Wave;
     else if (header == "FORM")
@@ -1491,12 +1491,12 @@ int32_t AudioFile<T>::fourBytesToInt (std::vector<uint8_t>& source, int startInd
     if (source.size() >= (startIndex + 4))
     {
         int32_t result;
-        
+
         if (endianness == Endianness::LittleEndian)
             result = (source[startIndex + 3] << 24) | (source[startIndex + 2] << 16) | (source[startIndex + 1] << 8) | source[startIndex];
         else
             result = (source[startIndex] << 24) | (source[startIndex + 1] << 16) | (source[startIndex + 2] << 8) | source[startIndex + 3];
-        
+
         return result;
     }
     else
@@ -1511,12 +1511,12 @@ template <class T>
 int16_t AudioFile<T>::twoBytesToInt (std::vector<uint8_t>& source, int startIndex, Endianness endianness)
 {
     int16_t result;
-    
+
     if (endianness == Endianness::LittleEndian)
         result = (source[startIndex + 1] << 8) | source[startIndex];
     else
         result = (source[startIndex] << 8) | source[startIndex + 1];
-    
+
     return result;
 }
 
@@ -1526,18 +1526,18 @@ int AudioFile<T>::getIndexOfString (std::vector<uint8_t>& source, std::string st
 {
     int index = -1;
     int stringLength = (int)stringToSearchFor.length();
-    
+
     for (size_t i = 0; i < source.size() - stringLength;i++)
     {
         std::string section (source.begin() + i, source.begin() + i + stringLength);
-        
+
         if (section == stringToSearchFor)
         {
             index = static_cast<int> (i);
             break;
         }
     }
-    
+
     return index;
 }
 
@@ -1546,7 +1546,7 @@ template <class T>
 int AudioFile<T>::getIndexOfChunk (std::vector<uint8_t>& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness)
 {
     constexpr int dataLen = 4;
-    
+
     if (chunkHeaderID.size() != dataLen)
     {
         assert (false && "Invalid chunk header ID string");
@@ -1562,11 +1562,11 @@ int AudioFile<T>::getIndexOfChunk (std::vector<uint8_t>& source, const std::stri
         }
 
         i += dataLen;
-        
+
         // If somehow we don't have 4 bytes left to read, then exit with -1
         if ((i + 4) >= source.size())
             return -1;
-        
+
         auto chunkSize = fourBytesToInt (source, i, endianness);
         i += (dataLen + chunkSize);
     }
@@ -1587,9 +1587,9 @@ template <typename SignedType>
 typename std::make_unsigned<SignedType>::type convertSignedToUnsigned (SignedType signedValue)
 {
     static_assert (std::is_signed<SignedType>::value, "The input value must be signed");
-    
+
     typename std::make_unsigned<SignedType>::type unsignedValue = static_cast<typename std::make_unsigned<SignedType>::type> (1) + std::numeric_limits<SignedType>::max();
-    
+
     unsignedValue += signedValue;
     return unsignedValue;
 }
diff --git a/otherarch/ttscpp/include/phonemizer.h b/otherarch/ttscpp/include/phonemizer.h
index 6167a6818..0e401de74 100644
--- a/otherarch/ttscpp/include/phonemizer.h
+++ b/otherarch/ttscpp/include/phonemizer.h
@@ -12,7 +12,7 @@
 #include <unordered_map>
 #include <map>
 #include <unordered_set>
-#include "tokenizer.h"
+#include "ttstokenizer.h"
 #include <algorithm>
 #include <mutex>
 
@@ -33,16 +33,16 @@ static const std::unordered_set<std::string> ONE_LETTER_WORDS = {
 	"i",
 };
 /*
- * The two letter and three letter words listed below have been filtered down from the complete list of english two and three letter words 
+ * The two letter and three letter words listed below have been filtered down from the complete list of english two and three letter words
  * via several criteria:
  *   1. All non-EN-US words have been removed
  * 	 2. All three letter acronyms have been removed (as these lists are used to identify acronyms)
- *   3. All archaic, deprecated, or poetic words have been removed. 
- * 	 4. All literary, abbreviative, and slang words have been removed if they see no more than a mean of 30 daily searches via google (over the 
- *	 last 10 years). 
- * 
- * After the lists were filtered by the criteria described above, removed items were reviewed. Any item which had entered the common EN-US 
- * vernacular but was not identified as of American origin was reintroduced into the sets below. 
+ *   3. All archaic, deprecated, or poetic words have been removed.
+ * 	 4. All literary, abbreviative, and slang words have been removed if they see no more than a mean of 30 daily searches via google (over the
+ *	 last 10 years).
+ *
+ * After the lists were filtered by the criteria described above, removed items were reviewed. Any item which had entered the common EN-US
+ * vernacular but was not identified as of American origin was reintroduced into the sets below.
  */
 static const std::unordered_set<std::string> TWO_LETTER_WORDS = {
 	"ab", "ah", "am", "an", "as", "at", "aw", "ax", "ay", "be", "bo", "br",
@@ -50,7 +50,7 @@ static const std::unordered_set<std::string> TWO_LETTER_WORDS = {
 	"id", "if", "in", "is", "it", "la", "lo", "ma", "me", "mm", "my", "na",
 	"no", "of", "oh", "oi", "on", "oo", "or", "ow", "ox", "oy", "pa", "qi",
 	"re", "sh", "so", "to", "uh", "um", "un", "up", "us", "we", "wo", "ya",
-	"ye", "yo", 
+	"ye", "yo",
 };
 static const std::unordered_set<std::string> THREE_LETTER_WORDS = {
 	"aah", "abs", "aby", "ace", "ach", "ack", "act", "add", "ado", "ads", "aft", "age",
@@ -292,7 +292,7 @@ static std::string STOPPING_TOKENS = ".,:;!?";
 
 #ifdef ESPEAK_INSTALL
 /**
- * espeak-ng uses globals to persist and manage its state so it is not compatible with 
+ * espeak-ng uses globals to persist and manage its state so it is not compatible with
  * threaded parallelism (https://github.com/espeak-ng/espeak-ng/issues/1527).
  * This singleton acts as a mutex wrapped provider for all espeak phonemization methods such
  * that multiple instances of the kokoro_runner can be initialized and called in parallel.
@@ -323,7 +323,7 @@ public:
 #endif
 
 enum lookup_code {
-	SUCCESS = 100,
+	SUCCESS_TOTAL = 100,
 	SUCCESS_PARTIAL = 101,
 	FAILURE_UNFOUND = 200,
 	FAILURE_PHONETIC = 201,
@@ -368,7 +368,7 @@ struct conditions {
 	void update_for_word(std::string word,bool allow_for_upper_check = true);
 };
 
-/* 
+/*
  * The corpus struct is simply a small wrapper class that is used to perform simple look forward and backwards in the text
  * which is being phonemized. This can be used to discern how to convert chunks of text in a consistent and protective fashion
  * in order to accurately phonemize complicated text.
@@ -376,7 +376,7 @@ struct conditions {
 struct corpus {
 	corpus(const char * text, size_t size): size(size), text(text) {};
 	size_t location = 0;
-	size_t size; 
+	size_t size;
 	const char * text;
 
 	/*
@@ -397,9 +397,9 @@ struct corpus {
 	std::string after_until(int after, std::string val);
 };
 
-/* 
+/*
  * The TTS phonemizer works by splitting each word into distinct graphemes, and for each grapheme the phonemizer will look at the grapheme that came
- * before, after, and for any word specific exceptions in order to compile a 
+ * before, after, and for any word specific exceptions in order to compile a
  */
 struct phonemizer_rule {
 	~phonemizer_rule() {
@@ -436,10 +436,10 @@ private:
 
 struct word_phonemizer * word_phonemizer_from_gguf(gguf_context * meta);
 
-/* 
+/*
  * The general translation approach that espeak uses is to lookup words in the dictionary and return a list of possible matches per lookup.
  * Each match contains flags which describe the match's conditions and limitations and optionally a pronunciation. When a pronunciation is not returned,
- * it usually means that the word needs to be pronounced phonetically, the word belongs to another language, or that the original content is a 
+ * it usually means that the word needs to be pronounced phonetically, the word belongs to another language, or that the original content is a
  * token representation of a different word (e.g. with numbers).
  *
  * Since it does not make sense to have the core lexer reperform this lookup operation with represented words or via distinct languages, those behaviors
@@ -470,7 +470,7 @@ struct phoneme_dictionary {
 
 struct phoneme_dictionary * phoneme_dictionary_from_gguf(gguf_context * meta);
 
-/* 
+/*
  * In general, I would like to avoid requiring the installation of otherwise broad and technically complicated libraries,
  * like espeak, especially when they are only being used for a small portion of their overall functionality. While avoiding these
  * requirements will keep the default installation cost of TTS.cpp down, it is also unlikely that TTS.cpp will support
@@ -478,8 +478,8 @@ struct phoneme_dictionary * phoneme_dictionary_from_gguf(gguf_context * meta);
  * espeak. As such, the phonemizer struct described below will support simple text to IPA phoneme functionality out of the box,
  * while also optionally acting as an interface for espeak phonemization.
  *
- * Phonemization seems to use a pattern close to the common lexer, such that at each index or chunk of text forward and backward context 
- * views are used to support single pass translation. As such, the TTS.cpp phonemization pattern I've decided to implement behaves 
+ * Phonemization seems to use a pattern close to the common lexer, such that at each index or chunk of text forward and backward context
+ * views are used to support single pass translation. As such, the TTS.cpp phonemization pattern I've decided to implement behaves
  * effecively like a simple router lexer. It will only support utf-8 encoded text and english IPA conversion.
  */
 struct phonemizer {
diff --git a/otherarch/ttscpp/include/ttscommon.h b/otherarch/ttscpp/include/ttscommon.h
index c3a1a1c80..df380704e 100644
--- a/otherarch/ttscpp/include/ttscommon.h
+++ b/otherarch/ttscpp/include/ttscommon.h
@@ -28,7 +28,7 @@ const std::map<std::string, tts_arch> SUPPORTED_ARCHITECTURES = {
 	{ "orpheus", ORPHEUS_ARCH }
 };
 
-/// Given a map from keys to values, creates a new map from values to keys 
+/// Given a map from keys to values, creates a new map from values to keys
 template<typename K, typename V>
 static std::map<V, K> reverse_map(const std::map<K, V>& m) {
     std::map<V, K> r;
@@ -43,10 +43,10 @@ const std::map<tts_arch, std::string> ARCHITECTURE_NAMES = reverse_map(SUPPORTED
 struct generation_configuration {
     generation_configuration(
     	std::string voice = "",
-    	int top_k = 50, 
-    	float temperature = 1.0, 
-    	float repetition_penalty = 1.0, 
-    	bool use_cross_attn = true, 
+    	int top_k = 50,
+    	float temperature = 1.0,
+    	float repetition_penalty = 1.0,
+    	bool use_cross_attn = true,
     	std::string espeak_voice_id = "",
     	int max_tokens = 0,
     	float top_p = 1.0,
diff --git a/otherarch/ttscpp/src/dac_model.h b/otherarch/ttscpp/src/dac_model.h
index be43ad02d..f0ae96d03 100644
--- a/otherarch/ttscpp/src/dac_model.h
+++ b/otherarch/ttscpp/src/dac_model.h
@@ -22,13 +22,13 @@ struct dac_quantize_layer {
 // this struct maintains the static tensors for the dac audio decoder graph.
 // As such, this is designed to contain basic configuration and ggml tensor support for DAC.
 // The dac_runner describes how the graph is built and run.
-struct dac_model : tts_model {    
+struct dac_model : tts_model {
     // These configs  are essentially built for the 44khZ 8kbps standard DAC model audio encoder and decoder
     uint32_t n_layers = 4;
     uint32_t n_heads = 9;
     uint32_t up_sampling_factor = 512;
     uint32_t max_generation_size = 2580;
-    
+
     struct ggml_tensor * in_conv_kernel;
     struct ggml_tensor * in_conv_bias;
     struct ggml_tensor * out_conv_kernel;
@@ -53,11 +53,11 @@ void assign_to_audio_encoder(dac_model * model, std::string name, ggml_tensor *
 // the context used for running the dac model
 struct dac_context : runner_context {
     dac_context(dac_model * model, int n_threads): runner_context(n_threads), model(model) {};
-    
+
     struct dac_model * model;
-        
+
     struct ggml_tensor * inp_tokens;
-    
+
     void build_schedule() {
         runner_context::build_schedule(model->max_nodes());
     }
@@ -85,11 +85,11 @@ struct dac_runner : tts_runner {
     }
     dac_model * model;
     dac_context * dctx;
-    
+
     void init_build() {
         tts_runner::init_build(&dctx->buf_compute_meta);
     }
-    
+
     void prepare_post_load();
     struct ggml_cgraph * build_dac_graph(dac_ubatch & batch);
     void run(uint32_t * input_tokens, uint32_t sequence_length, struct tts_response * outputs);
diff --git a/otherarch/ttscpp/src/dia_model.cpp b/otherarch/ttscpp/src/dia_model.cpp
index bd6dfd43a..d7ec2685f 100644
--- a/otherarch/ttscpp/src/dia_model.cpp
+++ b/otherarch/ttscpp/src/dia_model.cpp
@@ -119,7 +119,7 @@ void dia_model::assign_to_decoder_layer(std::string part, dia_decoder_layer * la
         set_tensor(layer->self_attn_norm, tensor);
     } else if (part == "pre_mlp_norm") {
         layer->mlp_norm = ggml_dup_tensor(ctx, tensor);
-        set_tensor(layer->mlp_norm, tensor);    
+        set_tensor(layer->mlp_norm, tensor);
     } else if (part == "pre_ca_norm") {
         layer->cross_attn_norm = ggml_dup_tensor(ctx, tensor);
         set_tensor(layer->cross_attn_norm, tensor);
@@ -151,7 +151,7 @@ void dia_model::prep_layers() {
         dia_decoder_layer * l = new dia_decoder_layer;
         decoder->layers.push_back(l);
     }
-    
+
     decoder->embds.reserve((size_t) n_output_heads);
     decoder->heads.reserve((size_t) n_output_heads);
     for (int i = 0; i < n_output_heads; i++) {
@@ -196,7 +196,7 @@ void dia_model::prep_constants(gguf_context * meta) {
     int encoder_attn_heads_key = gguf_find_key(meta, "dia.encoder.attn_heads");
     if (encoder_attn_heads_key != -1) {
         encoder_attn_heads = gguf_get_val_u32(meta, encoder_attn_heads_key);
-    }    
+    }
 
     int head_size_key = gguf_find_key(meta, "dia.attn_head_size");
     if (head_size_key != -1) {
@@ -271,7 +271,7 @@ struct dia_context * build_new_dia_context(struct dia_model * model, int n_threa
     return dctx;
 }
 
-static bool dia_kv_cache_init(struct dia_kv_cache * cache, dia_model * model, dia_context * dctx) {    
+static bool dia_kv_cache_init(struct dia_kv_cache * cache, dia_model * model, dia_context * dctx) {
     ggml_backend_buffer_type_t buft = nullptr;
     // this will only really support cpu or metal for the time being;
     if (dctx->backend != nullptr) {
@@ -382,7 +382,7 @@ static struct ggml_tensor * build_dia_encoder(ggml_context * ctx, dia_model * mo
     struct ggml_tensor * cur = ggml_reshape_3d(ctx, ggml_get_rows(ctx, model->encoder->embedding, dctx->inp_tokens), model->encoder_hidden_size, model->max_encoder_context_length, 2);
     for (auto layer : model->encoder->layers) {
         struct ggml_tensor * residual = cur;
-        
+
         cur = dia_layer_norm(ctx, cur, layer->self_attn_norm);
         // self-attention
         {
@@ -402,7 +402,7 @@ static struct ggml_tensor * build_dia_encoder(ggml_context * ctx, dia_model * mo
             struct ggml_tensor * kqv_merged = ggml_permute(ctx, kqv, 2, 0, 1, 3);
 
             // It is unclear why the attention ops in Dia's encoder don't project to the embedding dimension size as is standard. Instead they up project to the decoder's embedding dimension
-            // then down project back the the encoder embedding dimension. 
+            // then down project back to the encoder embedding dimension.
             cur = ggml_cont_3d(ctx, kqv_merged, model->decoder_hidden_size, model->max_encoder_context_length, 2);
             cur = ggml_mul_mat(ctx, layer->o, cur);
         }
@@ -443,10 +443,10 @@ static struct ggml_tensor * repeat_interleave_dim1(ggml_context * ctx, struct gg
 static void build_dia_self_kv_store(ggml_context * ctx, dia_context * dctx, dia_model * model, dia_kv_cache * kv, ggml_cgraph * gf, struct ggml_tensor * k, struct ggml_tensor * v, dia_ubatch & batch, int layer_index) {
     int64_t attn_size = model->head_size * model->decoder_attn_heads;
 
-    struct ggml_tensor * k_cache_view = 
+    struct ggml_tensor * k_cache_view =
         ggml_view_2d(
-                ctx, kv->k_l[layer_index], attn_size, 2, 
-                attn_size * model->max_generation_size * ggml_element_size(kv->k_l[layer_index]), 
+                ctx, kv->k_l[layer_index], attn_size, 2,
+                attn_size * model->max_generation_size * ggml_element_size(kv->k_l[layer_index]),
                 attn_size*dctx->current_position*ggml_element_size(kv->k_l[layer_index]));
 
     k = ggml_rope(ctx, ggml_cont(ctx, ggml_reshape_4d(ctx, k, model->head_size, model->decoder_attn_heads / model->decoder_query_heads, batch.sequence_length, 2)), dctx->positions, model->head_size, 2);
@@ -461,8 +461,8 @@ static void build_dia_self_kv_store(ggml_context * ctx, dia_context * dctx, dia_
     struct ggml_tensor * v_cache_view = nullptr;
 
     v_cache_view = ggml_view_2d(
-            ctx, kv->v_l[layer_index], attn_size, 2, 
-            attn_size * model->max_generation_size * ggml_element_size(kv->v_l[layer_index]), 
+            ctx, kv->v_l[layer_index], attn_size, 2,
+            attn_size * model->max_generation_size * ggml_element_size(kv->v_l[layer_index]),
             attn_size*dctx->current_position*ggml_element_size(kv->v_l[layer_index]));
 
     // Since the sequence length should always be 1 here this is the most pertinent time to repeat the heads for grouped query attention.
@@ -476,11 +476,11 @@ static void build_dia_self_kv_store(ggml_context * ctx, dia_context * dctx, dia_
 static void build_dia_cross_kv_store(ggml_context * ctx, dia_context * dctx, dia_model * model, dia_kv_cache * kv, ggml_cgraph * gf, struct ggml_tensor * encoder_hidden_states, int layer_index) {
     dia_decoder_layer * layer = model->decoder->layers[layer_index];
     struct ggml_tensor * encoder_states_key_view = ggml_cont(ctx, ggml_view_3d(
-        ctx, 
-        encoder_hidden_states, 
-        model->encoder_hidden_size, 
-        dctx->prompt_size, 
-        2, 
+        ctx,
+        encoder_hidden_states,
+        model->encoder_hidden_size,
+        dctx->prompt_size,
+        2,
         model->encoder_hidden_size * ggml_element_size(encoder_hidden_states), model->encoder_hidden_size * model->max_encoder_context_length * ggml_element_size(encoder_hidden_states), 0));
 
     struct ggml_tensor * k = ggml_mul_mat(ctx, layer->cross_attn_k, encoder_states_key_view);
@@ -491,8 +491,8 @@ static void build_dia_cross_kv_store(ggml_context * ctx, dia_context * dctx, dia
 
     struct ggml_tensor * k_cache_view =
         ggml_view_4d(
-                ctx, kv->cross_k_l[layer_index], model->head_size, model->decoder_attn_heads, 2, dctx->prompt_size, 
-                model->head_size*ggml_element_size(kv->cross_k_l[layer_index]), 
+                ctx, kv->cross_k_l[layer_index], model->head_size, model->decoder_attn_heads, 2, dctx->prompt_size,
+                model->head_size*ggml_element_size(kv->cross_k_l[layer_index]),
                 model->head_size*model->decoder_attn_heads*ggml_element_size(kv->cross_k_l[layer_index]),
                 model->head_size*model->decoder_attn_heads*2*ggml_element_size(kv->cross_k_l[layer_index]),
                 0);
@@ -504,10 +504,10 @@ static void build_dia_cross_kv_store(ggml_context * ctx, dia_context * dctx, dia
 
     struct ggml_tensor * v_cache_view =
         ggml_view_4d(
-                ctx, kv->cross_v_l[layer_index], model->max_encoder_context_length, model->head_size, model->decoder_attn_heads, 2, 
-                model->max_encoder_context_length*ggml_element_size(kv->cross_v_l[layer_index]), 
-                model->head_size*model->max_encoder_context_length*ggml_element_size(kv->cross_v_l[layer_index]), 
-                model->head_size*model->max_encoder_context_length*model->decoder_attn_heads*ggml_element_size(kv->cross_v_l[layer_index]), 
+                ctx, kv->cross_v_l[layer_index], model->max_encoder_context_length, model->head_size, model->decoder_attn_heads, 2,
+                model->max_encoder_context_length*ggml_element_size(kv->cross_v_l[layer_index]),
+                model->head_size*model->max_encoder_context_length*ggml_element_size(kv->cross_v_l[layer_index]),
+                model->head_size*model->max_encoder_context_length*model->decoder_attn_heads*ggml_element_size(kv->cross_v_l[layer_index]),
                 0);
 
     ggml_build_forward_expand(gf, ggml_cpy(ctx, v, v_cache_view));
@@ -515,11 +515,11 @@ static void build_dia_cross_kv_store(ggml_context * ctx, dia_context * dctx, dia
 
 static struct ggml_tensor * build_dia_decoder(
         ggml_cgraph * gf,
-        ggml_context * ctx, 
-        dia_model * model, 
-        dia_context * dctx, 
-        dia_kv_cache * cache, 
-        dia_ubatch & batch, 
+        ggml_context * ctx,
+        dia_model * model,
+        dia_context * dctx,
+        dia_kv_cache * cache,
+        dia_ubatch & batch,
         struct ggml_tensor * encoder_hidden_states) {
     dctx->positions = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.sequence_length);
     ggml_set_input(dctx->positions);
@@ -528,7 +528,7 @@ static struct ggml_tensor * build_dia_decoder(
     for (int l = 0; l < model->decoder->layers.size(); l++){
         dia_decoder_layer * layer = model->decoder->layers[l];
         struct ggml_tensor * residual = cur;
-        
+
         cur = dia_layer_norm(ctx, cur, layer->self_attn_norm);
         // self-attention
         {
@@ -546,13 +546,13 @@ static struct ggml_tensor * build_dia_decoder(
                         0);
             k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3));
 
-            struct ggml_tensor * v = 
+            struct ggml_tensor * v =
                 ggml_view_3d(ctx, cache->v_l[l],
                         model->head_size * model->decoder_attn_heads, dctx->current_position + 1, 2,
                         ggml_element_size(cache->v_l[l]) * model->decoder_attn_heads * model->head_size,
                         ggml_element_size(cache->v_l[l]) * model->decoder_attn_heads * model->head_size * model->max_generation_size,
                         0);
-            v = ggml_cont_4d(ctx, ggml_transpose(ctx, v), dctx->current_position + 1, model->head_size, model->decoder_attn_heads, 2); 
+            v = ggml_cont_4d(ctx, ggml_transpose(ctx, v), dctx->current_position + 1, model->head_size, model->decoder_attn_heads, 2);
 
             // As noted in the encoder Dia uses the Neo-X protocol for RoPE.
             Qcur = ggml_rope(ctx, ggml_cont(ctx, ggml_reshape_4d(ctx, Qcur, model->head_size, model->decoder_attn_heads, batch.sequence_length, 2)), dctx->positions, model->head_size, 2);
@@ -583,22 +583,22 @@ static struct ggml_tensor * build_dia_decoder(
                 build_dia_cross_kv_store(ctx, dctx, model, cache, gf, encoder_hidden_states, l);
             }
 
-            struct ggml_tensor * cross_k = 
+            struct ggml_tensor * cross_k =
                 ggml_view_4d(
                         ctx, cache->cross_k_l[l], model->head_size, model->decoder_attn_heads, 2,
-                        model->max_encoder_context_length, model->head_size*ggml_element_size(cache->cross_k_l[l]), 
-                        model->head_size*model->decoder_attn_heads*ggml_element_size(cache->cross_k_l[l]), 
-                        model->head_size*model->decoder_attn_heads*2*ggml_element_size(cache->cross_k_l[l]),                 
+                        model->max_encoder_context_length, model->head_size*ggml_element_size(cache->cross_k_l[l]),
+                        model->head_size*model->decoder_attn_heads*ggml_element_size(cache->cross_k_l[l]),
+                        model->head_size*model->decoder_attn_heads*2*ggml_element_size(cache->cross_k_l[l]),
                         0);
             // the double permute operation shouldn't be necessary here, but it seems that currently ggml permute only currently alows for a single
             // axis pair to be transposed.
             cross_k = ggml_cont(ctx, ggml_permute(ctx, ggml_permute(ctx, cross_k, 0, 1, 3, 2), 0, 2, 1, 3));
 
-            struct ggml_tensor * cross_v = 
+            struct ggml_tensor * cross_v =
                 ggml_cont(ctx, ggml_view_4d(
                         ctx, cache->cross_v_l[l], model->max_encoder_context_length, model->head_size, model->decoder_attn_heads, 2,
-                        model->max_encoder_context_length*ggml_element_size(cache->cross_v_l[l]), 
-                        model->head_size*model->max_encoder_context_length*ggml_element_size(cache->cross_v_l[l]), 
+                        model->max_encoder_context_length*ggml_element_size(cache->cross_v_l[l]),
+                        model->head_size*model->max_encoder_context_length*ggml_element_size(cache->cross_v_l[l]),
                         model->head_size*model->max_encoder_context_length*model->decoder_attn_heads*ggml_element_size(cache->cross_v_l[l]),
                         0));
 
@@ -637,10 +637,10 @@ static struct ggml_tensor * build_dia_decoder(
 }
 
 void dia_runner::tokenize_sentence(std::string sentence, dia_ubatch & batch) {
-    // Dia's tokenization process is unusual. Essentially Dia takes the byte value for each character and uses that as 
-    // a token array. Additionally, because Dia performs a cfg-scale adjustment before sampling tokens, it is necessary to 
+    // Dia's tokenization process is unusual. Essentially Dia takes the byte value for each character and uses that as
+    // a token array. Additionally, because Dia performs a cfg-scale adjustment before sampling tokens, it is necessary to
     // generate with a conditioned context (i.e. with the text) and an unconditioned context (i.e. without any text) so that
-    // proper adjustments can be perfored at each generation step. This means that we need to pad the end of our tokens to the 
+    // proper adjustments can be performed at each generation step. This means that we need to pad the end of our tokens to the
     // max context size for both the conditional and unconditional sequence.
 
     // if the sentence isn't prepended by dialogue start tokens, [S1] or [S2], then append one.
@@ -699,7 +699,7 @@ dia_ubatch dia_runner::batch_from_sentence(std::string sentence) {
  * 1.  Dia cleans its output generation by adding the difference between its text based output (its conditional output) and its unconditional output
  *     to the conditional ouput before sampling. This is why the batch is set to two throughout the graph.
  *
- * 2.  Dia's decoder attends across the entire encoded space including the pad buffer which receives a unique attention mask. This is why the 
+ * 2.  Dia's decoder attends across the entire encoded space including the pad buffer which receives a unique attention mask. This is why the
  *     encoder sequence is always max length.
  */
 struct ggml_cgraph * dia_runner::build_dia_graph(dia_ubatch & batch) {
@@ -716,7 +716,7 @@ struct ggml_cgraph * dia_runner::build_dia_graph(dia_ubatch & batch) {
     ggml_set_name(cur, "decoder_output");
     ggml_build_forward_expand(gf, cur);
     free_build();
-    
+
     return gf;
 }
 
@@ -758,11 +758,11 @@ int dia_runner::decode(dia_ubatch & batch) {
         dctx->output_tokens.reserve(dctx->max_generation_size * model->n_output_heads);
     }
     ggml_backend_sched_reset(dctx->sched);
-        
+
     const size_t logits_size = model->output_vocab_size * dctx->max_generation_size * model->n_output_heads;
     const size_t prev_size = dctx->buf_output ? ggml_backend_buffer_get_size(dctx->buf_output) : 0;
     const size_t new_size  = logits_size * sizeof(float);
-    
+
     if (!dctx->buf_output || prev_size < new_size) {
         if (dctx->buf_output) {
             ggml_backend_buffer_free(dctx->buf_output);
@@ -772,7 +772,7 @@ int dia_runner::decode(dia_ubatch & batch) {
 
         dctx->buf_output = ggml_backend_buft_alloc_buffer(dctx->backend_cpu_buffer, new_size);
     }
-    
+
     dctx->logits = (float *) ggml_backend_buffer_get_base(dctx->buf_output);
 
     ggml_cgraph * gf = build_dia_graph(batch);
@@ -817,7 +817,7 @@ bool dia_runner::check_stopping(dia_ubatch & batch) {
     if (dctx->delay_steps == -1 && (batch.audio_tokens[0] == model->eos_token_id || dctx->current_position >= dctx->max_generation_size - model->max_delay)) {
         dctx->delay_steps = model->max_delay;
     }
-    
+
     if (dctx->delay_steps > 0) {
         int step_after_eos = model->max_delay - dctx->delay_steps;
         for (int i = 0; i < model->delay_pattern.size(); i++) {
@@ -907,5 +907,5 @@ void dia_runner::assign_weight(std::string name, ggml_tensor * tensor) {
         dac_runner->model->assign_weight(name.substr(14), tensor);
     } else {
         model->assign_weight(name, tensor);
-    }   
+    }
 }
diff --git a/otherarch/ttscpp/src/dia_model.h b/otherarch/ttscpp/src/dia_model.h
index bdca91d8c..6936b3945 100644
--- a/otherarch/ttscpp/src/dia_model.h
+++ b/otherarch/ttscpp/src/dia_model.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "dac_model.h"
-#include "sampler.h"
+#include "ttssampler.h"
 
 struct dia_encoder_layer {
     struct ggml_tensor * k;
@@ -22,7 +22,7 @@ struct dia_decoder_layer {
     struct ggml_tensor * self_attn_v;
     struct ggml_tensor * self_attn_o;
     struct ggml_tensor * self_attn_norm;
-    
+
     struct ggml_tensor * cross_attn_k;
     struct ggml_tensor * cross_attn_q;
     struct ggml_tensor * cross_attn_v;
@@ -76,7 +76,7 @@ struct dia_model : tts_model {
 
     dia_encoder * encoder;
     dia_decoder * decoder;
-    
+
     void assign_weight(std::string name, ggml_tensor * tensor);
     void assign_to_encoder(std::vector<std::string> parts, struct ggml_tensor * tensor, std::string name);
     void assign_to_decoder(std::vector<std::string> parts, struct ggml_tensor * tensor, std::string name);
@@ -103,15 +103,15 @@ struct dia_context : runner_context {
     uint32_t max_generation_size; // this is set by the generation context or defaults to the config set on dia model.
 
     std::vector<uint32_t> output_tokens;
-    struct dia_model * model;    
-    
+    struct dia_model * model;
+
     struct ggml_tensor * inp_tokens;
     struct ggml_tensor * audio_inp_tokens;
     struct ggml_tensor * positions;
     struct ggml_tensor * encode_positions;
     struct ggml_tensor * encode_attn_mask;
     struct ggml_tensor * cross_attn_mask;
-    
+
     void build_schedule() {
         runner_context::build_schedule(model->max_nodes());
     }
@@ -126,11 +126,11 @@ struct dia_kv_cache {
 
     std::vector<struct ggml_tensor *> k_l;
     std::vector<struct ggml_tensor *> v_l;
-    
+
     struct ggml_context * ctx;
     ggml_backend_buffer_type_t buft;
     ggml_backend_buffer_t buf;
-    
+
     void free() {
         ggml_free(ctx);
         ggml_backend_buffer_free(buf);
diff --git a/otherarch/ttscpp/src/general_neural_audio_codec.h b/otherarch/ttscpp/src/general_neural_audio_codec.h
index 1ec0a42b7..97180e49a 100644
--- a/otherarch/ttscpp/src/general_neural_audio_codec.h
+++ b/otherarch/ttscpp/src/general_neural_audio_codec.h
@@ -53,7 +53,7 @@ namespace general_neural_audio_codec {
 
         uint32_t padding;
         uint32_t stride;
-        
+
         std::vector<residual_unit> residual_blocks;
     };
 
diff --git a/otherarch/ttscpp/src/kokoro_model.h b/otherarch/ttscpp/src/kokoro_model.h
index b4f4f9671..7ffa9eba6 100644
--- a/otherarch/ttscpp/src/kokoro_model.h
+++ b/otherarch/ttscpp/src/kokoro_model.h
@@ -3,11 +3,11 @@
 
 #include <stdlib.h>
 #include "tts_model.h"
-#include "tokenizer.h"
+#include "ttstokenizer.h"
 #include "phonemizer.h"
 
 // Rather than using ISO 639-2 language codes, Kokoro voice pack specify their corresponding language via their first letter.
-// Below is a map that describes the relationship between those designations and espeak-ng's voice identifiers so that the 
+// Below is a map that describes the relationship between those designations and espeak-ng's voice identifiers so that the
 // appropriate phonemization protocol can inferred from the Kokoro voice.
 static std::map<char, std::string> KOKORO_LANG_TO_ESPEAK_ID = {
 	{'a', "gmw/en-US"},
@@ -22,7 +22,7 @@ static std::map<char, std::string> KOKORO_LANG_TO_ESPEAK_ID = {
 };
 
 struct lstm_cell {
-	std::vector<ggml_tensor*> weights; 
+	std::vector<ggml_tensor*> weights;
 	std::vector<ggml_tensor*> biases;
 	std::vector<ggml_tensor*> reverse_weights;
 	std::vector<ggml_tensor*> reverse_biases;
@@ -197,8 +197,8 @@ struct kokoro_model : tts_model {
 	// standard configuration for duration prediction
 	uint32_t f0_n_blocks = 3;
 	uint32_t n_duration_prediction_layers = 3;
-	// while it is technically possible for the duration predictor to assign 50 values per token there is no practical need to 
-	// allocate that many items to the sequence as it is impossible for all tokens to require such long durations and each 
+	// while it is technically possible for the duration predictor to assign 50 values per token there is no practical need to
+	// allocate that many items to the sequence as it is impossible for all tokens to require such long durations and each
 	// allocation increases node allocation size by O(N)
 	uint32_t max_duration_per_token = 20;
 	uint32_t style_half_size = 128;
@@ -221,7 +221,7 @@ struct kokoro_model : tts_model {
 	float noise_std = 0.003f;
 	float voice_threshold = 10.0f;
 	float sample_rate = 24000.0f;
-	std::string window = "hann"; 
+	std::string window = "hann";
 
 	// It is really annoying that ggml doesn't allow using non ggml tensors as the operator for simple math ops.
 	// This is just the constant defined above as a tensor.
@@ -259,7 +259,7 @@ struct kokoro_model : tts_model {
 	// Decoding and Generation portion of the model
 	struct kokoro_decoder * decoder;
 
-	// the default hidden states need to be initialized 
+	// the default hidden states need to be initialized
 	std::vector<lstm*> lstms;
 
 	size_t duration_node_counter = 0;
@@ -317,15 +317,15 @@ struct kokoro_duration_context : runner_context {
     ~kokoro_duration_context() {
         ggml_backend_buffer_free(buf_len_output);
     }
-    
+
     std::string voice = "af_alloy";
     struct kokoro_model * model;
     ggml_backend_buffer_t buf_len_output = nullptr;
 
-    
+
     size_t  logits_size = 0; // capacity (of floats) for logits
     float * lens 		= nullptr;
-    
+
     struct ggml_tensor * inp_tokens;
     struct ggml_tensor * positions;
     struct ggml_tensor * attn_mask;
@@ -356,7 +356,7 @@ struct kokoro_duration_response {
 };
 
 // This struct is intended to manage graph and compute for the duration prediction portion of the kokoro model.
-// Duration computation and speech generation are separated into distinct graphs because the precomputed graph structure of ggml doesn't 
+// Duration computation and speech generation are separated into distinct graphs because the precomputed graph structure of ggml doesn't
 // support the tensor dependent views that would otherwise be necessary.
 struct kokoro_duration_runner : tts_runner {
     kokoro_duration_runner(kokoro_model * model, kokoro_duration_context * context, single_pass_tokenizer * tokenizer): model(model), kctx(context), tokenizer(tokenizer) {};
@@ -375,7 +375,7 @@ struct kokoro_duration_runner : tts_runner {
     void init_build() {
         tts_runner::init_build(&kctx->buf_compute_meta);
     }
-    
+
     void prepare_post_load();
     struct kokoro_ubatch build_worst_case_batch();
     void set_inputs(kokoro_ubatch & batch);
@@ -397,7 +397,7 @@ struct kokoro_context : runner_context {
     }
 
     std::string voice = "af_alloy";
-    
+
     struct kokoro_model * model;
 
     uint32_t total_duration;
@@ -408,7 +408,7 @@ struct kokoro_context : runner_context {
     struct ggml_tensor * duration_mask;
     struct ggml_tensor * window_sq_sum; // needs to be calculatd from the generator window.
     struct ggml_tensor * uv_noise_data;
-    
+
     void build_schedule() {
         runner_context::build_schedule(model->max_gen_nodes()*30);
     }
diff --git a/otherarch/ttscpp/src/orpheus_model.cpp b/otherarch/ttscpp/src/orpheus_model.cpp
index 4866af208..1fba00ee1 100644
--- a/otherarch/ttscpp/src/orpheus_model.cpp
+++ b/otherarch/ttscpp/src/orpheus_model.cpp
@@ -150,7 +150,7 @@ orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads
     return octx;
 }
 
-void orpheus_runner::orpheus_kv_cache_init() {    
+void orpheus_runner::orpheus_kv_cache_init() {
     ggml_backend_buffer_type_t buft = nullptr;
     if (octx->backend != nullptr) {
 #ifdef GGML_USE_METAL
@@ -192,21 +192,21 @@ void orpheus_runner::orpheus_kv_cache_init() {
  }
 
  void orpheus_runner::orpheus_build_kv_store(struct ggml_context * ctx, struct ggml_cgraph * graph, struct ggml_tensor * k_cur, struct ggml_tensor * v_cur, int index, uint32_t n_tokens, int repeat) {
-    k_cur = ggml_rope_ext(ctx, ggml_cont(ctx, ggml_reshape_3d(ctx, k_cur, model->head_size, model->n_kv_attn_heads, n_tokens)), octx->positions, model->rope_frequencies, 
+    k_cur = ggml_rope_ext(ctx, ggml_cont(ctx, ggml_reshape_3d(ctx, k_cur, model->head_size, model->n_kv_attn_heads, n_tokens)), octx->positions, model->rope_frequencies,
                 model->head_size, 2,0, 500000.0f,
                 1.0f, 0.0f, 1.0f, 0.0f, 0.0f);
 
     // A performance comparison between this method, i.e. performing 3 incremental copy operations in order to achieve repeat_interleave,
     // and performing the repeat operation upfront before performign a single copy needs to be performed in order to better optimize this function.
-    // Additionally, it might be more performant for the values transposition to be performed prior to appending it to the cache, as it would save us 
+    // Additionally, it might be more performant for the values transposition to be performed prior to appending it to the cache, as it would save us
     // from incrementally larger transpositions with generation.
     for (int i = 0; i < repeat; i++) {
         struct ggml_tensor * k_cache_view = ggml_view_3d(
-            ctx, 
-            kv_self->k_l[index], 
+            ctx,
+            kv_self->k_l[index],
             model->head_size,
             model->n_kv_attn_heads,
-            n_tokens, 
+            n_tokens,
             ggml_element_size(kv_self->k_l[index]) * model->head_size * repeat,
             ggml_element_size(kv_self->k_l[index]) * model->n_attn_heads * model->head_size,
             ggml_element_size(kv_self->k_l[index]) * model->n_attn_heads * model->head_size * octx->current_position + i * ggml_element_size(kv_self->k_l[index]) * model->head_size
@@ -230,19 +230,19 @@ void orpheus_runner::orpheus_kv_cache_init() {
 struct ggml_cgraph * orpheus_runner::build_orpheus_graph(orpheus_ubatch & batch) {
     init_build();
     struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, 8192, false);
-    
+
     struct ggml_tensor * cur;
     struct ggml_tensor * inpL;
-    
+
     const int32_t full_sequence_length = octx->current_position + (uint32_t) batch.n_tokens;
     octx->positions = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.n_tokens);
     ggml_set_input(octx->positions);
     octx->inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.n_tokens);
     ggml_set_input(octx->inp_tokens);
     inpL = ggml_get_rows(ctx, model->embd, octx->inp_tokens);
-    
+
     struct ggml_tensor * KQ_mask_dec = build_attn_mask(ctx, octx, batch);
-    
+
     for (int l = 0; l < model->n_layers; l++) {
         struct ggml_tensor * residual = inpL;
         cur = orpheus_build_layer_norm(ctx, inpL, model->layers[l].input_norm);
@@ -261,8 +261,8 @@ struct ggml_cgraph * orpheus_runner::build_orpheus_graph(orpheus_ubatch & batch)
                         model->head_size, full_sequence_length, model->n_attn_heads,
                         ggml_element_size(kv_self->k_l[l]) * model->n_attn_heads * model->head_size,
                         ggml_element_size(kv_self->k_l[l]) * model->head_size,
-                        0));            
-            
+                        0));
+
             struct ggml_tensor * v =
                 ggml_view_2d(ctx, kv_self->v_l[l],
                         model->hidden_size, full_sequence_length,
@@ -272,7 +272,7 @@ struct ggml_cgraph * orpheus_runner::build_orpheus_graph(orpheus_ubatch & batch)
             v = ggml_cont_3d(ctx, ggml_transpose(ctx, v), full_sequence_length, model->head_size, model->n_attn_heads);
 
             Qcur = ggml_rope_ext(
-                ctx, ggml_cont(ctx, ggml_reshape_3d(ctx, Qcur, model->head_size, model->n_attn_heads, batch.n_tokens)), 
+                ctx, ggml_cont(ctx, ggml_reshape_3d(ctx, Qcur, model->head_size, model->n_attn_heads, batch.n_tokens)),
                 octx->positions, model->rope_frequencies, model->head_size, 2, 0, 500000.0f, // rope theta
                 1.0f, 0.0f, 1.0f, 0.0f, 0.0f);
 
@@ -286,7 +286,7 @@ struct ggml_cgraph * orpheus_runner::build_orpheus_graph(orpheus_ubatch & batch)
         }
 
         cur = ggml_add(ctx, attn_out, residual);
-        
+
         struct ggml_tensor * residualffn = cur;
 
         // mlp
@@ -298,7 +298,7 @@ struct ggml_cgraph * orpheus_runner::build_orpheus_graph(orpheus_ubatch & batch)
         cur = ggml_add(ctx, cur, residualffn);
         inpL = cur;
     }
-    
+
     cur = orpheus_build_layer_norm(ctx, cur, model->output_norm);
     // only about 40k of the output head is actually uses for generation purposes. Ideally the head tensor should be shrunk and sampled tokens should be incremented.
     cur = ggml_mul_mat(ctx, model->head, cur);
@@ -307,15 +307,15 @@ struct ggml_cgraph * orpheus_runner::build_orpheus_graph(orpheus_ubatch & batch)
     }
     ggml_build_forward_expand(gf, cur);
     free_build();
-    
+
     return gf;
 }
 
 void orpheus_runner::decode(orpheus_ubatch & batch) {
     ggml_backend_sched_reset(octx->sched);
-    
+
     octx->output_tokens.reserve(model->max_generation_size);
-    
+
     const size_t new_size  = model->vocab_size * model->max_generation_size * sizeof(float);
     octx->prep_output_buffer(new_size);
 
@@ -324,10 +324,10 @@ void orpheus_runner::decode(orpheus_ubatch & batch) {
     // the output is always the last tensor in the graph
     struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
     ggml_backend_sched_alloc_graph(octx->sched, gf);
-    
+
     set_inputs(batch);
     ggml_backend_sched_graph_compute_async(octx->sched, gf);
- 
+
     float * logits_out = octx->logits + octx->n_outputs * model->vocab_size;
     octx->get_ggml_node_data(res, logits_out, model->vocab_size * sizeof(float));
 
diff --git a/otherarch/ttscpp/src/orpheus_model.h b/otherarch/ttscpp/src/orpheus_model.h
index 9f02d7697..7be342d29 100644
--- a/otherarch/ttscpp/src/orpheus_model.h
+++ b/otherarch/ttscpp/src/orpheus_model.h
@@ -1,7 +1,7 @@
 #pragma once
 
-#include "sampler.h"
-#include "tokenizer.h"
+#include "ttssampler.h"
+#include "ttstokenizer.h"
 #include "snac_model.h"
 
 // Orpheus uses vLLM with a llama-3 architecture. The only critical difference from the normal llama architecture is the use of kv heads.
@@ -73,7 +73,7 @@ struct orpheus_context : runner_context {
     struct ggml_tensor * positions;
 };
 
-struct orpheus_kv_cache {    
+struct orpheus_kv_cache {
     ggml_type cache_type = GGML_TYPE_F32;
 
     std::vector<struct ggml_tensor *> k_l;
@@ -104,11 +104,11 @@ struct orpheus_ubatch {
 
 struct orpheus_runner : tts_runner {
     orpheus_runner(
-            orpheus_model * model, 
-            snac_runner * audio_decoder, 
-            orpheus_context * octx, 
-            bpe_tokenizer * bt, 
-            sampler * samp, 
+            orpheus_model * model,
+            snac_runner * audio_decoder,
+            orpheus_context * octx,
+            bpe_tokenizer * bt,
+            sampler * samp,
             orpheus_kv_cache * cache): model(model), srunner(audio_decoder), octx(octx), tokenizer(bt), generation_sampler(samp), kv_self(cache) {
         tts_runner::sampling_rate = 24000.0f;
         generation_sampler->n_output_heads = 1;
diff --git a/otherarch/ttscpp/src/parler_model.h b/otherarch/ttscpp/src/parler_model.h
index 463910f49..e6c3ba41f 100644
--- a/otherarch/ttscpp/src/parler_model.h
+++ b/otherarch/ttscpp/src/parler_model.h
@@ -2,8 +2,8 @@
 #define parler_model_h
 
 #include "dac_model.h"
-#include "t5_encoder_model.h"
-#include "sampler.h"
+#include "ttst5_encoder_model.h"
+#include "ttssampler.h"
 
 enum parler_tensor {
     PARLER_EMBD,
@@ -38,17 +38,17 @@ struct parler_layer {
     struct ggml_tensor * self_attn_o_proj;
     struct ggml_tensor * self_attn_norm;
     struct ggml_tensor * self_attn_norm_bias;
-    
+
     struct ggml_tensor * attn_k_proj;
     struct ggml_tensor * attn_q_proj;
     struct ggml_tensor * attn_v_proj;
     struct ggml_tensor * attn_o_proj;
     struct ggml_tensor * attn_norm;
     struct ggml_tensor * attn_norm_bias;
-    
+
     struct ggml_tensor * cross_k;
     struct ggml_tensor * cross_v;
-    
+
     struct ggml_tensor * fc1;
     struct ggml_tensor * fc2;
     struct ggml_tensor * final_norm;
@@ -74,18 +74,18 @@ struct parler_tts_model : tts_model {
     uint32_t prompt_vocab_size;
 
     bool use_cross_attn = true;
-    
+
     std::vector<struct ggml_tensor*> embds;
     std::vector<parler_layer*> layers;
     std::vector<struct ggml_tensor*> heads;
-    
+
     struct ggml_tensor * precomputed_input_emb;
     struct ggml_tensor * precomputed_positional_embds;
-    
+
     struct ggml_tensor * layer_norm;
     struct ggml_tensor * layer_norm_bias;
     struct ggml_tensor * prompt_embd;
-    
+
     void assign_weight(std::string name, ggml_tensor * tensor);
     void prep_constants(gguf_context * meta);
     void prep_layers(gguf_context * meta);
@@ -107,21 +107,21 @@ struct parler_context : runner_context {
     std::vector<bool> eos_seen;
 
     bool use_cache = true;
-    
+
     size_t  output_size = 0; // capacity (of tokens positions) for the output buffers
     int32_t n_outputs   = 0; // number of actually-used outputs in the current ubatch or last logical batch
     uint32_t current_position = 0; // current position in the active sequence
     uint32_t prompt_end_position = 0; // the position of the text prompt termination (used for adjusting the cache when incrementally generating)
     int32_t seq_id; // a unique identifier associated with the active sequence.
-    
+
     std::vector<uint32_t> output_tokens;
-    
+
     struct ggml_tensor * inp_tokens;
     struct ggml_tensor * audio_inp_tokens;
     struct ggml_tensor * positions;
     struct ggml_tensor * attn_mask;
     struct ggml_tensor * attn_mask_cross;
-    
+
     void build_schedule() {
         runner_context::build_schedule(model->max_nodes());
     }
@@ -130,17 +130,17 @@ struct parler_context : runner_context {
 
 struct parler_kv_cache {
     int32_t seq_id;
-    
+
     ggml_type type_k = GGML_TYPE_F32;
     ggml_type type_v = GGML_TYPE_F32;
 
     std::vector<struct ggml_tensor *> k_l;
     std::vector<struct ggml_tensor *> v_l;
-    
+
     struct ggml_context * ctx;
     ggml_backend_buffer_type_t buft;
     ggml_backend_buffer_t buf;
-    
+
     void free() {
         ggml_free(ctx);
         ggml_backend_buffer_free(buf);
@@ -152,8 +152,8 @@ struct parler_kv_cache {
 };
 
 struct parler_ubatch {
-    parler_ubatch(bool audio_generation, size_t n_tokens, size_t n_audio_tokens, size_t sequence_length, 
-        uint32_t * tokens, uint32_t * audio_tokens, uint32_t * positions, uint32_t * true_order, 
+    parler_ubatch(bool audio_generation, size_t n_tokens, size_t n_audio_tokens, size_t sequence_length,
+        uint32_t * tokens, uint32_t * audio_tokens, uint32_t * positions, uint32_t * true_order,
         int current_step): audio_generation(audio_generation), n_tokens(n_tokens), n_audio_tokens(n_audio_tokens), sequence_length(sequence_length), tokens(tokens), audio_tokens(audio_tokens), positions(positions), true_order(true_order), current_step(current_step) {};
     parler_ubatch() {};
     bool audio_generation; // whether we are receiving codebook decoded tokens or text tokens
diff --git a/otherarch/ttscpp/src/phonemizer.cpp b/otherarch/ttscpp/src/phonemizer.cpp
index 36da56723..9fc58133c 100644
--- a/otherarch/ttscpp/src/phonemizer.cpp
+++ b/otherarch/ttscpp/src/phonemizer.cpp
@@ -543,7 +543,7 @@ dictionary_response * phoneme_dictionary::lookup(corpus * text, std::string valu
 	}
 	std::vector<dictionary_response*> possibilities = lookup_map.at(value);
 	for (auto possible : possibilities) {
-		if (possible->code == SUCCESS || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
+		if (possible->code == SUCCESS_TOTAL || (possible->code == SUCCESS_PARTIAL && possible->is_match(text, flags))) {
 			return possible;
 		}
 	}
@@ -818,7 +818,7 @@ bool phonemizer::process_word(corpus* text, std::string* output, std::string wor
 			output->append(" ");
 		}
 		flags->update_for_word(word);
-		if (response->code != SUCCESS) {
+		if (response->code != SUCCESS_TOTAL) {
 			word += response->after_match;
 			output->append(response->value);
 			text->size_pop(word.size()+unaccented_size_difference);
@@ -1072,7 +1072,7 @@ dictionary_response * response_from_string(std::string value, std::string key) {
 	bool not_at_start = key[0] == '#';
 	bool not_at_end = key.back() == '#';
     if (!has_spacing) {
-    	dictionary_response * resp = new dictionary_response(SUCCESS, value);
+    	dictionary_response * resp = new dictionary_response(SUCCESS_TOTAL, value);
     	resp->expects_to_be_proceeded_by_number = expects_to_be_proceeded_by_number;
     	resp->not_at_clause_end = not_at_end;
     	resp->not_at_clause_start = not_at_start;
diff --git a/otherarch/ttscpp/src/snac_model.h b/otherarch/ttscpp/src/snac_model.h
index 9450c1b75..8b546dc12 100644
--- a/otherarch/ttscpp/src/snac_model.h
+++ b/otherarch/ttscpp/src/snac_model.h
@@ -4,7 +4,7 @@
 
 // SNAC, Scale Neural Audio Codec, is another neural audio codec much like DAC.
 // The key differences are that it uses grouping in the residual units of its layers,
-// performs a repeat_interleave over the second and third input channels, applies 
+// performs a repeat_interleave over the second and third input channels, applies
 // a noise convolutional layer after input encoding for each layer, and applies
 // an extra convolutional layer before residual layers are applied.
 struct snac_model : tts_model {
@@ -19,7 +19,7 @@ struct snac_model : tts_model {
     uint32_t noise_steps[4] = {8, 64, 256, 512};
     uint32_t noise_steps_sum = 840;
     bool use_noise = true;
-    
+
     struct ggml_tensor * repeat_interleave_buffer;
 
     struct ggml_tensor * in_conv_kernel;
@@ -46,12 +46,12 @@ struct snac_model : tts_model {
 // the context used for running the snac model
 struct snac_context : runner_context {
     snac_context(snac_model * model, int n_threads): runner_context(n_threads), model(model) {};
-    
+
     struct snac_model * model;
-        
+
     struct ggml_tensor * inp_tokens;
     struct ggml_tensor * noise;
-    
+
     void build_schedule() {
         runner_context::build_schedule(model->max_nodes());
     }
@@ -74,11 +74,11 @@ struct snac_runner : tts_runner {
     }
     snac_model * model;
     snac_context * sctx;
-    
+
     void init_build() {
         tts_runner::init_build(&sctx->buf_compute_meta);
     }
-    
+
     void set_inputs(std::vector<std::vector<uint32_t>> & tokens);
     void prepare_post_load();
     struct ggml_cgraph * build_snac_graph(size_t sequence_length);
diff --git a/otherarch/ttscpp/src/args.cpp b/otherarch/ttscpp/src/ttsargs.cpp
similarity index 100%
rename from otherarch/ttscpp/src/args.cpp
rename to otherarch/ttscpp/src/ttsargs.cpp
diff --git a/otherarch/ttscpp/src/sampler.cpp b/otherarch/ttscpp/src/ttssampler.cpp
similarity index 98%
rename from otherarch/ttscpp/src/sampler.cpp
rename to otherarch/ttscpp/src/ttssampler.cpp
index b2f2cc1b7..26340db82 100644
--- a/otherarch/ttscpp/src/sampler.cpp
+++ b/otherarch/ttscpp/src/ttssampler.cpp
@@ -1,4 +1,4 @@
-#include "sampler.h"
+#include "ttssampler.h"
 
 void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
     // assume that we are pointing to the start of the first token output;
@@ -6,7 +6,7 @@ void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
         return max(logits, output_tokens);
     }
     std::vector<uint32_t> max_vals;
-    // the max_head_probs variable is used when top-p is applied but exists to address the case in which top-k and top-p cause the cumulative probability of the nucleus to beless than or 
+    // the max_head_probs variable is used when top-p is applied but exists to address the case in which top-k and top-p cause the cumulative probability of the nucleus to be less than or
     // equal to top_p;
     std::vector<float> max_head_probs;
 
@@ -189,7 +189,7 @@ void sampler::max(float * logits, std::vector<uint32_t> & output_tokens) {
         uint32_t token_id = 0;
         for (uint32_t ii = 0; ii < vocab_size; ii++) {
             float v = *(logits+i*vocab_size+ii);
-            // while repetition penalty will never be used for maximum token selection, it is used for the logarithmic stabilization of 
+            // while repetition penalty will never be used for maximum token selection, it is used for the logarithmic stabilization of
             // the softmax function in which case it is possible for repetition counts to be set.
             if (has_repetition_penalty && last_token_ids[i] == ii) {
                 v /= (pow(repetition_penalty, repetition_counts[i]));
diff --git a/otherarch/ttscpp/src/sampler.h b/otherarch/ttscpp/src/ttssampler.h
similarity index 99%
rename from otherarch/ttscpp/src/sampler.h
rename to otherarch/ttscpp/src/ttssampler.h
index 0b8941e4c..58dae542f 100644
--- a/otherarch/ttscpp/src/sampler.h
+++ b/otherarch/ttscpp/src/ttssampler.h
@@ -21,7 +21,7 @@ struct sampler {
     std::vector<uint32_t> repetition_counts;
     bool do_sample = true;
     bool apply_softmax = true;
-    
+
     void sample(float * logits, std::vector<uint32_t> & output_tokens);
     void softmax(float * logits, std::vector<std::vector<size_t>> picks, std::vector<uint32_t> max_indices);
     void max(float * logits, std::vector<uint32_t> & output_tokens);
diff --git a/otherarch/ttscpp/src/t5_encoder_model.cpp b/otherarch/ttscpp/src/ttst5_encoder_model.cpp
similarity index 99%
rename from otherarch/ttscpp/src/t5_encoder_model.cpp
rename to otherarch/ttscpp/src/ttst5_encoder_model.cpp
index 2dbc7614d..cce9afdf7 100644
--- a/otherarch/ttscpp/src/t5_encoder_model.cpp
+++ b/otherarch/ttscpp/src/ttst5_encoder_model.cpp
@@ -1,4 +1,4 @@
-#include "t5_encoder_model.h"
+#include "ttst5_encoder_model.h"
 
 static const std::map<std::string, t5_tensor> T5_TENSOR_GGUF_LOOKUP = {
     {"t5encoder.token_embd", T5_EMBD},
@@ -139,7 +139,7 @@ void t5_encoder::prep_constants(gguf_context * meta) {
     int bos_token_id_key = gguf_find_key(meta, "tokenizer.ggml.bos_token_id");
     if (bos_token_id_key != -1) {
         bos_token_id = gguf_get_val_u32(meta, bos_token_id_key);
-    }    
+    }
 
     int eos_token_id_key = gguf_find_key(meta, "tokenizer.ggml.eos_token_id");
     if (eos_token_id_key != -1) {
@@ -219,7 +219,7 @@ struct ggml_cgraph * t5_runner::build_t5_graph(t5_ubatch & batch) {
 
     struct ggml_tensor * cur;
     struct ggml_tensor * inpL;
-    
+
     //t5ctx->positions = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.n_tokens);
     //ggml_set_input(t5ctx->positions);
 
@@ -233,7 +233,7 @@ struct ggml_cgraph * t5_runner::build_t5_graph(t5_ubatch & batch) {
 
     struct ggml_tensor * KQ_mask_dec = build_t5_attn_mask(ctx, t5ctx, batch);
     struct ggml_tensor * pos_bias = build_t5_pos_bias(ctx, t5ctx->inp_pos_bucket, model->relative_attn_bias);
-    
+
     for (int l = 0; l < model->n_layers; l++) {
         struct ggml_tensor * residual = inpL;
 
@@ -293,7 +293,7 @@ struct ggml_cgraph * t5_runner::build_t5_graph(t5_ubatch & batch) {
     ggml_build_forward_expand(gf, cur);
 
     free_build();
-    
+
     return gf;
 }
 
@@ -312,7 +312,7 @@ void t5_runner::set_inputs(t5_ubatch & batch) {
         for (int ii = 0; ii < batch.n_tokens; ii++) {
         	int ab_rpos = abs(i - ii);
         	int rpos = i - ii;
-            attn_mask[i*batch.n_tokens + ii] = 0.0f; //ii > i ? -INFINITY : 0.0f; 
+            attn_mask[i*batch.n_tokens + ii] = 0.0f; //ii > i ? -INFINITY : 0.0f;
             pos_bucket[i*batch.n_tokens + ii] = (uint32_t) (rpos > 0 ? n_buckets : 0) + (ab_rpos < max_exact ? ab_rpos : std::min((n_buckets - 1), (max_exact + (int)((log((ab_rpos / max_exact)) / logarithmic_denominator) * max_exact))));
         }
     }
@@ -324,10 +324,10 @@ void t5_runner::run(uint32_t * input_tokens, uint32_t sequence_length, struct tt
     batch.input_tokens = input_tokens;
     batch.n_tokens = sequence_length;
     ggml_backend_sched_reset(t5ctx->sched);
-    
+
     const size_t prev_size = t5ctx->buf_output ? ggml_backend_buffer_get_size(t5ctx->buf_output) : 0;
     const size_t new_size = model->max_context_length * model->output_size * sizeof(float);
-    
+
     if (!t5ctx->buf_output || prev_size < new_size) {
         if (t5ctx->buf_output) {
             ggml_backend_buffer_free(t5ctx->buf_output);
@@ -337,7 +337,7 @@ void t5_runner::run(uint32_t * input_tokens, uint32_t sequence_length, struct tt
 
         t5ctx->buf_output = ggml_backend_buft_alloc_buffer(t5ctx->backend_cpu_buffer, new_size);
     }
-    
+
     outputs->data = (float *) ggml_backend_buffer_get_base(t5ctx->buf_output);
     ggml_backend_buffer_clear(t5ctx->buf_output, 0);
     struct ggml_cgraph * gf = NULL;
diff --git a/otherarch/ttscpp/src/t5_encoder_model.h b/otherarch/ttscpp/src/ttst5_encoder_model.h
similarity index 99%
rename from otherarch/ttscpp/src/t5_encoder_model.h
rename to otherarch/ttscpp/src/ttst5_encoder_model.h
index 9a801873d..eadbf4d55 100644
--- a/otherarch/ttscpp/src/t5_encoder_model.h
+++ b/otherarch/ttscpp/src/ttst5_encoder_model.h
@@ -2,7 +2,7 @@
 #define t5_encoder_model_h
 
 #include "tts_model.h"
-#include "tokenizer.h"
+#include "ttstokenizer.h"
 
 
 enum t5_tensor {
@@ -75,14 +75,14 @@ void assign_to_t5_layer(t5_encoder * model, t5_layer & layer, std::string name,
 
 struct t5_context : runner_context {
     t5_context(t5_encoder * model, int n_threads): runner_context(n_threads), model(model) {};
-    
+
     struct t5_encoder * model;
-    
+
     struct ggml_tensor * inp_tokens;
     struct ggml_tensor * positions;
     struct ggml_tensor * attn_mask;
     struct ggml_tensor * inp_pos_bucket;
-    
+
     void build_schedule() {
         runner_context::build_schedule(model->max_nodes());
     }
@@ -116,7 +116,7 @@ struct t5_runner : tts_runner {
     void init_build() {
         tts_runner::init_build(&t5ctx->buf_compute_meta);
     }
-    
+
     void prepare_post_load();
     struct t5_ubatch build_worst_case_batch();
     void set_inputs(t5_ubatch & batch);
diff --git a/otherarch/ttscpp/src/tokenizer.cpp b/otherarch/ttscpp/src/ttstokenizer.cpp
similarity index 99%
rename from otherarch/ttscpp/src/tokenizer.cpp
rename to otherarch/ttscpp/src/ttstokenizer.cpp
index 9b870d44a..cabac7089 100644
--- a/otherarch/ttscpp/src/tokenizer.cpp
+++ b/otherarch/ttscpp/src/ttstokenizer.cpp
@@ -1,4 +1,4 @@
-#include "tokenizer.h"
+#include "ttstokenizer.h"
 
 void token_trie::add(const std::string & gram, uint32_t token) {
     _add(gram, token, 0);
diff --git a/otherarch/ttscpp/src/tokenizer.h b/otherarch/ttscpp/src/ttstokenizer.h
similarity index 100%
rename from otherarch/ttscpp/src/tokenizer.h
rename to otherarch/ttscpp/src/ttstokenizer.h