diff --git a/README.md b/README.md index f8c3a3e41..a3f97e505 100644 --- a/README.md +++ b/README.md @@ -176,3 +176,4 @@ when you can't use the precompiled binary directly, we provide an automated buil - Image Generation: [Anything v3](https://huggingface.co/admruul/anything-v3.0/resolve/main/Anything-V3.0-pruned-fp16.safetensors) or [Deliberate V2](https://huggingface.co/Yntec/Deliberate2/resolve/main/Deliberate_v2.safetensors) or [Dreamshaper SDXL](https://huggingface.co/Lykon/dreamshaper-xl-v2-turbo/resolve/main/DreamShaperXL_Turbo_v2_1.safetensors) - Image Recognition MMproj: [Pick the correct one for your model architecture here](https://huggingface.co/koboldcpp/mmproj/tree/main) - Speech Recognition: [Whisper models for Speech-To-Text](https://huggingface.co/koboldcpp/whisper/tree/main) + - Text-To-Speech: [TTS models for Narration](https://huggingface.co/koboldcpp/tts/tree/main) diff --git a/colab.ipynb b/colab.ipynb index 6584d0bf7..9f1bdbe8b 100644 --- a/colab.ipynb +++ b/colab.ipynb @@ -67,6 +67,11 @@ "LoadSpeechModel = False #@param {type:\"boolean\"}\n", "SpeechModel = \"https://huggingface.co/koboldcpp/whisper/resolve/main/whisper-base.en-q5_1.bin\" #@param [\"https://huggingface.co/koboldcpp/whisper/resolve/main/whisper-base.en-q5_1.bin\"]{allow-input: true}\n", "WCommand = \"\"\n", + "#@markdown <hr>\n", + "LoadTTSModel = False #@param {type:\"boolean\"}\n", + "TTSModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\"]{allow-input: true}\n", + "WavTokModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\"]{allow-input: true}\n", + "TTSCommand = \"\"\n", "\n", "import os\n", "if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\n", @@ -85,6 +90,10 @@ " WCommand = \"--whispermodel wmodel.bin\"\n", "else:\n", " WCommand = \"\"\n", + "if TTSModel and WavTokModel and LoadTTSModel:\n", + " TTSCommand = \"--ttsmodel ttsmodel.bin --ttswavtokenizer ttswavtok.bin --ttsgpu\"\n", + "else:\n", + " TTSCommand = \"\"\n", "if FlashAttention:\n", " FACommand = \"--flashattention\"\n", "else:\n", @@ -110,7 +119,10 @@ " !aria2c -x 10 -o imodel.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $ImgModel\n", "if WCommand:\n", " !aria2c -x 10 -o wmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $SpeechModel\n", - "!./koboldcpp_linux model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand\n" + "if TTSCommand:\n", + " !aria2c -x 10 -o ttsmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $TTSModel\n", + " !aria2c -x 10 -o ttswavtok.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $WavTokModel\n", + "!./koboldcpp_linux model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand\n" ] } ],