diff --git a/class.py b/class.py
index 82c83b877..a6a148581 100644
--- a/class.py
+++ b/class.py
@@ -273,7 +273,7 @@ class model_backend(InferenceModel):
         unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap, usemlock=False,
         noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
         useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
-        onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False, benchmark=False, nocertify=False, sdconfig=None, mmproj=None)
+        onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False, benchmark=None, nocertify=False, sdconfig=None, mmproj=None)
 
         #koboldcpp.main(kcppargs,False) #initialize library without enabling Lite http server
 
diff --git a/colab.ipynb b/colab.ipynb
index 32db3d630..f775f76b9 100644
--- a/colab.ipynb
+++ b/colab.ipynb
@@ -52,6 +52,11 @@
 "Layers = 99 #@param [99]{allow-input: true}\r\n",
 "ContextSize = 4096 #@param [4096] {allow-input: true}\r\n",
 "ForceRebuild = False #@param {type:\"boolean\"}\r\n",
+"#@markdown <hr>\r\n",
+"LoadLLaVAmmproj = False #@param {type:\"boolean\"}\r\n",
+"LLaVAmmproj = \"https://huggingface.co/concedo/koboldcpp-mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\" #@param [\"https://huggingface.co/concedo/koboldcpp-mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\"]{allow-input: true}\r\n",
+"Vcommand = \"\"\r\n",
+"#@markdown <hr>\r\n",
 "LoadImgModel = False #@param {type:\"boolean\"}\r\n",
 "ImgModel = \"https://huggingface.co/koboldcpp/imgmodel/resolve/main/imgmodel_older_q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/imgmodel/resolve/main/imgmodel_older_q4_0.gguf\"]{allow-input: true}\r\n",
 "SCommand = \"\"\r\n",
@@ -67,6 +72,10 @@
 "kvers = kvers[0]\r\n",
 "if ForceRebuild:\r\n",
 "  kvers = \"force_rebuild\"\r\n",
+"if LLaVAmmproj and LoadLLaVAmmproj:\r\n",
+"  Vcommand = \"--mmproj vmodel.gguf\"\r\n",
+"else:\r\n",
+"  Vcommand = \"\"\r\n",
 "if ImgModel and LoadImgModel:\r\n",
 "  SCommand = \"--sdconfig imodel.gguf clamped 4 quant\"\r\n",
 "else:\r\n",
@@ -79,9 +88,11 @@
 "!apt update\r\n",
 "!apt install aria2 -y\r\n",
 "!aria2c -x 10 -o model.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
+"if Vcommand:\r\n",
+"  !aria2c -x 10 -o vmodel.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $LLaVAmmproj\r\n",
 "if SCommand:\r\n",
 "  !aria2c -x 10 -o imodel.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $ImgModel\r\n",
-"!python koboldcpp.py model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --quiet --remotetunnel $SCommand\r\n"
+"!python koboldcpp.py model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --quiet --remotetunnel $Vcommand $SCommand\r\n"
 ]
 }
 ],
diff --git a/koboldcpp.py b/koboldcpp.py
index 2340b717b..0089dcfef 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -2870,7 +2870,7 @@ def main(launch_args,start_server=True):
         timer_thread = threading.Timer(1, onready_subprocess) #1 second delay
         timer_thread.start()
 
-    if args.model_param and args.benchmark is not None:
+    if args.model_param and args.benchmark:
         from datetime import datetime, timezone
         global libname
         start_server = False