mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
improvements to model downloader and chat completions adapter loader
This commit is contained in:
parent
3fdbe3351d
commit
6b0756506b
2 changed files with 29 additions and 25 deletions
24
colab.ipynb
24
colab.ipynb
|
@ -48,12 +48,12 @@
|
|||
"source": [
|
||||
"#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\r\n",
|
||||
"\r\n",
|
||||
"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\"]{allow-input: true}\r\n",
|
||||
"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
|
||||
"Layers = 99 #@param [99]{allow-input: true}\r\n",
|
||||
"ContextSize = 4096 #@param [4096] {allow-input: true}\r\n",
|
||||
"#@markdown <hr>\r\n",
|
||||
"LoadLLaVAmmproj = False #@param {type:\"boolean\"}\r\n",
|
||||
"LLaVAmmproj = \"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\" #@param [\"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/mistral-7b-mmproj-v1.5-Q4_1.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-7b-mmproj-v1.5-Q4_0.gguf\"]{allow-input: true}\r\n",
|
||||
"LLaVAmmproj = \"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\" #@param [\"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/mistral-7b-mmproj-v1.5-Q4_1.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-7b-mmproj-v1.5-Q4_0.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/LLaMA3-8B_mmproj-Q4_1.gguf\"]{allow-input: true}\r\n",
|
||||
"VCommand = \"\"\r\n",
|
||||
"#@markdown <hr>\r\n",
|
||||
"LoadImgModel = False #@param {type:\"boolean\"}\r\n",
|
||||
|
@ -87,6 +87,9 @@
|
|||
"!chmod +x ./koboldcpp_linux\r\n",
|
||||
"!apt update\r\n",
|
||||
"!apt install aria2 -y\r\n",
|
||||
"# simple fix for a common URL mistake\r\n",
|
||||
"if \"https://huggingface.co/\" in Model and \"/blob/main/\" in Model: \r\n",
|
||||
" Model = Model.replace(\"/blob/main/\", \"/resolve/main/\")\r\n",
|
||||
"!aria2c -x 10 -o model.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
|
||||
"if VCommand:\r\n",
|
||||
" !aria2c -x 10 -o vmodel.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $LLaVAmmproj\r\n",
|
||||
|
@ -99,21 +102,8 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"cell_execution_strategy": "setup",
|
||||
"gpuType": "T4",
|
||||
"include_colab_link": true,
|
||||
"private_outputs": true,
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
"language_info": {},
|
||||
"orig_nbformat": 3
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
|
|
30
koboldcpp.py
30
koboldcpp.py
|
@ -3302,16 +3302,27 @@ def main(launch_args,start_server=True):
|
|||
|
||||
# try to read chat completions adapter
|
||||
if args.chatcompletionsadapter:
|
||||
ccadapter_path = None
|
||||
adapt_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'kcpp_adapters')
|
||||
adapt_dir = adapt_dir if os.path.isdir(adapt_dir) else None
|
||||
if isinstance(args.chatcompletionsadapter, str) and os.path.exists(args.chatcompletionsadapter):
|
||||
print(f"Loading Chat Completions Adapter...")
|
||||
with open(args.chatcompletionsadapter, 'r') as f:
|
||||
ccadapter_path = os.path.abspath(args.chatcompletionsadapter)
|
||||
elif isinstance(args.chatcompletionsadapter, str) and adapt_dir:
|
||||
filename = args.chatcompletionsadapter
|
||||
if not filename.endswith(".json"):
|
||||
filename += ".json"
|
||||
premade_adapt_path = os.path.join(adapt_dir,filename)
|
||||
if os.path.exists(premade_adapt_path):
|
||||
ccadapter_path = os.path.abspath(premade_adapt_path)
|
||||
if ccadapter_path:
|
||||
print(f"Loading Chat Completions Adapter: {ccadapter_path}")
|
||||
with open(ccadapter_path, 'r') as f:
|
||||
global chatcompl_adapter
|
||||
chatcompl_adapter = json.load(f)
|
||||
print(f"Chat Completions Adapter Loaded")
|
||||
else:
|
||||
print(f"Warning: Chat Completions Adapter {args.chatcompletionsadapter} invalid or not found.")
|
||||
|
||||
|
||||
if args.model_param and args.model_param!="":
|
||||
if args.model_param.endswith("?download=true"):
|
||||
args.model_param = args.model_param.replace("?download=true","")
|
||||
|
@ -3320,13 +3331,16 @@ def main(launch_args,start_server=True):
|
|||
mdlfilename = os.path.basename(args.model_param)
|
||||
#check if file already exists
|
||||
if mdlfilename:
|
||||
if not os.path.exists(mdlfilename):
|
||||
print(f"Downloading model from external URL at {args.model_param}")
|
||||
subprocess.run(f"curl -fL {args.model_param} -o {mdlfilename}", shell=True, capture_output=True, text=True, check=True, encoding='utf-8')
|
||||
print(f"Download {mdlfilename} completed...", flush=True)
|
||||
if os.path.exists(mdlfilename) and os.path.getsize(mdlfilename) > 10000000: #10MB trigger
|
||||
print(f"Model file {mdlfilename} already exists, not redownloading.")
|
||||
args.model_param = mdlfilename
|
||||
else:
|
||||
print(f"Model file {mdlfilename} already exists, not redownloading.")
|
||||
dl_url = args.model_param
|
||||
if "https://huggingface.co/" in dl_url and "/blob/main/" in dl_url:
|
||||
dl_url = dl_url.replace("/blob/main/", "/resolve/main/")
|
||||
print(f"Downloading model from external URL at {dl_url}")
|
||||
subprocess.run(f"curl -fL {dl_url} -o {mdlfilename}", shell=True, capture_output=True, text=True, check=True, encoding='utf-8')
|
||||
print(f"Download {mdlfilename} completed...", flush=True)
|
||||
args.model_param = mdlfilename
|
||||
|
||||
# sanitize and replace the default vanity name. remember me....
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue