koboldcpp/colab.ipynb
2024-03-07 23:56:59 +08:00

107 lines
No EOL
5.6 KiB
Text
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "view-in-github"
},
"source": [
"<a href=\"https://colab.research.google.com/github/LostRuins/koboldcpp/blob/concedo/colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2FCn5tmpn3UV"
},
"source": [
"## Welcome to the Official KoboldCpp Colab Notebook\n",
"It's really easy to get started. Just press the two **Play** buttons below, and then connect to the **Cloudflare URL** shown at the end.\n",
"You can select a model from the dropdown, or enter a **custom URL** to a GGUF model (Example: `https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf`)\n",
"\n",
"**Keep this page open and occasionally check for captchas so that your AI is not shut down**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QNaj3u0jn3UW"
},
"outputs": [],
"source": [
"#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n",
"%%html\n",
"<!-- Looping silent audio clip; playing it keeps the browser tab alive on mobile. -->\n",
"<b>Press play on the music player to keep the tab alive, then start KoboldCpp below</b><br/>\n",
"<audio autoplay=\"\" src=\"https://raw.githubusercontent.com/KoboldAI/KoboldAI-Client/main/colab/silence.m4a\" loop controls></audio>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "uJS9i_Dltv8Y"
},
"outputs": [],
"source": [
"#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\r\n",
"\r\n",
"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf\",\"https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/afrideva/phi-2-uncensored-GGUF/resolve/main/phi-2-uncensored.q3_k_m.gguf\"]{allow-input: true}\r\n",
"Layers = 99 #@param [99]{allow-input: true}\r\n",
"ContextSize = 4096 #@param [4096] {allow-input: true}\r\n",
"ForceRebuild = False #@param {type:\"boolean\"}\r\n",
"# Image-model settings are plain variables with no form field; while SDModel is empty, SDCommand stays empty.\r\n",
"LoadImageModel = False \r\n",
"SDModel = \"\"\r\n",
"SDCommand = \"\"\r\n",
"\r\n",
"import os\r\n",
"# Abort early when the runtime has no GPU; the presence of the nvidia-smi binary is used as the check.\r\n",
"if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\r\n",
" raise RuntimeError(\"⚠Colab did not give you a GPU due to usage limits, this can take a few hours before they let you back in. Check out https://lite.koboldai.net for a free alternative (that does not provide an API link but can load KoboldAI saves and chat cards) or subscribe to Colab Pro for immediate access.⚠️\")\r\n",
"\r\n",
"%cd /content\r\n",
"!git clone https://github.com/LostRuins/koboldcpp\r\n",
"%cd /content/koboldcpp\r\n",
"# Read the version string from the KcppVersion line of koboldcpp.py; it selects which prebuilt binary to request.\r\n",
"kvers = !(cat koboldcpp.py | grep 'KcppVersion = ' | cut -d '\"' -f2)\r\n",
"kvers = kvers[0]\r\n",
"# NOTE(review): presumably no prebuilt binary is served for this value, so the make step below runs - confirm.\r\n",
"if ForceRebuild:\r\n",
" kvers = \"force_rebuild\"\r\n",
"# Extra launch arguments for the image model are only set when both SDModel and LoadImageModel are truthy.\r\n",
"if SDModel and LoadImageModel:\r\n",
" SDCommand = \"--sdconfig sdmodel.safetensors clamped 4 quant\"\r\n",
"else:\r\n",
" SDCommand = \"\"\r\n",
"!echo Finding prebuilt binary for {kvers}\r\n",
"# The URL is quoted so the shell does not treat the ? as a glob character.\r\n",
"!wget -O dlfile.tmp \"https://kcppcolab.concedo.workers.dev/?{kvers}\" && mv dlfile.tmp koboldcpp_cublas.so\r\n",
"# When the download succeeded the .so exists and the build is skipped; otherwise build from source.\r\n",
"!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
"!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1\r\n",
"# NOTE(review): presumably koboldcpp.py also looks for the library under the .dat name - confirm.\r\n",
"!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
"!apt update\r\n",
"!apt install aria2 -y\r\n",
"# Download URLs are quoted so characters such as & in a custom URL are not interpreted by the shell.\r\n",
"!aria2c -x 10 -o model.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none \"$Model\"\r\n",
"if SDCommand:\r\n",
" !aria2c -x 10 -o sdmodel.safetensors --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none \"$SDModel\"\r\n",
"# SDCommand is left unquoted on purpose: it expands to several separate arguments (or to nothing).\r\n",
"!python koboldcpp.py model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --quiet --remotetunnel $SDCommand\r\n"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"cell_execution_strategy": "setup",
"gpuType": "T4",
"include_colab_link": true,
"private_outputs": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}