From 86ba1336a9e21339c2bc1b47548b39ec0cb7d324 Mon Sep 17 00:00:00 2001
From: chenxl
Date: Thu, 1 Aug 2024 04:01:00 +0000
Subject: [PATCH] [feature] add support for building docker image

---
 Dockerfile                      | 34 +++++++++++++++++++++++++++++++++
 README.md                       | 34 +++++++++++++++++----------------
 doc/en/Docker.md                | 27 ++++++++++++++++++++++++++
 doc/en/deepseek-v2-injection.md |  6 +++++-
 4 files changed, 84 insertions(+), 17 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 doc/en/Docker.md

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..78264c8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,34 @@
+FROM node:20.16.0 as web_compile
+WORKDIR /home
+RUN <
[remainder of the Dockerfile and the README.md diff header not recoverable from the extracted text]
 Installation
-You can install using Pypi:
-```
-pip install ktransformers --no-build-isolation
-```
+1. Use a Docker image, see [documentation for Docker](./doc/en/docker.md)
+2. You can install using Pypi:
-Or download source code and compile:
-  - init source code
-    ```sh
-    git clone https://github.com/kvcache-ai/ktransformers.git
-    cd ktransformers
-    git submodule init
-    git submodule update
-    ```
-  - [Optional] If you want to run with website, please [compile the website](./doc/en/api/server/website.md) before execute ```bash install.sh```
-  - Compile and install
     ```
-    bash install.sh
+    pip install ktransformers --no-build-isolation
     ```
+3. Or you can download source code and compile:
+    - init source code
+      ```sh
+      git clone https://github.com/kvcache-ai/ktransformers.git
+      cd ktransformers
+      git submodule init
+      git submodule update
+      ```
+    - [Optional] If you want to run with website, please [compile the website](./doc/en/api/server/website.md) before execute ```bash install.sh```
+    - Compile and install
+      ```
+      bash install.sh
+      ```
+

Local Chat

 We provide a simple command-line local chat Python script that you can run for testing.
- > Note that this is a very simple test tool only support one round chat without any memory about last input, if you want to try full ability of the model, you may go to [RESTful API and Web UI](#id_666). We use the DeepSeek-V2-Lite-Chat-GGUF model as an example here. But we alse support other models, you can replace it with any other model that you want to test.
+ > Note that this is a very simple test tool only support one round chat without any memory about last input, if you want to try full ability of the model, you may go to [RESTful API and Web UI](#id_666). We use the DeepSeek-V2-Lite-Chat-GGUF model as an example here. But we also support other models, you can replace it with any other model that you want to test.

Run Example

diff --git a/doc/en/Docker.md b/doc/en/Docker.md
new file mode 100644
index 0000000..0fe9616
--- /dev/null
+++ b/doc/en/Docker.md
@@ -0,0 +1,27 @@
+# Docker
+
+## Prerequisites
+* Docker must be installed and running on your system.
+* Create a folder to store big models & intermediate files (e.g. /mnt/models)
+
+## Images
+There are Docker images available for our project:
+
+**Uploading**
+
+## Building the Docker image locally
+ - Download the Dockerfile from [here](../../Dockerfile)
+
+ - After downloading, execute
+   ```bash
+   docker build -t approachingai/ktransformers:v0.1.1 .
+   ```
+
+## Usage
+
+Assuming you have installed the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit), you can use the GPU in a Docker container:
+```
+docker run --gpus all -v /path/to/models:/models -p 10002:10002 approachingai/ktransformers:v0.1.1 --port 10002 --gguf_path /models/path/to/gguf_path --model_path /models/path/to/model_path --web True
+```
+
+More operators are described in the [readme](../../README.md)
\ No newline at end of file
diff --git a/doc/en/deepseek-v2-injection.md b/doc/en/deepseek-v2-injection.md
index 43359cf..c1ccd39 100644
--- a/doc/en/deepseek-v2-injection.md
+++ b/doc/en/deepseek-v2-injection.md
@@ -43,7 +43,11 @@ In the current version of KTransformers, we utilize Marlin for GPU kernels and l
 CPUInfer Performance

[figure diff: an existing image is replaced by a "marlin performance" image; the image markup was lost in extraction]

### Arithmetic Intensity Guided Offloading
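
---

For reference, below is a minimal sketch of the end-to-end workflow this patch enables. The patch file name and the local `/mnt/models` directory are illustrative assumptions, not part of the patch; the image tag, port, and server flags are taken from the Dockerfile and doc/en/Docker.md above.

```bash
# Illustrative sketch only: the patch file name and /mnt/models are assumptions.
# The image tag and server flags below come from doc/en/Docker.md in this patch.
git clone https://github.com/kvcache-ai/ktransformers.git
cd ktransformers
git am 0001-feature-add-support-for-building-docker-image.patch  # apply this patch locally

# Build the image from the new Dockerfile at the repository root.
docker build -t approachingai/ktransformers:v0.1.1 .

# Run the server with GPU access, mounting a local model folder into the container.
docker run --gpus all \
  -v /mnt/models:/models \
  -p 10002:10002 \
  approachingai/ktransformers:v0.1.1 \
  --port 10002 \
  --gguf_path /models/path/to/gguf_path \
  --model_path /models/path/to/model_path \
  --web True
```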