diff --git a/doc/README.md b/doc/README.md index 1e994aa..199b990 100644 --- a/doc/README.md +++ b/doc/README.md @@ -21,6 +21,7 @@ interface, RESTful APIs compliant with OpenAI and Ollama, and even a simplified Our vision for KTransformers is to serve as a flexible platform for experimenting with innovative LLM inference optimizations. Please let us know if you need any other features.

🔥 Updates

+ * **May 14, 2025**: Support Intel Arc GPU ([Tutorial](./en/xpu.md)). * **Apr 9, 2025**: Experimental support for LLaMA 4 models ([Tutorial](./en/llama4.md)). * **Apr 2, 2025**: Support Multi-concurrency. ([Tutorial](./en/balance-serve.md)). diff --git a/doc/en/xpu.md b/doc/en/xpu.md index 4f71ae7..7a03203 100644 --- a/doc/en/xpu.md +++ b/doc/en/xpu.md @@ -41,7 +41,6 @@ Install PyTorch with XPU backend support and [IPEX-LLM](https://github.com/intel pip install --pre --upgrade ipex-llm[xpu_2.6] --extra-index-url https://download.pytorch.org/whl/xpu pip uninstall torch torchvision torchaudio pip install torch==2.7+xpu torchvision torchaudio --index-url https://download.pytorch.org/whl/test/xpu # install torch2.7 -pip install packaging ninja cpufeature numpy pip uninstall intel-opencl-rt dpcpp-cpp-rt ``` diff --git a/ktransformers/models/custom_cache.py b/ktransformers/models/custom_cache.py index 1121b4a..350af73 100644 --- a/ktransformers/models/custom_cache.py +++ b/ktransformers/models/custom_cache.py @@ -293,7 +293,7 @@ class KGQACache(nn.Module): self.v_caches = [] - def load(self, inference_context: "sched_ext.InferenceContext"): + def load(self, inference_context: "sched_ext.InferenceContext"): print(self.config.num_hidden_layers) for i in range(self.config.num_hidden_layers): self.k_caches.append( diff --git a/setup.py b/setup.py index b8f318d..0961d93 100644 --- a/setup.py +++ b/setup.py @@ -229,7 +229,7 @@ class VersionInfo: elif torch.xpu.is_available(): backend_version = f"xpu" else: - raise ValueError("Unsupported backend: CUDA_HOME MUSA_HOME ROCM_HOME all not set.") + raise ValueError("Unsupported backend: CUDA_HOME, MUSA_HOME, and ROCM_HOME are not set and XPU is not available.") package_version = f"{flash_version}+{backend_version}torch{torch_version}{cpu_instruct}" if full_version: return package_version @@ -501,7 +501,7 @@ class CMakeBuild(BuildExtension): elif KTRANSFORMERS_BUILD_XPU: cmake_args += ["-DKTRANSFORMERS_USE_XPU=ON", 
"-DKTRANSFORMERS_USE_CUDA=OFF"] else: - raise ValueError("Unsupported backend: CUDA_HOME, MUSA_HOME, and ROCM_HOME are not set.") + raise ValueError("Unsupported backend: CUDA_HOME, MUSA_HOME, and ROCM_HOME are not set and XPU is not available.") cmake_args = get_cmake_abi_args(cmake_args) # log cmake_args @@ -628,7 +628,7 @@ elif MUSA_HOME is not None: elif torch.xpu.is_available(): #XPUExtension is not available now. ops_module = None else: - raise ValueError("Unsupported backend: CUDA_HOME and MUSA_HOME are not set.") + raise ValueError("Unsupported backend: CUDA_HOME, MUSA_HOME, and ROCM_HOME are not set and XPU is not available.") if not torch.xpu.is_available(): ext_modules = [