support npu

This commit is contained in:
djw 2025-07-22 10:44:56 +00:00
parent 7d51a13c9b
commit a641aa8063
10 changed files with 3036 additions and 5 deletions

1433
CMakeCache.txt Normal file

File diff suppressed because it is too large Load diff

1433
build_test/CMakeCache.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,76 @@
# Install script for directory: /home/djw/py311_717/ktransformers/csrc/ktransformers_ext
# Set the install prefix
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
set(CMAKE_INSTALL_PREFIX "/usr/local")
endif()
string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
# Set the install configuration name.
if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
if(BUILD_TYPE)
string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
else()
set(CMAKE_INSTALL_CONFIG_NAME "Debug")
endif()
message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
endif()
# Set the component getting installed.
if(NOT CMAKE_INSTALL_COMPONENT)
if(COMPONENT)
message(STATUS "Install component: \"${COMPONENT}\"")
set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
else()
set(CMAKE_INSTALL_COMPONENT)
endif()
endif()
# Install shared libraries without execute permission?
if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
set(CMAKE_INSTALL_SO_NO_EXE "1")
endif()
# Is this installation the result of a crosscompile?
if(NOT DEFINED CMAKE_CROSSCOMPILING)
set(CMAKE_CROSSCOMPILING "FALSE")
endif()
# Set path to fallback-tool for dependency-resolution.
if(NOT DEFINED CMAKE_OBJDUMP)
set(CMAKE_OBJDUMP "/usr/bin/objdump")
endif()
if(NOT CMAKE_INSTALL_LOCAL_ONLY)
# Include the install script for the subdirectory.
include("/home/djw/py311_717/ktransformers/build_test/third_party/pybind11/cmake_install.cmake")
endif()
if(NOT CMAKE_INSTALL_LOCAL_ONLY)
# Include the install script for the subdirectory.
include("/home/djw/py311_717/ktransformers/build_test/third_party/llama.cpp/cmake_install.cmake")
endif()
string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
"${CMAKE_INSTALL_MANIFEST_FILES}")
if(CMAKE_INSTALL_LOCAL_ONLY)
file(WRITE "/home/djw/py311_717/ktransformers/build_test/install_local_manifest.txt"
"${CMAKE_INSTALL_MANIFEST_CONTENT}")
endif()
if(CMAKE_INSTALL_COMPONENT)
if(CMAKE_INSTALL_COMPONENT MATCHES "^[a-zA-Z0-9_.+-]+$")
set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
else()
string(MD5 CMAKE_INST_COMP_HASH "${CMAKE_INSTALL_COMPONENT}")
set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INST_COMP_HASH}.txt")
unset(CMAKE_INST_COMP_HASH)
endif()
else()
set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
endif()
if(NOT CMAKE_INSTALL_LOCAL_ONLY)
file(WRITE "/home/djw/py311_717/ktransformers/build_test/${CMAKE_INSTALL_MANIFEST}"
"${CMAKE_INSTALL_MANIFEST_CONTENT}")
endif()

BIN
build_test/libllamafile.a Normal file

Binary file not shown.

76
cmake_install.cmake Normal file
View file

@ -0,0 +1,76 @@
# Install script for directory: /home/djw/py311_717/ktransformers/csrc/ktransformers_ext
# Set the install prefix
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
set(CMAKE_INSTALL_PREFIX "/usr/local")
endif()
string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
# Set the install configuration name.
if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
if(BUILD_TYPE)
string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
else()
set(CMAKE_INSTALL_CONFIG_NAME "Release")
endif()
message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
endif()
# Set the component getting installed.
if(NOT CMAKE_INSTALL_COMPONENT)
if(COMPONENT)
message(STATUS "Install component: \"${COMPONENT}\"")
set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
else()
set(CMAKE_INSTALL_COMPONENT)
endif()
endif()
# Install shared libraries without execute permission?
if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
set(CMAKE_INSTALL_SO_NO_EXE "1")
endif()
# Is this installation the result of a crosscompile?
if(NOT DEFINED CMAKE_CROSSCOMPILING)
set(CMAKE_CROSSCOMPILING "FALSE")
endif()
# Set path to fallback-tool for dependency-resolution.
if(NOT DEFINED CMAKE_OBJDUMP)
set(CMAKE_OBJDUMP "/usr/bin/objdump")
endif()
if(NOT CMAKE_INSTALL_LOCAL_ONLY)
# Include the install script for the subdirectory.
include("/home/djw/py311_717/ktransformers/third_party/pybind11/cmake_install.cmake")
endif()
if(NOT CMAKE_INSTALL_LOCAL_ONLY)
# Include the install script for the subdirectory.
include("/home/djw/py311_717/ktransformers/third_party/llama.cpp/cmake_install.cmake")
endif()
string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
"${CMAKE_INSTALL_MANIFEST_FILES}")
if(CMAKE_INSTALL_LOCAL_ONLY)
file(WRITE "/home/djw/py311_717/ktransformers/install_local_manifest.txt"
"${CMAKE_INSTALL_MANIFEST_CONTENT}")
endif()
if(CMAKE_INSTALL_COMPONENT)
if(CMAKE_INSTALL_COMPONENT MATCHES "^[a-zA-Z0-9_.+-]+$")
set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
else()
string(MD5 CMAKE_INST_COMP_HASH "${CMAKE_INSTALL_COMPONENT}")
set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INST_COMP_HASH}.txt")
unset(CMAKE_INST_COMP_HASH)
endif()
else()
set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
endif()
if(NOT CMAKE_INSTALL_LOCAL_ONLY)
file(WRITE "/home/djw/py311_717/ktransformers/${CMAKE_INSTALL_MANIFEST}"
"${CMAKE_INSTALL_MANIFEST_CONTENT}")
endif()

View file

@ -156,6 +156,7 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
else()
message(STATUS "Compiler does NOT support AMX")
endif()
endif()
if (MSVC)
# instruction set detection for MSVC only
if (LLAMA_NATIVE)
@ -347,7 +348,16 @@ endif()
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_DIR1)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/cpu_backend SOURCE_DIR2)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/operators/llamafile SOURCE_DIR3)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/llamafile SOURCE_DIR4)
# aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/llamafile SOURCE_DIR4)
file(GLOB LLAMAFILE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/llamafile/*.cpp")
list(REMOVE_ITEM LLAMAFILE_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/llamafile/sgemm_arm.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/llamafile/sgemm_x86.cpp"
)
set(SOURCE_DIR4 ${LLAMAFILE_SOURCES})
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/operators/kvcache SOURCE_DIR5)
if (HOST_IS_X86 AND HAS_AVX512 AND __HAS_AMX__)

View file

@ -96,6 +96,8 @@ with torch.inference_mode(mode=True):
output = torch.empty((qlen, hidden_size), dtype=torch.float16).contiguous()
input = input / 100
bsz_tensor = torch.tensor([qlen], dtype=torch.int32, device="cuda").to("cpu").contiguous()
moe = moes[i % layer_num]
CPUInfer.submit(
moe.forward(
@ -104,7 +106,8 @@ with torch.inference_mode(mode=True):
expert_ids.data_ptr(),
weights.data_ptr(),
input.data_ptr(),
output.data_ptr()
output.data_ptr(),
bsz_tensor.data_ptr()
)
)
CPUInfer.sync()

View file

@ -29,7 +29,7 @@ pip install -r requirements-local_chat.txt
pip install -r ktransformers/server/requirements.txt
echo "Installing ktransformers"
KTRANSFORMERS_FORCE_BUILD=TRUE pip install -v . --no-build-isolation
KTRANSFORMERS_FORCE_BUILD=TRUE USE_BALANCE_SERVE=1 pip install -vvv . --no-build-isolation
if [[ "$DEV_BACKEND" == "cuda" ]]; then
echo "Installing custom_flashinfer for CUDA backend"

View file

@ -45,7 +45,7 @@ KTRANSFORMERS_BUILD_XPU = torch.xpu.is_available()
try:
import torch_npu
KTRANSFORMERS_BUILD_NPU = torch_npu.npu.is_available()
except ModuleNotFoundError | ImportError as e:
except:
KTRANSFORMERS_BUILD_NPU = False
# 检测 DEV_BACKEND 环境变量

View file

@ -3,5 +3,5 @@
#include "iqk_mul_mat_arm.inc"
#else
// 使用 ARM 版本
#include "qk_mul_mat_x86.inc"
#include "iqk_mul_mat_x86.inc"
#endif