mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 03:39:48 +00:00
[fix](kt-kernel): gate RAWINT4 behind AVX512 and avoid AVX2 build break (#1660)
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
Some checks failed
Book-CI / test (push) Has been cancelled
Book-CI / test-1 (push) Has been cancelled
Book-CI / test-2 (push) Has been cancelled
Deploy / deploy (macos-latest) (push) Has been cancelled
Deploy / deploy (ubuntu-latest) (push) Has been cancelled
Deploy / deploy (windows-latest) (push) Has been cancelled
This commit is contained in:
parent
670c488155
commit
0698252484
2 changed files with 11 additions and 4 deletions
|
|
@ -268,7 +268,7 @@ elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR
|
|||
list(APPEND ARCH_FLAGS -mavx2 -mfma -msse3 -mf16c)
|
||||
endif()
|
||||
if(LLAMA_AVX512)
|
||||
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mfma -mf16c -msse3)
|
||||
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mavx512dq -mfma -mf16c -msse3)
|
||||
endif()
|
||||
if(LLAMA_AVX512_VBMI)
|
||||
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
||||
|
|
@ -639,4 +639,3 @@ else()
|
|||
message(FATAL_ERROR "NUMA library not found, please install NUMA, sudo apt install libnuma-dev")
|
||||
endif()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -292,6 +292,7 @@ class CMakeBuild(build_ext):
|
|||
cmake_args += cpu_feature_flags()
|
||||
d = self.detect_cpu_info()
|
||||
print(f"Detected CPU info: {d}")
|
||||
cpu_mode = os.environ.get("CPUINFER_CPU_INSTRUCT", "NATIVE").upper()
|
||||
|
||||
# Vendor / feature specific toggles
|
||||
# AMD MoE: explicit env overrides; otherwise default ON on AMD CPU
|
||||
|
|
@ -314,11 +315,18 @@ class CMakeBuild(build_ext):
|
|||
if "AMX" in d["features"]:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX=ON")
|
||||
print("-- AMX support detected; enabling (-DKTRANSFORMERS_CPU_USE_AMX=ON)")
|
||||
# AVX512 umbrella: explicit env overrides; else enable if AMX or AVX512 detected
|
||||
|
||||
# AVX512 umbrella (AMX/AVX512 kernels):
|
||||
# - If user explicitly sets CPUINFER_ENABLE_AVX512 -> honor it
|
||||
# - Otherwise, only auto-enable when CPU mode actually wants AVX512
|
||||
# (NATIVE/FANCY/AVX512). In AVX2 mode we do NOT enable this, so
|
||||
# RAWINT4 / K2 kernels are not compiled.
|
||||
if not _forward_bool_env(cmake_args, "CPUINFER_ENABLE_AVX512", "KTRANSFORMERS_CPU_USE_AMX_AVX512"):
|
||||
if "AMX" in d["features"] or "AVX512" in d["features"]:
|
||||
if cpu_mode in ("NATIVE", "FANCY", "AVX512") and ("AMX" in d["features"] or "AVX512" in d["features"]):
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON")
|
||||
print("-- Enabling AMX/AVX512 umbrella (-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON)")
|
||||
else:
|
||||
print(f"-- CPUINFER_CPU_INSTRUCT={cpu_mode}; not auto-enabling AMX/AVX512 umbrella")
|
||||
|
||||
# Auto-enable MOE kernel only when env explicitly turns on AMD or KML backend
|
||||
# (Do not enable purely on vendor auto-detection to avoid surprise behavior.)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue