merge: integrate origin/main into sft branch

Resolved 6 conflicts:
- CMakeLists.txt: keep cpptrace + debug flag, accept flexible build type
- worker_pool.cpp: keep SFT profiling + main's block=1 spin fix
- ext_bindings.cpp: keep both SFT MOE bindings and AVX2/BF16/FP8 bindings
- common.hpp: keep gpu_experts_mask + SFT backward weight fields
- __init__.py: export both generate_gpu_experts_masks and AMXSFTMoEWrapper
- experts.py: gpu_experts_mask for inference, num_gpu_experts for SFT, new methods
This commit is contained in:
mrhaoxx 2026-04-08 23:19:28 +08:00
commit a98d544833
112 changed files with 21976 additions and 1917 deletions

View file

@@ -50,6 +50,7 @@ kt_kernel_ext = _kt_kernel_ext
# Import main API
from .experts import KTMoEWrapper
+from .experts_base import generate_gpu_experts_masks
def __getattr__(name):
if name == "AMXSFTMoEWrapper":
@@ -91,4 +92,4 @@ except ImportError:
except ImportError:
__version__ = "0.4.3"
-__all__ = ["KTMoEWrapper", "AMXSFTMoEWrapper", "kt_kernel_ext", "__cpu_variant__", "__version__"]
+__all__ = ["KTMoEWrapper", "AMXSFTMoEWrapper", "generate_gpu_experts_masks", "kt_kernel_ext", "__cpu_variant__", "__version__"]