Support Native BF16 format MoE (#1788)

support Native BF16 format MoE
2026-04-28 03:39:48 +00:00 · 2026-01-12 14:43:28 +08:00 · 2026-01-12 14:43:28 +08:00 · 5edc456749
commit 5edc456749
parent ddb957596f
11 changed files with 2149 additions and 501 deletions
--- a/kt-kernel/python/experts.py
+++ b/kt-kernel/python/experts.py
@@ -77,7 +77,7 @@ class KTMoEWrapper:
            chunked_prefill_size: Maximum prefill chunk size
            cpu_save: Whether to save weights to CPU memory
            max_deferred_experts_per_token: Number of experts per token to defer. Defaults to 0.
-            method: Backend method ("AMXINT4", "AMXINT8", "RAWINT4", "FP8", "LLAMAFILE", "MOE_INT4", "MOE_INT8")
+            method: Backend method ("AMXINT4", "AMXINT8", "RAWINT4", "FP8", "BF16", "LLAMAFILE", "MOE_INT4", "MOE_INT8")

        Returns:
            An instance of the appropriate backend implementation (e.g., AMXMoEWrapper)
@@ -85,7 +85,7 @@ class KTMoEWrapper:
        # Select backend based on method
        if method in ["AMXINT4", "AMXINT8"]:
            backend_cls = AMXMoEWrapper
-        elif method in ["RAWINT4", "FP8"]:
+        elif method in ["RAWINT4", "FP8", "BF16"]:
            backend_cls = NativeMoEWrapper
        elif method == "LLAMAFILE":
            backend_cls = LlamafileMoEWrapper