[fix]: fix --numa-nodes handling (#1904)
Some checks are pending
Book-CI / test (push) Waiting to run
Book-CI / test-1 (push) Waiting to run
Book-CI / test-2 (push) Waiting to run
Deploy / deploy (macos-latest) (push) Waiting to run
Deploy / deploy (ubuntu-latest) (push) Waiting to run
Deploy / deploy (windows-latest) (push) Waiting to run
Release sglang-kt to PyPI / Build sglang-kt wheel (push) Waiting to run
Release sglang-kt to PyPI / Publish sglang-kt to PyPI (push) Blocked by required conditions

* [fix]: fix --numa-nodes handling
This commit is contained in:
Oql 2026-03-31 17:50:22 +08:00 committed by GitHub
parent cdc867c864
commit 9e6484a538
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 43 additions and 13 deletions

View file

@@ -5,7 +5,13 @@ from typing import List, Optional
# Use relative imports for package structure
from ..experts_base import BaseMoEWrapper
from .loader import SafeTensorLoader, CompressedSafeTensorLoader, FP8SafeTensorLoader, BF16SafeTensorLoader, GPTQSafeTensorLoader
from .loader import (
SafeTensorLoader,
CompressedSafeTensorLoader,
FP8SafeTensorLoader,
BF16SafeTensorLoader,
GPTQSafeTensorLoader,
)
from kt_kernel_ext.moe import MOEConfig
import kt_kernel_ext.moe as _moe_mod
@@ -351,6 +357,7 @@ class NativeMoEWrapper(BaseMoEWrapper):
cpu_save: bool = False,
max_deferred_experts_per_token: Optional[int] = None,
method: str = "RAWINT4",
numa_nodes: Optional[List[int]] = None,
):
if method == "RAWINT4" and not _HAS_RAWINT4_SUPPORT:
raise RuntimeError(
@@ -379,10 +386,7 @@ class NativeMoEWrapper(BaseMoEWrapper):
"Please recompile kt_kernel_ext with AVX512+BF16 or AVX2 enabled."
)
if method == "GPTQ_INT4" and not _HAS_AVX2_GPTQ_INT4_SUPPORT:
raise RuntimeError(
"GPTQ_INT4 backend not available.\n"
"Please recompile kt_kernel_ext with AVX2 enabled."
)
raise RuntimeError("GPTQ_INT4 backend not available.\n" "Please recompile kt_kernel_ext with AVX2 enabled.")
super().__init__(
layer_idx=layer_idx,

View file

@@ -41,6 +41,7 @@ class LlamafileMoEWrapper(BaseMoEWrapper):
cpu_save: bool = False,
max_deferred_experts_per_token: Optional[int] = None,
method: str = "LLAMAFILE",
numa_nodes: Optional[List[int]] = None,
):
"""
Initialize Llamafile MoE Wrapper.

View file

@@ -49,6 +49,7 @@ class GeneralMoEWrapper(BaseMoEWrapper):
cpu_save: bool = False,
max_deferred_experts_per_token: Optional[int] = None,
method: str = "MOE_INT8",
numa_nodes: Optional[List[int]] = None,
):
"""
Initialize general MoE Wrapper.