convert: avoid dequantizing mxfp4 for GPT-OSS (#16756)
Some checks failed
Python Type-Check / pyright type-check (push) Has been cancelled
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Has been cancelled
Python check requirements.txt / check-requirements (push) Has been cancelled

This commit is contained in:
compilade 2025-10-24 20:52:00 -04:00 committed by GitHub
parent 55945d2ef5
commit 5cca2542ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
class GptOssModel(TextModel):
model_arch = gguf.MODEL_ARCH.GPT_OSS
# TODO: remove once MXFP4 is supported more generally
def dequant_model(self):
quant_config = self.hparams.get("quantization_config")
if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
return
return super().dequant_model()
def transform_nibble_layout(self, tensor):
assert tensor.dtype == torch.uint8
assert tensor.shape[-1] == 16