Skip to content

vllm.model_executor.layers.quantization.utils.mxfp4_utils

OCP_MX_BLOCK_SIZE module-attribute

OCP_MX_BLOCK_SIZE = 32

dequant_mxfp4 module-attribute

dequant_mxfp4 = dequant_mxfp4

quant_dequant_mxfp4 module-attribute

quant_dequant_mxfp4 = quant_dequant_mxfp4

_dequant_mxfp4

_dequant_mxfp4(
    x: Tensor, scale: Tensor, float_dtype: dtype
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
def _dequant_mxfp4(x: torch.Tensor, scale: torch.Tensor,
                   float_dtype: torch.dtype) -> torch.Tensor:
    """Dequantize ``x`` by delegating to ``quark``'s ``mx.dq_mxfp4`` kernel.

    The ``quark`` import is performed lazily inside the function, so the
    dependency is only needed when this function is actually called.

    Args:
        x: Tensor forwarded unchanged as the first kernel argument
            (presumably the packed MX-FP4 payload -- confirm against the
            quark kernel documentation).
        scale: Tensor forwarded unchanged as the second kernel argument
            (presumably the per-block scales -- confirm).
        float_dtype: Dtype forwarded unchanged as the third kernel argument.

    Returns:
        Whatever ``mx.dq_mxfp4(x, scale, float_dtype)`` returns.

    Raises:
        ImportError: If ``quark.torch.kernel`` cannot be imported; the
            original import failure is chained as the cause.
    """
    try:
        from quark.torch.kernel import mx as quark_mx
    except ImportError as import_failure:
        install_hint = ("The package `amd-quark` is required to use MX-FP4 "
                        "models. Please install it with `pip install "
                        "amd-quark`.")
        raise ImportError(install_hint) from import_failure
    else:
        return quark_mx.dq_mxfp4(x, scale, float_dtype)

_dequant_mxfp4_fake

_dequant_mxfp4_fake(
    x: Tensor, scale: Tensor, float_dtype: dtype
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
def _dequant_mxfp4_fake(x: torch.Tensor, scale: torch.Tensor,
                        float_dtype: torch.dtype) -> torch.Tensor:
    """Allocate an uninitialized tensor shaped like a dequantized ``x``.

    No dequantization is performed: only the output metadata is produced.
    The result keeps every leading dimension of ``x`` and doubles the last
    one. (Presumably this is the shape-only counterpart of
    ``_dequant_mxfp4`` used for op registration -- the registration itself
    is not visible here.)

    Args:
        x: Input tensor; only its shape and device are read.
        scale: Unused; accepted so the signature matches ``_dequant_mxfp4``.
        float_dtype: Dtype of the returned tensor.

    Returns:
        An uninitialized tensor of shape ``(*x.shape[:-1], 2 * x.shape[-1])``
        with dtype ``float_dtype`` on ``x.device``.
    """
    leading_dims = x.shape[:-1]
    # The last dimension of the output is twice that of the input.
    widened_last_dim = x.shape[-1] * 2
    output_shape = (*leading_dims, widened_last_dim)
    return torch.empty(output_shape, dtype=float_dtype, device=x.device)

_quant_dequant_mxfp4

_quant_dequant_mxfp4(
    x: Tensor, scale_calculation_mode: str = "even"
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
def _quant_dequant_mxfp4(x: torch.Tensor,
                         scale_calculation_mode: str = "even") -> torch.Tensor:
    """Run ``quark``'s ``mx.qdq_mxfp4`` kernel on ``x``.

    The ``quark`` import is performed lazily inside the function, so the
    dependency is only needed when this function is actually called.

    Args:
        x: Tensor forwarded unchanged as the first kernel argument.
        scale_calculation_mode: Forwarded unchanged as the second kernel
            argument; defaults to ``"even"``. The set of accepted values is
            defined by the quark kernel and is not visible here.

    Returns:
        Whatever ``mx.qdq_mxfp4(x, scale_calculation_mode)`` returns.

    Raises:
        ImportError: If ``quark.torch.kernel`` cannot be imported; the
            original import failure is chained as the cause.
    """
    try:
        from quark.torch.kernel import mx as quark_mx
    except ImportError as import_failure:
        install_hint = ("The package `amd-quark` is required to use MX-FP4 "
                        "models. Please install it with `pip install "
                        "amd-quark`.")
        raise ImportError(install_hint) from import_failure
    else:
        return quark_mx.qdq_mxfp4(x, scale_calculation_mode)

_quant_dequant_mxfp4_fake

_quant_dequant_mxfp4_fake(
    x: Tensor, scale_calculation_mode: str = "even"
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
def _quant_dequant_mxfp4_fake(x: torch.Tensor,
                              scale_calculation_mode: str = "even"
                              ) -> torch.Tensor:
    """Allocate an uninitialized tensor with the same metadata as ``x``.

    No quantization or dequantization is performed. (Presumably this is the
    shape-only counterpart of ``_quant_dequant_mxfp4`` used for op
    registration -- the registration itself is not visible here.)

    Args:
        x: Input tensor whose shape, dtype and device are mirrored.
        scale_calculation_mode: Unused; accepted so the signature matches
            ``_quant_dequant_mxfp4``.

    Returns:
        The result of ``torch.empty_like(x)``.
    """
    placeholder = torch.empty_like(x)
    return placeholder