Skip to content

vllm.utils.flashinfer

Compatibility wrapper for FlashInfer API changes.

Users of vLLM should always import only these wrappers.

__all__ module-attribute

__all__ = [
    "has_flashinfer",
    "has_flashinfer_cutlass_fused_moe",
    "flashinfer_cutlass_fused_moe",
    "fp4_quantize",
    "fp4_swizzle_blockscale",
    "autotune",
]

autotune module-attribute

autotune = _lazy_import_wrapper(
    "flashinfer.autotuner",
    "autotune",
    fallback_fn=lambda *args, **kwargs: nullcontext(),
)

flashinfer_cutlass_fused_moe module-attribute

flashinfer_cutlass_fused_moe = _lazy_import_wrapper(
    "flashinfer.fused_moe", "cutlass_fused_moe"
)

fp4_quantize module-attribute

fp4_quantize = _lazy_import_wrapper(
    "flashinfer", "fp4_quantize"
)

fp4_swizzle_blockscale module-attribute

fp4_swizzle_blockscale = _lazy_import_wrapper(
    "flashinfer", "fp4_swizzle_blockscale"
)

logger module-attribute

logger = init_logger(__name__)

_get_submodule

_get_submodule(module_name: str) -> Any | None

Safely import a submodule and return it, or None if not available.

Source code in vllm/utils/flashinfer.py
def _get_submodule(module_name: str) -> Any | None:
    """Safely import a submodule and return it, or None if not available."""
    try:
        return importlib.import_module(module_name)
    except (ImportError, ModuleNotFoundError):
        return None

_lazy_import_wrapper

_lazy_import_wrapper(
    module_name: str,
    attr_name: str,
    fallback_fn: Callable[..., Any] = _missing,
)

Create a lazy import wrapper for a specific function.

Source code in vllm/utils/flashinfer.py
def _lazy_import_wrapper(module_name: str,
                         attr_name: str,
                         fallback_fn: Callable[..., Any] = _missing):
    """Create a lazy import wrapper for a specific function."""

    @functools.cache
    def _get_impl():
        if not has_flashinfer():
            return None
        mod = _get_submodule(module_name)
        return getattr(mod, attr_name, None) if mod else None

    def wrapper(*args, **kwargs):
        impl = _get_impl()
        if impl is None:
            return fallback_fn(*args, **kwargs)
        return impl(*args, **kwargs)

    return wrapper

_missing

_missing(*_: Any, **__: Any) -> NoReturn

Placeholder for unavailable FlashInfer backend.

Source code in vllm/utils/flashinfer.py
def _missing(*_: Any, **__: Any) -> NoReturn:
    """Placeholder for unavailable FlashInfer backend."""
    raise RuntimeError(
        "FlashInfer backend is not available. Please install the package "
        "to enable FlashInfer kernels: "
        "https://github.com/flashinfer-ai/flashinfer")

has_flashinfer cached

has_flashinfer() -> bool

Return True if FlashInfer is available.

Source code in vllm/utils/flashinfer.py
@functools.cache
def has_flashinfer() -> bool:
    """Return ``True`` if FlashInfer is available."""
    # Use find_spec to check if the module exists without importing it
    # This avoids potential CUDA initialization side effects
    return importlib.util.find_spec("flashinfer") is not None

has_flashinfer_cutlass_fused_moe cached

has_flashinfer_cutlass_fused_moe() -> bool

Return True if FlashInfer CUTLASS fused MoE is available.

Source code in vllm/utils/flashinfer.py
@functools.cache
def has_flashinfer_cutlass_fused_moe() -> bool:
    """Return ``True`` if FlashInfer CUTLASS fused MoE is available."""
    if not has_flashinfer():
        return False

    # Check if all required functions are available
    required_functions = [
        ("flashinfer.fused_moe", "cutlass_fused_moe"),
        ("flashinfer", "fp4_quantize"),
        ("flashinfer", "fp4_swizzle_blockscale"),
    ]

    for module_name, attr_name in required_functions:
        mod = _get_submodule(module_name)
        if not mod or not hasattr(mod, attr_name):
            return False
    return True