vllm.utils.flashinfer

Compatibility wrapper for FlashInfer API changes.

vLLM code should access FlashInfer only through the wrappers in this module rather than importing `flashinfer` directly.

`__all__` `module-attribute` ¶

# Public names of this module: the two availability checks plus the lazily
# resolved FlashInfer entry points defined below.
__all__ = [
    "has_flashinfer",
    "has_flashinfer_cutlass_fused_moe",
    "flashinfer_cutlass_fused_moe",
    "fp4_quantize",
    "fp4_swizzle_blockscale",
    "autotune",
]

autotune `module-attribute` ¶

# Lazy wrapper around ``flashinfer.autotuner.autotune``. Unlike the other
# wrappers in this module it does not fall back to ``_missing``: when the
# implementation cannot be resolved, the fallback ignores its arguments and
# returns ``nullcontext()`` instead of raising.
# NOTE(review): presumably so that ``with autotune(...):`` degrades to a
# no-op when FlashInfer is absent — confirm against callers.
autotune = _lazy_import_wrapper(
    "flashinfer.autotuner",
    "autotune",
    fallback_fn=lambda *args, **kwargs: nullcontext(),
)

flashinfer_cutlass_fused_moe `module-attribute` ¶

# Lazy wrapper around ``flashinfer.fused_moe.cutlass_fused_moe``. The target
# is looked up on first call; with no explicit fallback given, the default
# ``_missing`` is used, which raises ``RuntimeError`` when the target cannot
# be resolved.
flashinfer_cutlass_fused_moe = _lazy_import_wrapper(
    "flashinfer.fused_moe", "cutlass_fused_moe"
)

fp4_quantize `module-attribute` ¶

# Lazy wrapper around ``flashinfer.fp4_quantize``. The target is looked up on
# first call; the default ``_missing`` fallback raises ``RuntimeError`` when
# it cannot be resolved.
fp4_quantize = _lazy_import_wrapper(
    "flashinfer", "fp4_quantize"
)

fp4_swizzle_blockscale `module-attribute` ¶

# Lazy wrapper around ``flashinfer.fp4_swizzle_blockscale``. The target is
# looked up on first call; the default ``_missing`` fallback raises
# ``RuntimeError`` when it cannot be resolved.
fp4_swizzle_blockscale = _lazy_import_wrapper(
    "flashinfer", "fp4_swizzle_blockscale"
)

logger `module-attribute` ¶

# Module-level logger, created via ``init_logger`` and keyed by this
# module's ``__name__``.
logger = init_logger(__name__)

_get_submodule ¶

_get_submodule(module_name: str) -> Any | None

Safely import a submodule and return it, or None if not available.

Source code in vllm/utils/flashinfer.py

def _get_submodule(module_name: str) -> Any | None:
    """Safely import a submodule and return it, or None if not available."""
    try:
        return importlib.import_module(module_name)
    except (ImportError, ModuleNotFoundError):
        return None

_lazy_import_wrapper ¶

_lazy_import_wrapper(
    module_name: str,
    attr_name: str,
    fallback_fn: Callable[..., Any] = _missing,
)

Create a lazy import wrapper for a specific function.

Source code in vllm/utils/flashinfer.py

def _lazy_import_wrapper(module_name: str,
                         attr_name: str,
                         fallback_fn: Callable[..., Any] = _missing):
    """Build a callable that defers looking up ``module_name.attr_name``.

    Nothing is imported when the wrapper is created. On the first call the
    target is resolved and the outcome (including a failed lookup) is cached
    for that wrapper.

    Args:
        module_name: Dotted name of the module that holds the target.
        attr_name: Attribute to fetch from that module.
        fallback_fn: Called with the caller's arguments when the target
            cannot be resolved. Defaults to ``_missing``.

    Returns:
        A function that forwards ``*args, **kwargs`` to the resolved target,
        or to ``fallback_fn`` when no target is available.
    """

    @functools.cache
    def _resolve():
        # No point importing anything if FlashInfer itself is not installed.
        if not has_flashinfer():
            return None
        module = _get_submodule(module_name)
        if not module:
            return None
        # The attribute may be absent even though the module imports.
        return getattr(module, attr_name, None)

    def wrapper(*args, **kwargs):
        target = _resolve()
        handler = fallback_fn if target is None else target
        return handler(*args, **kwargs)

    return wrapper

_missing ¶

_missing(*_: Any, **__: Any) -> NoReturn

Placeholder for unavailable FlashInfer backend.

Source code in vllm/utils/flashinfer.py

def _missing(*_: Any, **__: Any) -> NoReturn:
    """Stand-in callable used when the FlashInfer backend cannot be used.

    Accepts and ignores any arguments.

    Raises:
        RuntimeError: Always, with a message pointing at the FlashInfer
            project page.
    """
    message = ("FlashInfer backend is not available. Please install the "
               "package to enable FlashInfer kernels: "
               "https://github.com/flashinfer-ai/flashinfer")
    raise RuntimeError(message)

has_flashinfer `cached` ¶

has_flashinfer() -> bool

Return True if FlashInfer is available.

Source code in vllm/utils/flashinfer.py

@functools.cache
def has_flashinfer() -> bool:
    """Report whether the ``flashinfer`` package can be located.

    The answer is computed once and then served from the cache.
    """
    # find_spec only locates the package; it does not import it, which
    # avoids potential CUDA initialization side effects.
    spec = importlib.util.find_spec("flashinfer")
    return spec is not None

has_flashinfer_cutlass_fused_moe `cached` ¶

has_flashinfer_cutlass_fused_moe() -> bool

Return True if FlashInfer CUTLASS fused MoE is available.

Source code in vllm/utils/flashinfer.py

@functools.cache
def has_flashinfer_cutlass_fused_moe() -> bool:
    """Report whether FlashInfer's CUTLASS fused MoE path is usable.

    Returns ``False`` when FlashInfer is not installed, or when any of the
    required (module, attribute) pairs below cannot be found. The answer is
    computed once and then served from the cache.
    """
    if not has_flashinfer():
        return False

    # Every entry must be importable and expose the named attribute.
    needed = (
        ("flashinfer.fused_moe", "cutlass_fused_moe"),
        ("flashinfer", "fp4_quantize"),
        ("flashinfer", "fp4_swizzle_blockscale"),
    )

    def _provides(module_name: str, attr_name: str) -> bool:
        module = _get_submodule(module_name)
        return bool(module) and hasattr(module, attr_name)

    # all() stops at the first missing piece, so later modules are not
    # imported once one requirement has failed.
    return all(_provides(module_name, attr_name)
               for module_name, attr_name in needed)

vllm.utils.flashinfer

__all__ module-attribute ¶

autotune module-attribute ¶

flashinfer_cutlass_fused_moe module-attribute ¶

fp4_quantize module-attribute ¶

fp4_swizzle_blockscale module-attribute ¶

logger module-attribute ¶

_get_submodule ¶

_lazy_import_wrapper ¶

_missing ¶

has_flashinfer cached ¶

has_flashinfer_cutlass_fused_moe cached ¶

`__all__` `module-attribute` ¶

autotune `module-attribute` ¶

flashinfer_cutlass_fused_moe `module-attribute` ¶

fp4_quantize `module-attribute` ¶

fp4_swizzle_blockscale `module-attribute` ¶

logger `module-attribute` ¶

has_flashinfer `cached` ¶

has_flashinfer_cutlass_fused_moe `cached` ¶