diff --git a/scripts/rocm/rocm_vars.py b/scripts/rocm/rocm_vars.py index 1cf30231f..0ecdf8298 100644 --- a/scripts/rocm/rocm_vars.py +++ b/scripts/rocm/rocm_vars.py @@ -1,6 +1,5 @@ -from typing import Dict, Any, List, Tuple +from typing import Dict, Any, List, Tuple -# --- General MIOpen/rocBLAS variables (dropdown/textbox/checkbox) --- GENERAL_VARS: Dict[str, Dict[str, Any]] = { "MIOPEN_SYSTEM_DB_PATH": { "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\", @@ -16,7 +15,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": None, "restart_required": True, }, - # -- GEMM backend selector + companion toggles -------------------------- "MIOPEN_GEMM_ENFORCE_BACKEND": { "default": "1", "desc": "GEMM backend", @@ -26,26 +24,25 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { }, "PYTORCH_ROCM_USE_ROCBLAS": { "default": "0", - "desc": "PyTorch: Use rocBLAS.", + "desc": "PyTorch: Use rocBLAS", "widget": "dropdown", "options": [("0 - Off", "0"), ("1 - On", "1")], "restart_required": True, }, "PYTORCH_HIPBLASLT_DISABLE": { "default": "1", - "desc": "PyTorch: Use hipBLASLt.", + "desc": "PyTorch: Use hipBLASLt", "widget": "dropdown", "options": [("0 - Allow hipBLASLt", "0"), ("1 - Disable hipBLASLt", "1")], "restart_required": True, }, "ROCBLAS_USE_HIPBLASLT": { "default": "0", - "desc": "rocBLAS: use hipBLASLt backend.", + "desc": "rocBLAS: use hipBLASLt backend", "widget": "dropdown", "options": [("0 - Tensile (rocBLAS)", "0"), ("1 - hipBLASLt", "1")], "restart_required": True, }, - # -- MIOpen behavioural settings ---------------------------------------- "MIOPEN_FIND_MODE": { "default": "2", "desc": "MIOpen Find Mode", @@ -74,9 +71,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": [("0 - Off", "0"), ("1 - On", "1")], "restart_required": False, }, - - # -- Paths / sizes ------------------------------------------------------ - "MIOPEN_CONVOLUTION_MAX_WORKSPACE": { "default": "1073741824", "desc": "MIOpen convolutions: max workspace (bytes; 1 GB)", @@ -99,7 +93,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "restart_required": False, }, - # -- rocBLAS settings --------------------------------------------------- "ROCBLAS_STREAM_ORDER_ALLOC": { "default": "1", "desc": "rocBLAS stream-ordered memory allocation", @@ -128,8 +121,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": [("0 - Use Cache", "0"), ("1 - Benchmark new shapes", "1")], "restart_required": False, }, - - # -- hipBLASLt settings ------------------------------------------------- "PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED": { "default": "0", "desc": "TunableOp: benchmark hipBLASLt kernels", @@ -137,8 +128,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": [("0 - Off", "0"), ("1 - On", "1")], "restart_required": False, }, - - # -- Logging: MIOpen -> rocBLAS -> hipBLASLt ----------------------------- "MIOPEN_LOG_LEVEL": { "default": "0", "desc": "MIOpen log verbosity level", @@ -169,14 +158,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { }, } -# --- Solver toggles (inference/FWD only, RDNA2/3/4 compatible) --- -# Removed entirely - not representable in the UI, cannot be set by users: -# WRW (weight-gradient) and BWD (data-gradient) - training passes only, never run during inference -# XDLOPS/CK CDNA-exclusive (MI100/MI200/MI300 matrix engine variants) - not on any RDNA -# Fixed-geometry (5x10, 7x7-ImageNet, 11x11) - shapes never appear in SD/video inference -# FP32-reference (NAIVE_CONV_FWD, FWDGEN) - IsApplicable() unreliable for FP16/BF16 -# Wide MPASS (F3x4..F7x3) - kernel sizes that cannot match any SD convolution shape -# Disabled by default (added but off): RDNA3/4-only - Group Conv XDLOPS, CK default kernels _SOLVER_DESCS: Dict[str, str] = {} _SOLVER_DESCS.update({ @@ -199,7 +180,7 @@ _SOLVER_DESCS.update({ "MIOPEN_DEBUG_ATTN_SOFTMAX": "Enable Attention Softmax", }) _SOLVER_DESCS.update({ - # Direct ASM - FWD inference only (WRW, fixed-geometry, FP32-reference removed) + # Direct ASM — FWD inference only (WRW, fixed-geometry, FP32-reference removed) "MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U": "Enable Direct ASM 3x3U", "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U": "Enable Direct ASM 1x1U", "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2": "Enable Direct ASM 1x1UV2", @@ -207,12 +188,12 @@ _SOLVER_DESCS.update({ "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR": "Enable Direct ASM 1x1U AI Heuristic", }) _SOLVER_DESCS.update({ - # Direct OCL - FWD inference only (WRW, FWD11X11 fixed-geom, FWDGEN FP32-ref removed) + # Direct OCL — FWD inference only (WRW, FWD11X11 fixed-geom, FWDGEN FP32-ref removed) "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD": "Enable Direct OCL FWD", "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1": "Enable Direct OCL FWD1X1", }) _SOLVER_DESCS.update({ - # Winograd FWD - WRW removed; Fury/Rage kept as RDNA3/4 inference (off by default) + # Winograd FWD — WRW removed; Fury/Rage kept as RDNA3/4 inference (off by default) "MIOPEN_DEBUG_AMD_WINOGRAD_3X3": "Enable AMD Winograd 3x3", "MIOPEN_DEBUG_AMD_WINOGRAD_RXS": "Enable AMD Winograd RxS", "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD": "Enable AMD Winograd RxS FWD", @@ -225,32 +206,32 @@ _SOLVER_DESCS.update({ "MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3": "Enable AMD Winograd Rage RxS F2x3", }) _SOLVER_DESCS.update({ - # Multi-pass Winograd - only F3x2/F3x3 match typical 3x3 SD shapes; wider kernels removed + # Multi-pass Winograd — only F3x2/F3x3 match typical 3x3 SD shapes; wider kernels removed "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2": "Enable AMD Winograd MPASS F3x2", "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3": "Enable AMD Winograd MPASS F3x3", }) _SOLVER_DESCS.update({ - # Implicit GEMM FWD - BWD/WRW (training), CDNA-exclusive XDLOPS variants removed + # Implicit GEMM FWD — BWD/WRW (training), CDNA-exclusive XDLOPS variants removed "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1": "Enable ASM Implicit GEMM FWD V4R1", "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1": "Enable ASM Implicit GEMM FWD V4R1 1x1", "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1": "Enable HIP Implicit GEMM FWD V4R1", "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4": "Enable HIP Implicit GEMM FWD V4R4", }) _SOLVER_DESCS.update({ - # Group Conv XDLOPS FWD - RDNA3/4 (gfx1100+) only; disabled by default + # Group Conv XDLOPS FWD — RDNA3/4 (gfx1100+) only; disabled by default "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS": "Enable Group Conv Implicit GEMM XDLOPS FWD", "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR": "Enable Group Conv Implicit GEMM XDLOPS FWD AI Heuristic", - # CK (Composable Kernel) default kernels - RDNA3/4 (gfx1100+); disabled by default + # CK (Composable Kernel) default kernels — RDNA3/4 (gfx1100+); disabled by default "MIOPEN_DEBUG_CK_DEFAULT_KERNELS": "Enable CK (Composable Kernel) default kernels", }) # Solvers still in the registry but disabled by default. -# FORCE_IMMED_MODE_FALLBACK - overrides FIND_MODE entirely, defeats tuning DB -# Fury RxS F2x3/F3x2 - RDNA3/4-only; harmless on RDNA2 but won't select -# Rage RxS F2x3 - RDNA4-only -# Group Conv XDLOPS - RDNA3/4-only (gfx1100+) -# CK_DEFAULT_KERNELS - RDNA3/4-only (gfx1100+) +# FORCE_IMMED_MODE_FALLBACK — overrides FIND_MODE entirely, defeats tuning DB +# Fury RxS F2x3/F3x2 — RDNA3/4-only; harmless on RDNA2 but won't select +# Rage RxS F2x3 — RDNA4-only +# Group Conv XDLOPS — RDNA3/4-only (gfx1100+) +# CK_DEFAULT_KERNELS — RDNA3/4-only (gfx1100+) SOLVER_DISABLED_BY_DEFAULT = { "MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK", "MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3",