mirror of https://github.com/vladmandic/automatic
254 lines
13 KiB
Python
254 lines
13 KiB
Python
from typing import Dict, Any, List, Tuple
|
|
|
|
# --- General MIOpen/rocBLAS variables (dropdown/textbox/checkbox) ---
|
|
GENERAL_VARS: Dict[str, Dict[str, Any]] = {
|
|
|
|
"MIOPEN_GEMM_ENFORCE_BACKEND": {
|
|
"default": "1",
|
|
"desc": "Enforce GEMM backend",
|
|
"widget": "dropdown",
|
|
"options": [("1 - rocBLAS", "1"), ("5 - hipBLASLt", "5")],
|
|
"restart_required": False,
|
|
},
|
|
"MIOPEN_FIND_MODE": {
|
|
"default": "2",
|
|
"desc": "MIOpen Find Mode",
|
|
"widget": "dropdown",
|
|
"options": [("1 - NORMAL", "1"), ("2 - FAST", "2"), ("3 - HYBRID", "3"), ("5 - DYNAMIC_HYBRID", "5"), ("6 - TRUST_VERIFY", "6"), ("7 - TRUST_VERIFY_FULL", "7")],
|
|
"restart_required": True,
|
|
},
|
|
"MIOPEN_FIND_ENFORCE": {
|
|
"default": "1",
|
|
"desc": "MIOpen Find Enforce",
|
|
"widget": "dropdown",
|
|
"options": [("1 - NONE", "1"), ("2 - DB_UPDATE", "2"), ("3 - SEARCH", "3"), ("4 - SEARCH_DB_UPDATE", "4"), ("5 - DB_CLEAN", "5")],
|
|
"restart_required": True,
|
|
},
|
|
"MIOPEN_SEARCH_CUTOFF": {
|
|
"default": "0",
|
|
"desc": "Enable early termination of suboptimal searches",
|
|
"widget": "dropdown",
|
|
"options": [("0 - Off", "0"), ("1 - On", "1")],
|
|
"restart_required": True,
|
|
},
|
|
"MIOPEN_SYSTEM_DB_PATH": {
|
|
"default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\",
|
|
"desc": "MIOpen system DB path",
|
|
"widget": "textbox",
|
|
"options": None,
|
|
"restart_required": True,
|
|
},
|
|
"MIOPEN_LOG_LEVEL": {
|
|
"default": "0",
|
|
"desc": "MIOpen log verbosity level",
|
|
"widget": "dropdown",
|
|
"options": [("0 - Default", "0"), ("1 - Quiet", "1"), ("3 - Error", "3"), ("4 - Warning", "4"), ("5 - Info", "5"), ("6 - Detail", "6"), ("7 - Trace", "7")],
|
|
"restart_required": False,
|
|
},
|
|
"MIOPEN_DEBUG_ENABLE": {
|
|
"default": "0",
|
|
"desc": "Enable MIOpen logging",
|
|
"widget": "dropdown",
|
|
"options": [("0 - Off", "0"), ("1 - On", "1")],
|
|
"restart_required": False,
|
|
},
|
|
"ROCBLAS_LAYER": {
|
|
"default": "0",
|
|
"desc": "rocBLAS logging",
|
|
"widget": "dropdown",
|
|
"options": [("0 - Off", "0"), ("1 - Trace", "1"), ("2 - Bench", "2"), ("3 - Trace+Bench", "3"), ("4 - Profile", "4"), ("5 - Trace+Profile", "5"), ("6 - Bench+Profile", "6"), ("7 - All", "7")],
|
|
"restart_required": False,
|
|
},
|
|
"HIPBLASLT_LOG_LEVEL": {
|
|
"default": "0",
|
|
"desc": "hipBLASLt logging",
|
|
"widget": "dropdown",
|
|
"options": [("0 - Off", "0"), ("1 - Error", "1"), ("2 - Trace", "2"), ("3 - Hints", "3"), ("4 - Info", "4"), ("5 - API Trace", "5")],
|
|
"restart_required": False,
|
|
},
|
|
"MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC": {
|
|
"default": "0",
|
|
"desc": "Deterministic convolution (reproducible results, may be slower)",
|
|
"widget": "dropdown",
|
|
"options": [("0 - Off", "0"), ("1 - On", "1")],
|
|
"restart_required": False,
|
|
},
|
|
}
|
|
|
|
# --- Solver toggles (inference/FWD only, RDNA2/3/4 compatible) ---
|
|
# Removed entirely — not representable in the UI, cannot be set by users:
|
|
# WRW (weight-gradient) and BWD (data-gradient) — training passes only, never run during inference
|
|
# XDLOPS/CK CDNA-exclusive (MI100/MI200/MI300 matrix engine variants) — not on any RDNA
|
|
# Fixed-geometry (5x10, 7x7-ImageNet, 11x11) — shapes never appear in SD/video inference
|
|
# FP32-reference (NAIVE_CONV_FWD, FWDGEN) — IsApplicable() unreliable for FP16/BF16
|
|
# Wide MPASS (F3x4..F7x3) — kernel sizes that cannot match any SD convolution shape
|
|
# Disabled by default (added but off): RDNA3/4-only — Group Conv XDLOPS, CK default kernels
|
|
_SOLVER_DESCS: Dict[str, str] = {}
|
|
|
|
_SOLVER_DESCS.update({
|
|
"MIOPEN_DEBUG_CONV_FFT": "Enable FFT solver",
|
|
"MIOPEN_DEBUG_CONV_DIRECT": "Enable Direct solver",
|
|
"MIOPEN_DEBUG_CONV_GEMM": "Enable GEMM solver",
|
|
"MIOPEN_DEBUG_CONV_WINOGRAD": "Enable Winograd solver",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM": "Enable Implicit GEMM solver",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
"MIOPEN_DEBUG_CONV_IMMED_FALLBACK": "Enable Immediate Fallback",
|
|
"MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK": "Enable AI Immediate Mode Fallback",
|
|
"MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK": "Force Immediate Mode Fallback",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
"MIOPEN_DEBUG_GCN_ASM_KERNELS": "Enable GCN ASM kernels",
|
|
"MIOPEN_DEBUG_HIP_KERNELS": "Enable HIP kernels",
|
|
"MIOPEN_DEBUG_OPENCL_CONVOLUTIONS": "Enable OpenCL convolutions",
|
|
"MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP": "Enable OpenCL Wave64 NOWGP",
|
|
"MIOPEN_DEBUG_ATTN_SOFTMAX": "Enable Attention Softmax",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
# Direct ASM — FWD inference only (WRW, fixed-geometry, FP32-reference removed)
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U": "Enable Direct ASM 3x3U",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U": "Enable Direct ASM 1x1U",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2": "Enable Direct ASM 1x1UV2",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED": "Enable Direct ASM 1x1U Search Optimized",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR": "Enable Direct ASM 1x1U AI Heuristic",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
# Direct OCL — FWD inference only (WRW, FWD11X11 fixed-geom, FWDGEN FP32-ref removed)
|
|
"MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD": "Enable Direct OCL FWD",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1": "Enable Direct OCL FWD1X1",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
# Winograd FWD — WRW removed; Fury/Rage kept as RDNA3/4 inference (off by default)
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_3X3": "Enable AMD Winograd 3x3",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS": "Enable AMD Winograd RxS",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD": "Enable AMD Winograd RxS FWD",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2": "Enable AMD Winograd RxS F3x2",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3": "Enable AMD Winograd RxS F2x3",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1": "Enable AMD Winograd RxS F2x3 G1",
|
|
"MIOPEN_DEBUG_AMD_FUSED_WINOGRAD": "Enable AMD Fused Winograd",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3": "Enable AMD Winograd Fury RxS F2x3",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2": "Enable AMD Winograd Fury RxS F3x2",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3": "Enable AMD Winograd Rage RxS F2x3",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
# Multi-pass Winograd — only F3x2/F3x3 match typical 3x3 SD shapes; wider kernels removed
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2": "Enable AMD Winograd MPASS F3x2",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3": "Enable AMD Winograd MPASS F3x3",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
# Implicit GEMM FWD — BWD/WRW (training), CDNA-exclusive XDLOPS variants removed
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1": "Enable ASM Implicit GEMM FWD V4R1",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1": "Enable ASM Implicit GEMM FWD V4R1 1x1",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1": "Enable HIP Implicit GEMM FWD V4R1",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4": "Enable HIP Implicit GEMM FWD V4R4",
|
|
})
|
|
_SOLVER_DESCS.update({
|
|
# Group Conv XDLOPS FWD — RDNA3/4 (gfx1100+) only; disabled by default
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS": "Enable Group Conv Implicit GEMM XDLOPS FWD",
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR": "Enable Group Conv Implicit GEMM XDLOPS FWD AI Heuristic",
|
|
# CK (Composable Kernel) default kernels — RDNA3/4 (gfx1100+); disabled by default
|
|
"MIOPEN_DEBUG_CK_DEFAULT_KERNELS": "Enable CK (Composable Kernel) default kernels",
|
|
})
|
|
|
|
|
|
# Solvers still in the registry but disabled by default.
|
|
# FORCE_IMMED_MODE_FALLBACK — overrides FIND_MODE entirely, defeats tuning DB
|
|
# Fury RxS F2x3/F3x2 — RDNA3/4-only; harmless on RDNA2 but won't select
|
|
# Rage RxS F2x3 — RDNA4-only
|
|
# Group Conv XDLOPS — RDNA3/4-only (gfx1100+)
|
|
# CK_DEFAULT_KERNELS — RDNA3/4-only (gfx1100+)
|
|
SOLVER_DISABLED_BY_DEFAULT = {
|
|
"MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3",
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS",
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR",
|
|
"MIOPEN_DEBUG_CK_DEFAULT_KERNELS",
|
|
}
|
|
|
|
SOLVER_DTYPE_TAGS: Dict[str, str] = {
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_3X3": "FP32",
|
|
"MIOPEN_DEBUG_AMD_FUSED_WINOGRAD": "FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2": "FP16/FP32",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1": "FP16/FP32",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4": "FP16/FP32",
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS": "FP16/BF16",
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR": "FP16/BF16",
|
|
"MIOPEN_DEBUG_CK_DEFAULT_KERNELS": "FP16/BF16/FP32",
|
|
}
|
|
|
|
# Build full merged var registry
|
|
ROCM_ENV_VARS: Dict[str, Dict[str, Any]] = {}
|
|
ROCM_ENV_VARS.update(GENERAL_VARS)
|
|
for _var, _desc in _SOLVER_DESCS.items():
|
|
ROCM_ENV_VARS[_var] = {
|
|
"default": "0" if _var in SOLVER_DISABLED_BY_DEFAULT else "1",
|
|
"desc": _desc,
|
|
"widget": "checkbox",
|
|
"options": None,
|
|
"dtype": SOLVER_DTYPE_TAGS.get(_var),
|
|
"restart_required": False,
|
|
}
|
|
|
|
# UI group ordering for solver sections
|
|
SOLVER_GROUPS: List[Tuple[str, List[str]]] = [
|
|
("Algorithm/Solver Group Enables", [
|
|
"MIOPEN_DEBUG_CONV_FFT", "MIOPEN_DEBUG_CONV_DIRECT", "MIOPEN_DEBUG_CONV_GEMM",
|
|
"MIOPEN_DEBUG_CONV_WINOGRAD", "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM",
|
|
]),
|
|
("Immediate Fallback Mode", [
|
|
"MIOPEN_DEBUG_CONV_IMMED_FALLBACK", "MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK",
|
|
"MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK",
|
|
]),
|
|
("Build Method Toggles", [
|
|
"MIOPEN_DEBUG_GCN_ASM_KERNELS", "MIOPEN_DEBUG_HIP_KERNELS",
|
|
"MIOPEN_DEBUG_OPENCL_CONVOLUTIONS", "MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP",
|
|
"MIOPEN_DEBUG_ATTN_SOFTMAX",
|
|
]),
|
|
("Direct ASM Solver Toggles", [
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U", "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2",
|
|
"MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED", "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR",
|
|
]),
|
|
("Direct OpenCL Solver Toggles", [
|
|
"MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD", "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1",
|
|
]),
|
|
("Winograd Solver Toggles", [
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_3X3", "MIOPEN_DEBUG_AMD_WINOGRAD_RXS",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2", "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1", "MIOPEN_DEBUG_AMD_FUSED_WINOGRAD",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3",
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2", "MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3",
|
|
]),
|
|
("Multi-pass Winograd Toggles", [
|
|
"MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2", "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3",
|
|
]),
|
|
("Implicit GEMM Toggles", [
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1", "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1",
|
|
"MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1", "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4",
|
|
]),
|
|
("Group Conv / CK Toggles (RDNA3/4+)", [
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS",
|
|
"MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR",
|
|
"MIOPEN_DEBUG_CK_DEFAULT_KERNELS",
|
|
]),
|
|
]
|