code quality and layout fixes

2026-04-03 10:12:05 +02:00 · 2026-04-03 10:12:05 +02:00 · 01d53edb25
parent 4cafae9350
commit 01d53edb25
3 changed files with 114 additions and 115 deletions
--- a/scripts/rocm/rocm_mgr.py
+++ b/scripts/rocm/rocm_mgr.py
@ -29,17 +29,17 @@ CONFIG = Path(os.path.abspath(os.path.join('data', 'rocm.json')))
 _cache: Optional[Dict[str, str]] = None  # loaded once, invalidated on save
 # Metadata key written into rocm.json to record which architecture profile is active.
-# Not an environment variable — always skipped during env application but preserved in the
+# Not an environment variable - always skipped during env application but preserved in the
 # saved config so that arch-safety enforcement is consistent across restarts.
 _ARCH_KEY = "_rocm_arch"
 # Vars that must never appear in the process environment.
 #
-# _DTYPE_UNSAFE: alter FP16 inference dtype — must be cleared regardless of config
+# _DTYPE_UNSAFE: alter FP16 inference dtype - must be cleared regardless of config
-#   MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL  — DEBUG alias: routes all FP16 convs through BF16 exponent math
+#   MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL  - DEBUG alias: routes all FP16 convs through BF16 exponent math
-#   MIOPEN_CONVOLUTION_ATTRIB_FP16_ALT_IMPL        — API-level alias: same BF16-exponent effect
+#   MIOPEN_CONVOLUTION_ATTRIB_FP16_ALT_IMPL        - API-level alias: same BF16-exponent effect
-#   MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM — unstable experimental FP16 path
+#   MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM - unstable experimental FP16 path
-#   MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16     — changes FP16 WrW atomic accumulation
+#   MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16     - changes FP16 WrW atomic accumulation
 #
 # SOLVER_DISABLED_BY_DEFAULT: every solver known to be incompatible with this runtime
 #   (FP32-only, training-only WrW/BWD, fixed-geometry mismatches, XDLOPS/CDNA-only, arch-specific).
@ -54,18 +54,18 @@ _DTYPE_UNSAFE = {
 # regardless of saved config. Limited to dtype-corrupting vars only.
 # IMPORTANT: SOLVER_DISABLED_BY_DEFAULT is intentionally NOT included here.
 #   When a solver var is absent (unset) MIOpen still calls IsApplicable() on every
-#   conv-find — wasted probing overhead. When a var is explicitly "0" MIOpen skips
+#   conv-find - wasted probing overhead. When a var is explicitly "0" MIOpen skips
 #   IsApplicable() immediately. Solver defaults flow through the config loop as "0"
 #   (their ROCM_ENV_VARS default is "0") so they are explicitly set to "0" in the env.
 _UNSET_VARS = _DTYPE_UNSAFE
 # Additional environment vars that must be removed from the process before MIOpen loads.
 # These are not MIOpen solver toggles but can corrupt MIOpen's runtime behaviour:
-#   HIP_PATH / HIP_PATH_71  — point to the system AMD ROCm install; override the venv-bundled
+#   HIP_PATH / HIP_PATH_71  - point to the system AMD ROCm install; override the venv-bundled
 #                              _rocm_sdk_devel DLLs with a potentially mismatched system version
-#   QML_*/QT_*              — QtQuick shader/disk-cache flags leaked from Qt tools; harmless for
+#   QML_*/QT_*              - QtQuick shader/disk-cache flags leaked from Qt tools; harmless for
 #                              PyTorch but can conflict with Gradio's embedded Qt helpers
-#   PYENV_VIRTUALENV_DISABLE_PROMPT — pyenv noise that confuses venv detection
+#   PYENV_VIRTUALENV_DISABLE_PROMPT - pyenv noise that confuses venv detection
 _EXTRA_CLEAR_VARS = {
    "HIP_PATH",
    "HIP_PATH_71",
@ -73,7 +73,7 @@ _EXTRA_CLEAR_VARS = {
    "QML_DISABLE_DISK_CACHE",
    "QML_FORCE_DISK_CACHE",
    "QT_DISABLE_SHADER_DISK_CACHE",
-    # PERF_VALS vars are NOT boolean toggles — MIOpen reads them as perf-config strings.
+    # PERF_VALS vars are NOT boolean toggles - MIOpen reads them as perf-config strings.
    # If inherited from a parent shell with value "1", MIOpen's GetPerfConfFromEnv parses
    # "1" as a degenerate config and can return dtype=float32 output from FP16 tensors.
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS",
@ -82,12 +82,12 @@ _EXTRA_CLEAR_VARS = {
 # Solvers whose MIOpen IsApplicable() explicitly rejects non-FP32 tensors.
 # They are safe to leave enabled in FP32 mode. When the active dtype is FP16 or BF16
-# we force them OFF so MIOpen skips the IsApplicable probe entirely — avoids overhead on
+# we force them OFF so MIOpen skips the IsApplicable probe entirely - avoids overhead on
 # every conv shape find. These are NOT in _UNSET_VARS because they are valid in FP32.
 _FP32_ONLY_SOLVERS = {
-    "MIOPEN_DEBUG_CONV_FFT",           # FFT convolution — FP32 only (MIOpen source: IsFp32 check)
+    "MIOPEN_DEBUG_CONV_FFT",           # FFT convolution - FP32 only (MIOpen source: IsFp32 check)
-    "MIOPEN_DEBUG_AMD_WINOGRAD_3X3",   # Winograd 3x3 — FP32 only
+    "MIOPEN_DEBUG_AMD_WINOGRAD_3X3",   # Winograd 3x3 - FP32 only
-    "MIOPEN_DEBUG_AMD_FUSED_WINOGRAD", # Fused Winograd — FP32 only
+    "MIOPEN_DEBUG_AMD_FUSED_WINOGRAD", # Fused Winograd - FP32 only
 }
@ -172,7 +172,7 @@ def load_config() -> Dict[str, str]:
            _cache = data if data else {k: v["default"] for k, v in ROCM_ENV_VARS.items()}
            # Purge unsafe vars from a stale saved config and re-persist only if the file existed.
            # When running without a saved config (first run / after Delete), load_config() must
-            # never create the file — that only happens via save_config() on Apply or Apply Profile.
+            # never create the file - that only happens via save_config() on Apply or Apply Profile.
            dirty = {k for k in _cache if k in _UNSET_VARS or (k != _ARCH_KEY and k not in ROCM_ENV_VARS)}
            if dirty:
                _cache = {k: v for k, v in _cache.items() if k not in dirty}
@ -221,7 +221,7 @@ def apply_env(config: Optional[Dict[str, str]] = None) -> None:
        os.environ[var] = expanded
    # Arch safety net: hard-force all hardware-incompatible vars to "0" in the env.
    # This runs *after* the config loop so it overrides any stale "1" that survived in the JSON.
-    # Source of truth: rocm_profiles.UNAVAILABLE[arch] — vars with no supporting hardware.
+    # Source of truth: rocm_profiles.UNAVAILABLE[arch] - vars with no supporting hardware.
    arch = config.get(_ARCH_KEY, "")
    unavailable = rocm_profiles.UNAVAILABLE.get(arch, set())
    if unavailable:
@ -249,7 +249,7 @@ def apply_all(names: list, values: list) -> None:
        meta = ROCM_ENV_VARS[name]
        if meta["widget"] == "checkbox":
            if value is None:
-                pass  # Gradio passed None (component not interacted with) — leave config unchanged
+                pass  # Gradio passed None (component not interacted with) - leave config unchanged
            else:
                config[name] = "1" if value else "0"
        elif meta["widget"] == "radio":
@ -257,7 +257,7 @@ def apply_all(names: list, values: list) -> None:
            valid = {v for _, v in meta["options"]} if meta["options"] and isinstance(meta["options"][0], tuple) else set(meta["options"] or [])
            if stored in valid:
                config[name] = stored
-            # else: value was None/invalid — leave the existing saved value untouched
+            # else: value was None/invalid - leave the existing saved value untouched
        else:
            if meta.get("options"):
                value = _dropdown_stored(str(value), meta["options"])
@ -300,7 +300,7 @@ def delete_config() -> None:
        CONFIG.unlink()
        log.info(f'ROCm delete_config: deleted {CONFIG}')
    _cache = None
-    # Delete the MIOpen user DB (~/.miopen/db) — stale entries can cause solver mismatches
+    # Delete the MIOpen user DB (~/.miopen/db) - stale entries can cause solver mismatches
    miopen_db = Path(os.path.expanduser('~')) / '.miopen' / 'db'
    if miopen_db.exists():
        shutil.rmtree(miopen_db, ignore_errors=True)
@ -458,7 +458,7 @@ def info() -> dict:
        if ufiles:
            udb["files"] = ufiles
-    # --- User cache (~/.miopen/cache/<version-hash>) ---
+    # User cache (~/.miopen/cache/<version-hash>)
    cache_base = Path.home() / ".miopen" / "cache"
    db_hash = _extract_db_hash(user_db_path) if user_db_path.exists() else ""
    cache_path = cache_base / db_hash if db_hash else cache_base
--- a/scripts/rocm/rocm_profiles.py
+++ b/scripts/rocm/rocm_profiles.py
@ -1,4 +1,4 @@
-"""
+"""
 Architecture-specific MIOpen solver profiles for AMD GCN/RDNA GPUs.
 Sources:
@ -6,8 +6,8 @@ Sources:
 Key axis: consumer RDNA GPUs have NO XDLOPS hardware (that's CDNA/Instinct only).
  RDNA2 (gfx1030): RX 6000 series
-  RDNA3 (gfx1100): RX 7000 series — adds Fury Winograd, wider MPASS
+  RDNA3 (gfx1100): RX 7000 series - adds Fury Winograd, wider MPASS
-  RDNA4 (gfx1200): RX 9000 series — adds Rage Winograd, wider MPASS
+  RDNA4 (gfx1200): RX 9000 series - adds Rage Winograd, wider MPASS
 Each profile is a dict of {var: value} that will be MERGED on top of the
 current config (general vars like DB path / log level are preserved).
@ -15,9 +15,9 @@ current config (general vars like DB path / log level are preserved).
 from typing import Dict
-# ---------------------------------------------------------------------------
+
 # Shared: everything that must be OFF on ALL consumer RDNA (no XDLOPS hw)
-# ---------------------------------------------------------------------------
+
 _XDLOPS_OFF: Dict[str, str] = {
    # GTC XDLOPS (CDNA-only)
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS":                                "0",
@ -55,7 +55,7 @@ _XDLOPS_OFF: Dict[str, str] = {
    # MLIR (CDNA-only in practice)
    "MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS":                                          "0",
    "MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS":                                          "0",
-    # MP BD Winograd (Multi-pass Block-Decomposed — CDNA / high-end only)
+    # MP BD Winograd (Multi-pass Block-Decomposed - CDNA / high-end only)
    "MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3":                                              "0",
    "MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3":                                              "0",
    "MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3":                                              "0",
@ -68,17 +68,17 @@ _XDLOPS_OFF: Dict[str, str] = {
    "MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F6X3":                                      "0",
 }
-# ---------------------------------------------------------------------------
+
-# RDNA2 — gfx1030 (RX 6000 series)
+# RDNA2 - gfx1030 (RX 6000 series)
 # No XDLOPS, no Fury/Rage Winograd, MPASS limited to F3x2/F3x3
 # ASM IGEMM: V4R1 variants only; HIP IGEMM: non-XDLOPS V4R1/R4 only
-# ---------------------------------------------------------------------------
+
 RDNA2: Dict[str, str] = {
    **_XDLOPS_OFF,
    # General settings (architecture-independent; set here so all profiles cover them)
    "MIOPEN_SEARCH_CUTOFF": "0",
    "MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC": "0",
-    # Core algo enables — FFT is FP32-only but harmless (IsApplicable rejects it for fp16 tensors)
+    # Core algo enables - FFT is FP32-only but harmless (IsApplicable rejects it for fp16 tensors)
    "MIOPEN_DEBUG_CONV_FFT":               "1",
    "MIOPEN_DEBUG_CONV_DIRECT":            "1",
    "MIOPEN_DEBUG_CONV_GEMM":              "1",
@ -93,16 +93,16 @@ RDNA2: Dict[str, str] = {
    "MIOPEN_DEBUG_OPENCL_CONVOLUTIONS":    "1",
    "MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP":    "1",
    "MIOPEN_DEBUG_ATTN_SOFTMAX":           "1",
-    # Direct ASM — dtype notes
+    # Direct ASM - dtype notes
-    # 3X3U / 1X1U / 1X1UV2: FP32/FP16 forward — enabled
+    # 3X3U / 1X1U / 1X1UV2: FP32/FP16 forward - enabled
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U":                    "1",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U":                    "1",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2":                  "1",
-    # 5X10U2V2: fixed geometry (5*10 stride-2), no SD conv matches — disabled
+    # 5X10U2V2: fixed geometry (5*10 stride-2), no SD conv matches - disabled
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2":                "0",
-    # 7X7C3H224W224: hard-coded ImageNet stem (C=3, H=W=224, K=64) — never matches SD — disabled
+    # 7X7C3H224W224: hard-coded ImageNet stem (C=3, H=W=224, K=64) - never matches SD - disabled
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224":           "0",
-    # WRW3X3 / WRW1X1: FP32-only weight-gradient (training only) — disabled for inference
+    # WRW3X3 / WRW1X1: FP32-only weight-gradient (training only) - disabled for inference
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3":                  "0",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1":                  "0",
    # PERF_VALS intentionally blank: MIOpen reads this as a config string not a boolean;
@ -110,30 +110,30 @@ RDNA2: Dict[str, str] = {
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS":          "",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED":   "1",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR":            "1",
-    # NAIVE_CONV_FWD: scalar FP32 reference solver — IsApplicable does NOT reliably filter for FP16;
+    # NAIVE_CONV_FWD: scalar FP32 reference solver - IsApplicable does NOT reliably filter for FP16;
    # can be selected for unusual shapes (e.g. VAE decoder 3-ch output) and returns dtype=float32
    "MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD":               "0",
-    # Direct OCL — dtype notes
+    # Direct OCL - dtype notes
-    # FWD / FWD1X1: FP32/FP16 forward — enabled
+    # FWD / FWD1X1: FP32/FP16 forward - enabled
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD":      "1",
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1":   "1",
-    # FWD11X11: requires 11*11 kernel — no SD match — disabled
+    # FWD11X11: requires 11*11 kernel - no SD match - disabled
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11": "0",
-    # FWDGEN: FP32 generic OCL fallback — IsApplicable does NOT reliably reject for FP16;
+    # FWDGEN: FP32 generic OCL fallback - IsApplicable does NOT reliably reject for FP16;
-    # can produce dtype=float32 output for FP16 inputs — disabled
+    # can produce dtype=float32 output for FP16 inputs - disabled
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN":   "0",
-    # WRW2 / WRW53 / WRW1X1: training-only weight-gradient — disabled
+    # WRW2 / WRW53 / WRW1X1: training-only weight-gradient - disabled
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2":     "0",
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53":    "0",
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1":   "0",
-    # Winograd RxS — dtype per MIOpen docs
+    # Winograd RxS - dtype per MIOpen docs
-    # WINOGRAD_3X3: FP32-only — harmless (IsApplicable rejects for fp16); enabled
+    # WINOGRAD_3X3: FP32-only - harmless (IsApplicable rejects for fp16); enabled
    "MIOPEN_DEBUG_AMD_WINOGRAD_3X3":                "1",
-    # RXS: covers FP32/FP16 F(3,3) Fwd/Bwd + FP32 F(3,2) WrW — keep enabled (fp16 fwd/bwd path exists)
+    # RXS: covers FP32/FP16 F(3,3) Fwd/Bwd + FP32 F(3,2) WrW - keep enabled (fp16 fwd/bwd path exists)
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS":                "1",
-    # RXS_FWD_BWD: FP32/FP16 — explicitly the fp16-capable subset
+    # RXS_FWD_BWD: FP32/FP16 - explicitly the fp16-capable subset
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD":        "1",
-    # RXS_WRW: FP32 WrW only — training-only, disabled for inference fp16 profile
+    # RXS_WRW: FP32 WrW only - training-only, disabled for inference fp16 profile
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW":            "0",
    # RXS_F3X2: FP32/FP16 Fwd/Bwd
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2":           "1",
@ -141,15 +141,15 @@ RDNA2: Dict[str, str] = {
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3":           "1",
    # RXS_F2X3_G1: FP32/FP16 Fwd/Bwd (non-group convolutions)
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1":        "1",
-    # FUSED_WINOGRAD: FP32-only — harmless (IsApplicable rejects for fp16); enabled
+    # FUSED_WINOGRAD: FP32-only - harmless (IsApplicable rejects for fp16); enabled
    "MIOPEN_DEBUG_AMD_FUSED_WINOGRAD":              "1",
-    # PERF_VALS intentionally blank: same reason as ASM_1X1U — not a boolean, config string
+    # PERF_VALS intentionally blank: same reason as ASM_1X1U - not a boolean, config string
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS": "",
-    # Fury/Rage Winograd — NOT available on RDNA2
+    # Fury/Rage Winograd - NOT available on RDNA2
    "MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3": "0",
    "MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2": "0",
    "MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3": "0",
-    # MPASS — only F3x2 and F3x3 are safe on RDNA2
+    # MPASS - only F3x2 and F3x3 are safe on RDNA2
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2": "1",
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3": "1",
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4": "0",
@ -159,50 +159,50 @@ RDNA2: Dict[str, str] = {
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4": "0",
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2": "0",
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3": "0",
-    # ASM Implicit GEMM — forward V4R1 only; no GTC/XDLOPS on RDNA2
+    # ASM Implicit GEMM - forward V4R1 only; no GTC/XDLOPS on RDNA2
-    # BWD (backward data-gradient) and WrW (weight-gradient) are training-only — disabled
+    # BWD (backward data-gradient) and WrW (weight-gradient) are training-only - disabled
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1":     "1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1": "1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1":     "0",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1":     "0",
-    # HIP Implicit GEMM — non-XDLOPS V4R1/R4 forward only
+    # HIP Implicit GEMM - non-XDLOPS V4R1/R4 forward only
-    # BWD (backward data-gradient) and WrW (weight-gradient) are training-only — disabled
+    # BWD (backward data-gradient) and WrW (weight-gradient) are training-only - disabled
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1": "1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4": "1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1": "0",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1": "0",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1": "0",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4": "0",
-    # Group Conv XDLOPS / CK default kernels — RDNA3/4 only, not available on RDNA2
+    # Group Conv XDLOPS / CK default kernels - RDNA3/4 only, not available on RDNA2
    "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS":         "0",
    "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR": "0",
    "MIOPEN_DEBUG_CK_DEFAULT_KERNELS":                               "0",
 }
 # ---------------------------------------------------------------------------
-# RDNA3 — gfx1100 (RX 7000 series)
+# RDNA3 - gfx1100 (RX 7000 series)
 # Fury Winograd added; MPASS F3x4 enabled; Group Conv XDLOPS + CK default kernels enabled
 # ---------------------------------------------------------------------------
 RDNA3: Dict[str, str] = {
    **RDNA2,
-    # Fury Winograd — introduced for gfx1100 (RDNA3)
+    # Fury Winograd - introduced for gfx1100 (RDNA3)
    "MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3": "1",
    "MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2": "1",
    # Wider MPASS on RDNA3
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4": "1",
-    # Group Conv XDLOPS / CK — available from gfx1100 (RDNA3) onwards
+    # Group Conv XDLOPS / CK - available from gfx1100 (RDNA3) onwards
    "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS":         "1",
    "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR": "1",
    "MIOPEN_DEBUG_CK_DEFAULT_KERNELS":                               "1",
 }
 # ---------------------------------------------------------------------------
-# RDNA4 — gfx1200 (RX 9000 series)
+# RDNA4 - gfx1200 (RX 9000 series)
 # Rage Winograd added; MPASS F3x5 enabled
 # ---------------------------------------------------------------------------
 RDNA4: Dict[str, str] = {
    **RDNA3,
-    # Rage Winograd — introduced for gfx1200 (RDNA4)
+    # Rage Winograd - introduced for gfx1200 (RDNA4)
    "MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3": "1",
    # Wider MPASS on RDNA4
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5": "1",
--- a/scripts/rocm/rocm_vars.py
+++ b/scripts/rocm/rocm_vars.py
@ -1,39 +1,51 @@
-from typing import Dict, Any, List, Tuple
+from typing import Dict, Any, List, Tuple
 # --- General MIOpen/rocBLAS variables (dropdown/textbox/checkbox) ---
 GENERAL_VARS: Dict[str, Dict[str, Any]] = {
-
+     "MIOPEN_SYSTEM_DB_PATH": {
-    # ── GEMM backend selector + companion toggles ──────────────────────────
+        "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\",
        "desc": "MIOpen system DB path",
        "widget": "textbox",
        "options": None,
        "restart_required": True,
    },
    "ROCBLAS_TENSILE_LIBPATH": {
        "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\rocblas\\library",
        "desc": "rocBLAS Tensile library path",
        "widget": "textbox",
        "options": None,
        "restart_required": True,
    },
    # -- GEMM backend selector + companion toggles --------------------------
    "MIOPEN_GEMM_ENFORCE_BACKEND": {
        "default": "1",
-        "desc": "Enforce GEMM backend",
+        "desc": "GEMM backend",
        "widget": "dropdown",
        "options": [("1 - rocBLAS", "1"), ("5 - hipBLASLt", "5")],
        "restart_required": False,
    },
    "PYTORCH_ROCM_USE_ROCBLAS": {
        "default": "0",
-        "desc": "PyTorch ROCm: prioritise rocBLAS for linear algebra",
+        "desc": "PyTorch: Use rocBLAS.",
        "widget": "dropdown",
        "options": [("0 - Off", "0"), ("1 - On", "1")],
        "restart_required": True,
    },
    "PYTORCH_HIPBLASLT_DISABLE": {
        "default": "1",
-        "desc": "Disable PyTorch hipBLASLt dispatcher",
+        "desc": "PyTorch: Disable hipBLASLt.",
        "widget": "dropdown",
        "options": [("0 - Allow hipBLASLt", "0"), ("1 - Disable hipBLASLt", "1")],
        "restart_required": True,
    },
    "ROCBLAS_USE_HIPBLASLT": {
        "default": "0",
-        "desc": "rocBLAS: use hipBLASLt backend (0 = Tensile)",
+        "desc": "rocBLAS: use hipBLASLt backend.",
        "widget": "dropdown",
        "options": [("0 - Tensile (rocBLAS)", "0"), ("1 - hipBLASLt", "1")],
        "restart_required": True,
    },
-
+    # -- MIOpen behavioural settings ----------------------------------------
    # ── MIOpen behavioural settings ────────────────────────────────────────
    "MIOPEN_FIND_MODE": {
        "default": "2",
        "desc": "MIOpen Find Mode",
@ -57,34 +69,21 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = {
    },
    "MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC": {
        "default": "0",
-        "desc": "Deterministic convolution (reproducible results, may be slower)",
+        "desc": "Deterministic convolutions",
        "widget": "dropdown",
        "options": [("0 - Off", "0"), ("1 - On", "1")],
        "restart_required": False,
    },
-    # ── Paths / sizes ──────────────────────────────────────────────────────
+    # -- Paths / sizes ------------------------------------------------------
-    "MIOPEN_SYSTEM_DB_PATH": {
+   
        "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\",
        "desc": "MIOpen system DB path",
        "widget": "textbox",
        "options": None,
        "restart_required": True,
    },
    "MIOPEN_CONVOLUTION_MAX_WORKSPACE": {
        "default": "1073741824",
-        "desc": "MIOpen convolution max workspace (bytes; 1 GB default)",
+        "desc": "MIOpen convolutions: max workspace (bytes; 1 GB)",
        "widget": "textbox",
        "options": None,
        "restart_required": False,
    },
    "ROCBLAS_TENSILE_LIBPATH": {
        "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\rocblas\\library",
        "desc": "rocBLAS Tensile library path",
        "widget": "textbox",
        "options": None,
        "restart_required": True,
    },
    "ROCBLAS_DEVICE_MEMORY_SIZE": {
        "default": "",
        "desc": "rocBLAS workspace size in bytes (empty = dynamic)",
@ -94,13 +93,13 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = {
    },
    "PYTORCH_TUNABLEOP_CACHE_DIR": {
        "default": "{ROOT}\\models\\tunable",
-        "desc": "TunableOp: kernel profile cache directory",
+        "desc": "TunableOp cache directory",
        "widget": "textbox",
        "options": None,
        "restart_required": False,
    },
-    # ── rocBLAS settings ───────────────────────────────────────────────────
+    # -- rocBLAS settings ---------------------------------------------------
    "ROCBLAS_STREAM_ORDER_ALLOC": {
        "default": "1",
        "desc": "rocBLAS stream-ordered memory allocation",
@ -110,27 +109,27 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = {
    },
    "ROCBLAS_DEFAULT_ATOMICS_MODE": {
        "default": "1",
-        "desc": "rocBLAS default atomics mode (1 = allow non-deterministic for performance)",
+        "desc": "rocBLAS allow atomics",
        "widget": "dropdown",
        "options": [("0 - Off (deterministic)", "0"), ("1 - On (performance)", "1")],
        "restart_required": False,
    },
    "PYTORCH_TUNABLEOP_ROCBLAS_ENABLED": {
        "default": "0",
-        "desc": "TunableOp: wrap and optimise rocBLAS GEMM calls",
+        "desc": "TunableOp: Enable rocBLAS tuning",
        "widget": "dropdown",
        "options": [("0 - Off", "0"), ("1 - On", "1")],
        "restart_required": False,
    },
    "PYTORCH_TUNABLEOP_TUNING": {
        "default": "0",
-        "desc": "TunableOp: tuning mode (1 = benchmark; 0 = use saved CSV)",
+        "desc": "TunableOp: Tuning mode",
        "widget": "dropdown",
-        "options": [("0 - Use saved CSV", "0"), ("1 - Benchmark new shapes", "1")],
+        "options": [("0 - Use Cache", "0"), ("1 - Benchmark new shapes", "1")],
        "restart_required": False,
    },
-    # ── hipBLASLt settings ─────────────────────────────────────────────────
+    # -- hipBLASLt settings -------------------------------------------------
    "PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED": {
        "default": "0",
        "desc": "TunableOp: benchmark hipBLASLt kernels",
@ -139,7 +138,7 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = {
        "restart_required": False,
    },
-    # ── Logging: MIOpen → rocBLAS → hipBLASLt ─────────────────────────────
+    # -- Logging: MIOpen -> rocBLAS -> hipBLASLt -----------------------------
    "MIOPEN_LOG_LEVEL": {
        "default": "0",
        "desc": "MIOpen log verbosity level",
@ -171,13 +170,13 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = {
 }
 # --- Solver toggles (inference/FWD only, RDNA2/3/4 compatible) ---
-# Removed entirely — not representable in the UI, cannot be set by users:
+# Removed entirely - not representable in the UI, cannot be set by users:
-#   WRW (weight-gradient) and BWD (data-gradient) — training passes only, never run during inference
+#   WRW (weight-gradient) and BWD (data-gradient) - training passes only, never run during inference
-#   XDLOPS/CK CDNA-exclusive (MI100/MI200/MI300 matrix engine variants) — not on any RDNA
+#   XDLOPS/CK CDNA-exclusive (MI100/MI200/MI300 matrix engine variants) - not on any RDNA
-#   Fixed-geometry (5x10, 7x7-ImageNet, 11x11) — shapes never appear in SD/video inference
+#   Fixed-geometry (5x10, 7x7-ImageNet, 11x11) - shapes never appear in SD/video inference
-#   FP32-reference (NAIVE_CONV_FWD, FWDGEN) — IsApplicable() unreliable for FP16/BF16
+#   FP32-reference (NAIVE_CONV_FWD, FWDGEN) - IsApplicable() unreliable for FP16/BF16
-#   Wide MPASS (F3x4..F7x3) — kernel sizes that cannot match any SD convolution shape
+#   Wide MPASS (F3x4..F7x3) - kernel sizes that cannot match any SD convolution shape
-# Disabled by default (added but off): RDNA3/4-only — Group Conv XDLOPS, CK default kernels
+# Disabled by default (added but off): RDNA3/4-only - Group Conv XDLOPS, CK default kernels
 _SOLVER_DESCS: Dict[str, str] = {}
 _SOLVER_DESCS.update({
@ -200,7 +199,7 @@ _SOLVER_DESCS.update({
    "MIOPEN_DEBUG_ATTN_SOFTMAX":         "Enable Attention Softmax",
 })
 _SOLVER_DESCS.update({
-    # Direct ASM — FWD inference only (WRW, fixed-geometry, FP32-reference removed)
+    # Direct ASM - FWD inference only (WRW, fixed-geometry, FP32-reference removed)
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U":                  "Enable Direct ASM 3x3U",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U":                  "Enable Direct ASM 1x1U",
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2":                "Enable Direct ASM 1x1UV2",
@ -208,12 +207,12 @@ _SOLVER_DESCS.update({
    "MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR":          "Enable Direct ASM 1x1U AI Heuristic",
 })
 _SOLVER_DESCS.update({
-    # Direct OCL — FWD inference only (WRW, FWD11X11 fixed-geom, FWDGEN FP32-ref removed)
+    # Direct OCL - FWD inference only (WRW, FWD11X11 fixed-geom, FWDGEN FP32-ref removed)
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD":    "Enable Direct OCL FWD",
    "MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1": "Enable Direct OCL FWD1X1",
 })
 _SOLVER_DESCS.update({
-    # Winograd FWD — WRW removed; Fury/Rage kept as RDNA3/4 inference (off by default)
+    # Winograd FWD - WRW removed; Fury/Rage kept as RDNA3/4 inference (off by default)
    "MIOPEN_DEBUG_AMD_WINOGRAD_3X3":           "Enable AMD Winograd 3x3",
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS":           "Enable AMD Winograd RxS",
    "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD":   "Enable AMD Winograd RxS FWD",
@ -226,32 +225,32 @@ _SOLVER_DESCS.update({
    "MIOPEN_DEBUG_AMD_WINOGRAD_RAGE_RXS_F2X3": "Enable AMD Winograd Rage RxS F2x3",
 })
 _SOLVER_DESCS.update({
-    # Multi-pass Winograd — only F3x2/F3x3 match typical 3x3 SD shapes; wider kernels removed
+    # Multi-pass Winograd - only F3x2/F3x3 match typical 3x3 SD shapes; wider kernels removed
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2": "Enable AMD Winograd MPASS F3x2",
    "MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3": "Enable AMD Winograd MPASS F3x3",
 })
 _SOLVER_DESCS.update({
-    # Implicit GEMM FWD — BWD/WRW (training), CDNA-exclusive XDLOPS variants removed
+    # Implicit GEMM FWD - BWD/WRW (training), CDNA-exclusive XDLOPS variants removed
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1":     "Enable ASM Implicit GEMM FWD V4R1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1": "Enable ASM Implicit GEMM FWD V4R1 1x1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1":     "Enable HIP Implicit GEMM FWD V4R1",
    "MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4":     "Enable HIP Implicit GEMM FWD V4R4",
 })
 _SOLVER_DESCS.update({
-    # Group Conv XDLOPS FWD — RDNA3/4 (gfx1100+) only; disabled by default
+    # Group Conv XDLOPS FWD - RDNA3/4 (gfx1100+) only; disabled by default
    "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS":         "Enable Group Conv Implicit GEMM XDLOPS FWD",
    "MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR": "Enable Group Conv Implicit GEMM XDLOPS FWD AI Heuristic",
-    # CK (Composable Kernel) default kernels — RDNA3/4 (gfx1100+); disabled by default
+    # CK (Composable Kernel) default kernels - RDNA3/4 (gfx1100+); disabled by default
    "MIOPEN_DEBUG_CK_DEFAULT_KERNELS": "Enable CK (Composable Kernel) default kernels",
 })
 # Solvers still in the registry but disabled by default.
-#   FORCE_IMMED_MODE_FALLBACK — overrides FIND_MODE entirely, defeats tuning DB
+#   FORCE_IMMED_MODE_FALLBACK - overrides FIND_MODE entirely, defeats tuning DB
-#   Fury RxS F2x3/F3x2       — RDNA3/4-only; harmless on RDNA2 but won't select
+#   Fury RxS F2x3/F3x2       - RDNA3/4-only; harmless on RDNA2 but won't select
-#   Rage RxS F2x3            — RDNA4-only
+#   Rage RxS F2x3            - RDNA4-only
-#   Group Conv XDLOPS        — RDNA3/4-only (gfx1100+)
+#   Group Conv XDLOPS        - RDNA3/4-only (gfx1100+)
-#   CK_DEFAULT_KERNELS       — RDNA3/4-only (gfx1100+)
+#   CK_DEFAULT_KERNELS       - RDNA3/4-only (gfx1100+)
 SOLVER_DISABLED_BY_DEFAULT = {
    "MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK",
    "MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3",