From f5c037a7350a8a778e412ced3e53fec8dccb60ce Mon Sep 17 00:00:00 2001 From: resonantsky Date: Thu, 2 Apr 2026 14:59:00 +0200 Subject: [PATCH] Added further rocblas support enhancements and performance-related best practice settings. --- scripts/rocm/rocm_mgr.py | 44 +++++++++++++- scripts/rocm/rocm_vars.py | 119 +++++++++++++++++++++++++++++++++++--- scripts/rocm_ext.py | 85 +++++++++++++++++++++------ 3 files changed, 221 insertions(+), 27 deletions(-) diff --git a/scripts/rocm/rocm_mgr.py b/scripts/rocm/rocm_mgr.py index b704fff03..9bbc0199f 100644 --- a/scripts/rocm/rocm_mgr.py +++ b/scripts/rocm/rocm_mgr.py @@ -1,4 +1,5 @@ import os +import re import sys from pathlib import Path from typing import Dict, Optional @@ -121,14 +122,22 @@ def _get_venv() -> str: return os.environ.get("VIRTUAL_ENV", "") or sys.prefix +def _get_root() -> str: + """App root — one level above the venv folder (e.g. E:\\Sd.Next).""" + return str(Path(_get_venv()).parent) + + def _expand_venv(value: str) -> str: - return value.replace("{VIRTUAL_ENV}", _get_venv()) + return value.replace("{VIRTUAL_ENV}", _get_venv()).replace("{ROOT}", _get_root()) def _collapse_venv(value: str) -> str: venv = _get_venv() + root = _get_root() if venv and value.startswith(venv): return "{VIRTUAL_ENV}" + value[len(venv):] + if root and value.startswith(root): + return "{ROOT}" + value[len(root):] return value @@ -365,6 +374,28 @@ def _user_db_summary(path: Path) -> dict: return out +def _extract_db_hash(db_path: Path) -> str: + """Derive the cache subfolder name from udb.txt filenames. + e.g. gfx1030_30.HIP.3_5_1_5454e9e2da.udb.txt → '3.5.1.5454e9e2da'""" + for f in db_path.glob("*.HIP.*.udb.txt"): + m = re.search(r'\.HIP\.([^.]+)\.udb\.txt$', f.name) + if m: + return m.group(1).replace("_", ".") + return "" + + +def _user_cache_summary(path: Path) -> dict: + """Return {filename: 'N KB'} for binary cache blobs in the resolved cache path.""" + out = {} + if not path.exists(): + return out + for f in sorted(path.iterdir()): + if f.is_file(): + kb = f.stat().st_size // 1024 + out[f.name] = f"{kb} KB" + return out + + def info() -> dict: config = load_config() db_path = Path(_expand_venv(config.get("MIOPEN_SYSTEM_DB_PATH", ""))) @@ -427,12 +458,23 @@ def info() -> dict: if ufiles: udb["files"] = ufiles + # --- User cache (~/.miopen/cache/) --- + cache_base = Path.home() / ".miopen" / "cache" + db_hash = _extract_db_hash(user_db_path) if user_db_path.exists() else "" + cache_path = cache_base / db_hash if db_hash else cache_base + ucache = {"path": str(cache_path), "exists": cache_path.exists()} + if cache_path.exists(): + cfiles = _user_cache_summary(cache_path) + if cfiles: + ucache["files"] = cfiles + return { "rocm": rocm_section, "torch": torch_section, "gpu": gpu_section, "system_db": sdb, "user_db": udb, + "user_cache": ucache, } diff --git a/scripts/rocm/rocm_vars.py b/scripts/rocm/rocm_vars.py index ac1e720c6..235aef0d0 100644 --- a/scripts/rocm/rocm_vars.py +++ b/scripts/rocm/rocm_vars.py @@ -3,6 +3,7 @@ from typing import Dict, Any, List, Tuple # --- General MIOpen/rocBLAS variables (dropdown/textbox/checkbox) --- GENERAL_VARS: Dict[str, Dict[str, Any]] = { + # ── GEMM backend selector + companion toggles ────────────────────────── "MIOPEN_GEMM_ENFORCE_BACKEND": { "default": "1", "desc": "Enforce GEMM backend", @@ -10,6 +11,29 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": [("1 - rocBLAS", "1"), ("5 - hipBLASLt", "5")], "restart_required": False, }, + "PYTORCH_ROCM_USE_ROCBLAS": { + "default": "1", + "desc": "PyTorch ROCm: prioritise rocBLAS for linear algebra", + "widget": "dropdown", + "options": [("0 - Off", "0"), ("1 - On", "1")], + "restart_required": True, + }, + "PYTORCH_HIPBLASLT_DISABLE": { + "default": "1", + "desc": "Disable PyTorch hipBLASLt dispatcher", + "widget": "dropdown", + "options": [("0 - Allow hipBLASLt", "0"), ("1 - Disable hipBLASLt", "1")], + "restart_required": True, + }, + "ROCBLAS_USE_HIPBLASLT": { + "default": "0", + "desc": "rocBLAS: use hipBLASLt backend (0 = Tensile)", + "widget": "dropdown", + "options": [("0 - Tensile (rocBLAS)", "0"), ("1 - hipBLASLt", "1")], + "restart_required": True, + }, + + # ── MIOpen behavioural settings ──────────────────────────────────────── "MIOPEN_FIND_MODE": { "default": "2", "desc": "MIOpen Find Mode", @@ -31,6 +55,15 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": [("0 - Off", "0"), ("1 - On", "1")], "restart_required": True, }, + "MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC": { + "default": "0", + "desc": "Deterministic convolution (reproducible results, may be slower)", + "widget": "dropdown", + "options": [("0 - Off", "0"), ("1 - On", "1")], + "restart_required": False, + }, + + # ── Paths / sizes ────────────────────────────────────────────────────── "MIOPEN_SYSTEM_DB_PATH": { "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\", "desc": "MIOpen system DB path", @@ -38,6 +71,75 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": None, "restart_required": True, }, + "MIOPEN_CONVOLUTION_MAX_WORKSPACE": { + "default": "1073741824", + "desc": "MIOpen convolution max workspace (bytes; 1 GB default)", + "widget": "textbox", + "options": None, + "restart_required": False, + }, + "ROCBLAS_TENSILE_LIBPATH": { + "default": "{VIRTUAL_ENV}\\Lib\\site-packages\\_rocm_sdk_devel\\bin\\rocblas\\library", + "desc": "rocBLAS Tensile library path", + "widget": "textbox", + "options": None, + "restart_required": True, + }, + "ROCBLAS_DEVICE_MEMORY_SIZE": { + "default": "", + "desc": "rocBLAS workspace size in bytes (empty = dynamic)", + "widget": "textbox", + "options": None, + "restart_required": False, + }, + "PYTORCH_TUNABLEOP_CACHE_DIR": { + "default": "{ROOT}\\models\\tunable", + "desc": "TunableOp: kernel profile cache directory", + "widget": "textbox", + "options": None, + "restart_required": False, + }, + + # ── rocBLAS settings ─────────────────────────────────────────────────── + "ROCBLAS_STREAM_ORDER_ALLOC": { + "default": "1", + "desc": "rocBLAS stream-ordered memory allocation", + "widget": "dropdown", + "options": [("0 - Standard", "0"), ("1 - Stream-ordered", "1")], + "restart_required": False, + }, + "ROCBLAS_DEFAULT_ATOMICS_MODE": { + "default": "1", + "desc": "rocBLAS default atomics mode (1 = allow non-deterministic for performance)", + "widget": "dropdown", + "options": [("0 - Off (deterministic)", "0"), ("1 - On (performance)", "1")], + "restart_required": False, + }, + "PYTORCH_TUNABLEOP_ROCBLAS_ENABLED": { + "default": "1", + "desc": "TunableOp: wrap and optimise rocBLAS GEMM calls", + "widget": "dropdown", + "options": [("0 - Off", "0"), ("1 - On", "1")], + "restart_required": False, + }, + "PYTORCH_TUNABLEOP_TUNING": { + "default": "0", + "desc": "TunableOp: tuning mode (1 = benchmark; 0 = use saved CSV)", + "widget": "dropdown", + "options": [("0 - Use saved CSV", "0"), ("1 - Benchmark new shapes", "1")], + "restart_required": False, + }, + + # ── hipBLASLt settings ───────────────────────────────────────────────── + "PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED": { + "default": "0", + "desc": "TunableOp: benchmark hipBLASLt kernels", + "widget": "dropdown", + "options": [("0 - Off", "0"), ("1 - On", "1")], + "restart_required": False, + }, + + # ── Logging: MIOpen → rocBLAS → hipBLASLt ───────────────────────────── "MIOPEN_LOG_LEVEL": { "default": "0", "desc": "MIOpen log verbosity level", @@ -66,13 +168,6 @@ GENERAL_VARS: Dict[str, Dict[str, Any]] = { "options": [("0 - Off", "0"), ("1 - Error", "1"), ("2 - Trace", "2"), ("3 - Hints", "3"), ("4 - Info", "4"), ("5 - API Trace", "5")], "restart_required": False, }, - "MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC": { - "default": "0", - "desc": "Deterministic convolution (reproducible results, may be slower)", - "widget": "dropdown", - "options": [("0 - Off", "0"), ("1 - On", "1")], - "restart_required": False, - }, } # --- Solver toggles (inference/FWD only, RDNA2/3/4 compatible) --- @@ -251,3 +346,13 @@ SOLVER_GROUPS: List[Tuple[str, List[str]]] = [ "MIOPEN_DEBUG_CK_DEFAULT_KERNELS", ]), ] + +# Variables that are relevant only when hipBLASLt is the active GEMM backend. +# These are visually greyed-out in the UI when rocBLAS (MIOPEN_GEMM_ENFORCE_BACKEND="1") is selected. +HIPBLASLT_VARS: set = { + "PYTORCH_HIPBLASLT_DISABLE", + "ROCBLAS_USE_HIPBLASLT", + "PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED", + "HIPBLASLT_LOG_LEVEL", +} + diff --git a/scripts/rocm_ext.py b/scripts/rocm_ext.py index 4d89f7e42..e8ddf24ed 100644 --- a/scripts/rocm_ext.py +++ b/scripts/rocm_ext.py @@ -19,7 +19,7 @@ class ROCmScript(scripts_manager.Script): if not shared.cmd_opts.use_rocm and not installer.torch_info.get('type') == 'rocm': # skip ui creation if not rocm return [] - from scripts.rocm import rocm_mgr, rocm_vars # pylint: disable=no-name-in-module + from scripts.rocm import rocm_mgr, rocm_vars, rocm_profiles # pylint: disable=no-name-in-module config = rocm_mgr.load_config() var_names = [] @@ -59,11 +59,25 @@ class ROCmScript(scripts_manager.Script): row("path", udb.get("path", "")) for fname, finfo in udb.get("files", {}).items(): row(fname, finfo) + section("User cache (~/.miopen/cache)") + ucache = d.get("user_cache", {}) + row("path", ucache.get("path", "")) + for fname, sz in ucache.get("files", {}).items(): + row(fname, sz) return f"{''.join(rows)}
" + def _build_style(unavailable, hipblaslt_disabled=False): + rules = [] + for v in (unavailable or []): + rules.append(f"#rocm_var_{v.lower()} label {{ text-decoration: line-through; opacity: 0.5; }}") + if hipblaslt_disabled: + for v in rocm_vars.HIPBLASLT_VARS: + rules.append(f"#rocm_var_{v.lower()} {{ opacity: 0.45; pointer-events: none; }}") + return f"" if rules else "" + with gr.Accordion('ROCm: Advanced Config', open=False, elem_id='rocm_config'): with gr.Row(): - gr.HTML("

Advanced configuration for ROCm users.


Set your database and solver selections based on GPU profile or individually.


Enable cuDNN in Backend Settings to activate MIOpen.

") + gr.HTML("

Advanced configuration for ROCm users.


For best performance ensure that cudnn and torch tunable ops are set to default in Backend Settings.

") with gr.Row(): btn_info = gr.Button("Refresh Info", variant="primary", elem_id="rocm_btn_info", size="sm") btn_apply = gr.Button("Apply", variant="primary", elem_id="rocm_btn_apply", size="sm") @@ -74,7 +88,10 @@ class ROCmScript(scripts_manager.Script): btn_rdna2 = gr.Button("RDNA2 (RX 6000)", elem_id="rocm_btn_rdna2") btn_rdna3 = gr.Button("RDNA3 (RX 7000)", elem_id="rocm_btn_rdna3") btn_rdna4 = gr.Button("RDNA4 (RX 9000)", elem_id="rocm_btn_rdna4") - style_out = gr.HTML("") + _init_gemm = config.get("MIOPEN_GEMM_ENFORCE_BACKEND", "1") + _init_arch = config.get(rocm_mgr._ARCH_KEY, "") + _init_unavailable = rocm_profiles.UNAVAILABLE.get(_init_arch, set()) if _init_arch else set() + style_out = gr.HTML(_build_style(_init_unavailable, _init_gemm == "1")) info_out = gr.HTML(value=_info_html, elem_id="rocm_info_table") # General vars (dropdowns, textboxes, checkboxes) @@ -106,13 +123,46 @@ class ROCmScript(scripts_manager.Script): for name, comp in zip(var_names, components): meta = rocm_vars.ROCM_ENV_VARS[name] - if meta["widget"] == "dropdown": + if meta["widget"] == "dropdown" and name != "MIOPEN_GEMM_ENFORCE_BACKEND": comp.change(fn=lambda v, n=name: _autosave_field(n, v), inputs=[comp], outputs=[], show_progress='hidden') + _GEMM_COMPANIONS = { + "PYTORCH_ROCM_USE_ROCBLAS": {"1": "1", "5": "0"}, + "PYTORCH_HIPBLASLT_DISABLE": {"1": "1", "5": "0"}, + "ROCBLAS_USE_HIPBLASLT": {"1": "0", "5": "1"}, + "PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED": {"1": "0", "5": "1"}, + } + + def gemm_changed(gemm_display_val): + stored = rocm_mgr._dropdown_stored(str(gemm_display_val), rocm_vars.ROCM_ENV_VARS["MIOPEN_GEMM_ENFORCE_BACKEND"]["options"]) + cfg = rocm_mgr.load_config().copy() + cfg["MIOPEN_GEMM_ENFORCE_BACKEND"] = stored + for var, vals in _GEMM_COMPANIONS.items(): + cfg[var] = vals.get(stored, cfg.get(var, "")) + rocm_mgr.save_config(cfg) + rocm_mgr.apply_env(cfg) + arch = cfg.get(rocm_mgr._ARCH_KEY, "") + unavailable = rocm_profiles.UNAVAILABLE.get(arch, set()) + result = [gr.update(value=_build_style(unavailable, stored == "1"))] + for pname in var_names: + if pname in _GEMM_COMPANIONS: + meta = rocm_vars.ROCM_ENV_VARS[pname] + val = _GEMM_COMPANIONS[pname].get(stored, cfg.get(pname, "")) + result.append(gr.update(value=rocm_mgr._dropdown_display(val, meta["options"]))) + else: + result.append(gr.update()) + return result + + gemm_comp = components[var_names.index("MIOPEN_GEMM_ENFORCE_BACKEND")] + gemm_comp.change(fn=gemm_changed, inputs=[gemm_comp], outputs=[style_out] + components, show_progress='hidden') + def apply_fn(*values): rocm_mgr.apply_all(var_names, list(values)) saved = rocm_mgr.load_config() - result = [gr.update(value="")] + arch = saved.get(rocm_mgr._ARCH_KEY, "") + unavailable = rocm_profiles.UNAVAILABLE.get(arch, set()) + gemm_val = saved.get("MIOPEN_GEMM_ENFORCE_BACKEND", "1") + result = [gr.update(value=_build_style(unavailable, gemm_val == "1"))] for name in var_names: meta = rocm_vars.ROCM_ENV_VARS[name] val = saved.get(name, meta["default"]) @@ -124,19 +174,13 @@ class ROCmScript(scripts_manager.Script): result.append(gr.update(value=rocm_mgr._expand_venv(val))) return result - def _build_style(unavailable): - if not unavailable: - return "" - rules = " ".join( - f"#rocm_var_{v.lower()} label {{ text-decoration: line-through; opacity: 0.5; }}" - for v in unavailable - ) - return f"" - def reset_fn(): rocm_mgr.reset_defaults() updated = rocm_mgr.load_config() - result = [gr.update(value="")] + arch = updated.get(rocm_mgr._ARCH_KEY, "") + unavailable = rocm_profiles.UNAVAILABLE.get(arch, set()) + gemm_val = updated.get("MIOPEN_GEMM_ENFORCE_BACKEND", "1") + result = [gr.update(value=_build_style(unavailable, gemm_val == "1"))] for name in var_names: meta = rocm_vars.ROCM_ENV_VARS[name] val = updated.get(name, meta["default"]) @@ -150,7 +194,9 @@ class ROCmScript(scripts_manager.Script): def clear_fn(): rocm_mgr.clear_env() - result = [gr.update(value="")] + cfg = rocm_mgr.load_config() + gemm_val = cfg.get("MIOPEN_GEMM_ENFORCE_BACKEND", "1") + result = [gr.update(value=_build_style(None, gemm_val == "1"))] for name in var_names: meta = rocm_vars.ROCM_ENV_VARS[name] if meta["widget"] == "checkbox": @@ -163,7 +209,8 @@ class ROCmScript(scripts_manager.Script): def delete_fn(): rocm_mgr.delete_config() - result = [gr.update(value="")] + gemm_default = rocm_vars.ROCM_ENV_VARS.get("MIOPEN_GEMM_ENFORCE_BACKEND", {}).get("default", "1") + result = [gr.update(value=_build_style(None, gemm_default == "1"))] for name in var_names: meta = rocm_vars.ROCM_ENV_VARS[name] if meta["widget"] == "checkbox": @@ -175,11 +222,11 @@ class ROCmScript(scripts_manager.Script): return result def profile_fn(arch): - from scripts.rocm import rocm_profiles # pylint: disable=no-name-in-module rocm_mgr.apply_profile(arch) updated = rocm_mgr.load_config() unavailable = rocm_profiles.UNAVAILABLE.get(arch, set()) - result = [gr.update(value=_build_style(unavailable))] + gemm_val = updated.get("MIOPEN_GEMM_ENFORCE_BACKEND", "1") + result = [gr.update(value=_build_style(unavailable, gemm_val == "1"))] for pname in var_names: meta = rocm_vars.ROCM_ENV_VARS[pname] val = updated.get(pname, meta["default"])