diff --git a/installer.py b/installer.py
index 1bbb03719..22172970b 100644
--- a/installer.py
+++ b/installer.py
@@ -677,18 +677,9 @@ def install_rocm_zluda():
     if args.skip_all or args.skip_requirements:
         return torch_command
     from modules import rocm
-    if rocm.err is not None:
-        log.warning(f'ROCm: error checking ROCm toolkit: {rocm.err}')
-        log.info('Using CPU-only torch')
-        return os.environ.get('TORCH_COMMAND', 'torch torchvision')
-    if not rocm.is_installed:
-        log.warning('ROCm: could not find ROCm toolkit installed')
-        log.info('Using CPU-only torch')
-        return os.environ.get('TORCH_COMMAND', 'torch torchvision')
 
     log.info('ROCm: AMD toolkit detected')
-    # if not is_windows:
-    #    os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow-rocm')
+    #os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow')
 
     device = None
     try:
@@ -701,8 +692,6 @@ def install_rocm_zluda():
                 index = 0
                 for idx, gpu in enumerate(amd_gpus):
                     index = idx
-                    # if gpu.name.startswith('gfx11') and os.environ.get('TENSORFLOW_PACKAGE') == 'tensorflow-rocm': # do not use tensorflow-rocm for navi 3x
-                    #    os.environ['TENSORFLOW_PACKAGE'] = 'tensorflow==2.13.0'
                     if not gpu.is_apu:
                         # although apu was found, there can be a dedicated card. do not break loop.
                         # if no dedicated card was found, apu will be used.
@@ -722,22 +711,23 @@ def install_rocm_zluda():
     log.info(msg)
 
     if sys.platform == "win32":
-        #check_python(supported_minors=[10, 11, 12, 13], reason='ZLUDA backend requires a Python version between 3.10 and 3.13')
-
         if args.use_rocm: # TODO install: switch to pytorch source when it becomes available
-            if isinstance(rocm.environment, rocm.PythonPackageEnvironment): # TheRock
+            if device is not None and isinstance(rocm.environment, rocm.PythonPackageEnvironment): # TheRock
+                check_python(supported_minors=[11, 12, 13], reason='ROCm backend requires a Python version between 3.11 and 3.13')
                 torch_command = os.environ.get('TORCH_COMMAND', f'torch torchvision --index-url https://rocm.nightlies.amd.com/v2-staging/{rocm.get_distribution(device)}')
             else:
-                check_python(supported_minors=[12], reason='AMD Windows preview requires a Python version 3.12')
+                check_python(supported_minors=[12], reason='ROCm Windows preview requires Python version 3.12')
                 torch_command = os.environ.get('TORCH_COMMAND', '--no-cache-dir https://repo.radeon.com/rocm/windows/rocm-rel-6.4.4/torch-2.8.0a0%2Bgitfc14c65-cp312-cp312-win_amd64.whl https://repo.radeon.com/rocm/windows/rocm-rel-6.4.4/torchvision-0.24.0a0%2Bc85f008-cp312-cp312-win_amd64.whl')
         else:
+            #check_python(supported_minors=[10, 11, 12, 13], reason='ZLUDA backend requires a Python version between 3.10 and 3.13')
+            torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.7.1+cu118 torchvision==0.22.1+cu118 --index-url https://download.pytorch.org/whl/cu118')
+
             if args.device_id is not None:
                 if os.environ.get('HIP_VISIBLE_DEVICES', None) is not None:
                     log.warning('Setting HIP_VISIBLE_DEVICES and --device-id at the same time may be mistake.')
                 os.environ['HIP_VISIBLE_DEVICES'] = args.device_id
                 del args.device_id
 
-            error = None
             from modules import zluda_installer
             try:
                 if args.reinstall or zluda_installer.is_reinstall_needed():
@@ -745,19 +735,12 @@ def install_rocm_zluda():
                 zluda_installer.install()
                 zluda_installer.set_default_agent(device)
             except Exception as e:
-                error = e
                 log.warning(f'Failed to install ZLUDA: {e}')
 
-            if error is None:
-                try:
-                    zluda_installer.load()
-                    torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.7.1+cu118 torchvision==0.22.1+cu118 --index-url https://download.pytorch.org/whl/cu118')
-                except Exception as e:
-                    error = e
-                    log.warning(f'Failed to load ZLUDA: {e}')
-            if error is not None:
-                log.info('Using CPU-only torch')
-                torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision')
+            try:
+                zluda_installer.load()
+            except Exception as e:
+                log.warning(f'Failed to load ZLUDA: {e}')
     else:
         #check_python(supported_minors=[10, 11, 12, 13], reason='ROCm backend requires a Python version between 3.10 and 3.13')
 
@@ -793,7 +776,7 @@ def install_rocm_zluda():
         log.info(f'ROCm: HSA_OVERRIDE_GFX_VERSION auto config skipped: device={device.name if device is not None else None} version={os.environ.get("HSA_OVERRIDE_GFX_VERSION", None)}')
     else:
         gfx_ver = device.get_gfx_version()
-        if gfx_ver is not None:
+        if gfx_ver is not None and device.name.removeprefix("gfx") != gfx_ver.replace(".", ""):
             os.environ.setdefault('HSA_OVERRIDE_GFX_VERSION', gfx_ver)
             log.info(f'ROCm: HSA_OVERRIDE_GFX_VERSION config overridden: device={device.name} version={os.environ.get("HSA_OVERRIDE_GFX_VERSION", None)}')
 
@@ -936,8 +919,8 @@ def check_torch():
     if torch_command != '':
         pass
     else:
-        is_cuda_available = allow_cuda and (shutil.which('nvidia-smi') is not None or args.use_xformers or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe')))
-        is_rocm_available = allow_rocm and rocm.is_installed
+        is_cuda_available = allow_cuda and (args.use_cuda or shutil.which('nvidia-smi') is not None or args.use_xformers or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe')))
+        is_rocm_available = allow_rocm and (args.use_rocm or args.use_zluda or rocm.is_installed)
         is_ipex_available = allow_ipex and (args.use_ipex or shutil.which('sycl-ls') is not None or shutil.which('sycl-ls.exe') is not None or os.environ.get('ONEAPI_ROOT') is not None or os.path.exists('/opt/intel/oneapi') or os.path.exists("C:/Program Files (x86)/Intel/oneAPI") or os.path.exists("C:/oneAPI"))
 
         if is_cuda_available and args.use_cuda: # prioritize cuda
@@ -965,8 +948,6 @@ def check_torch():
                     install(torch_command, 'torch torchvision')
                 install('onnxruntime-directml', 'onnxruntime-directml', ignore=True)
             else:
-                if args.use_zluda:
-                    log.warning("ZLUDA failed to initialize: no HIP SDK found")
                 log.warning('Torch: CPU-only version installed')
                 torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision')
     if 'torch' in torch_command and not args.version:
diff --git a/modules/intel/ipex/__init__.py b/modules/intel/ipex/__init__.py
index 93f94d280..3d49beca7 100644
--- a/modules/intel/ipex/__init__.py
+++ b/modules/intel/ipex/__init__.py
@@ -165,7 +165,7 @@ def ipex_init(): # pylint: disable=too-many-statements
                         pass
 
             # Memory:
-            if 'linux' in sys.platform and "WSL2" in os.popen("uname -a").read():
+            if "linux" in sys.platform and "WSL2" in os.popen("uname -a").read():
                 torch.xpu.empty_cache = lambda: None
             torch.cuda.empty_cache = torch.xpu.empty_cache
 
diff --git a/modules/intel/ipex/attention.py b/modules/intel/ipex/attention.py
index c3e8ef8ed..aacccca75 100644
--- a/modules/intel/ipex/attention.py
+++ b/modules/intel/ipex/attention.py
@@ -8,8 +8,8 @@ from functools import cache, wraps
 
 # ARC GPUs can't allocate more than 4GB to a single block so we slice the attention layers
 
-dynamic_attention_slice_rate = float(os.environ.get('IPEX_SDPA_SLICE_TRIGGER_RATE', 1))
-dynamic_attention_trigger_rate = float(os.environ.get('IPEX_ATTENTION_SLICE_RATE', 0.5))
+dynamic_attention_slice_rate = float(os.environ.get("IPEX_SDPA_SLICE_TRIGGER_RATE", "1"))
+dynamic_attention_trigger_rate = float(os.environ.get("IPEX_ATTENTION_SLICE_RATE", "0.5"))
 
 # Find something divisible with the input_tokens
 @cache
diff --git a/modules/intel/ipex/hijacks.py b/modules/intel/ipex/hijacks.py
index c9be1f789..8e8961476 100644
--- a/modules/intel/ipex/hijacks.py
+++ b/modules/intel/ipex/hijacks.py
@@ -80,8 +80,8 @@ def torch_get_autocast_dtype(device_type=None):
 # IPEX 2.5 and above has partial support but doesn't really work most of the time.
 original_interpolate = torch.nn.functional.interpolate
 @wraps(torch.nn.functional.interpolate)
-def interpolate(tensor, size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None, antialias=False): # pylint: disable=too-many-arguments
-    if mode in {'bicubic', 'bilinear'}:
+def interpolate(tensor, size=None, scale_factor=None, mode="nearest", align_corners=None, recompute_scale_factor=None, antialias=False): # pylint: disable=too-many-arguments
+    if mode in {"bicubic", "bilinear"}:
         return_device = tensor.device
         return_dtype = tensor.dtype
         return original_interpolate(tensor.to("cpu", dtype=torch.float32), size=size, scale_factor=scale_factor, mode=mode,
@@ -94,8 +94,8 @@ def interpolate(tensor, size=None, scale_factor=None, mode='nearest', align_corn
 # SwinIR BF16:
 original_functional_pad = torch.nn.functional.pad
 @wraps(torch.nn.functional.pad)
-def functional_pad(input, pad, mode='constant', value=None):
-    if mode == 'reflect' and input.dtype == torch.bfloat16:
+def functional_pad(input, pad, mode="constant", value=None):
+    if mode == "reflect" and input.dtype == torch.bfloat16:
         return original_functional_pad(input.to(torch.float32), pad, mode=mode, value=value).to(dtype=torch.bfloat16)
     else:
         return original_functional_pad(input, pad, mode=mode, value=value)
@@ -365,13 +365,13 @@ def ipex_hijacks():
     except Exception:
         pass
 
-    if os.environ.get('IPEX_FORCE_ATTENTION_SLICE', '0') == '0':
+    if os.environ.get("IPEX_FORCE_ATTENTION_SLICE", "0") == "0":
         if torch_version[0] > 2 or (torch_version[0] == 2 and torch_version[1] >= 7):
             use_dynamic_attention = False # torch 2.7 has flash atten support
         else:
             use_dynamic_attention = True
     else:
-        use_dynamic_attention = bool(os.environ.get('IPEX_FORCE_ATTENTION_SLICE', '0') == '1')
+        use_dynamic_attention = bool(os.environ.get("IPEX_FORCE_ATTENTION_SLICE", "0") == "1")
 
     if use_dynamic_attention:
         from .attention import dynamic_scaled_dot_product_attention
diff --git a/modules/sdnq/__init__.py b/modules/sdnq/__init__.py
index 6ba49cbb1..6b8e7c409 100644
--- a/modules/sdnq/__init__.py
+++ b/modules/sdnq/__init__.py
@@ -404,7 +404,7 @@ class SDNQQuantizer(DiffusersQuantizer):
     def _process_model_after_weight_loading(self, model, **kwargs): # pylint: disable=unused-argument
         if shared.opts.diffusers_offload_mode != "none":
             model = model.to(devices.cpu)
-        devices.torch_gc(force=True, reason='sdnq')
+        devices.torch_gc(force=True, reason="sdnq")
         return model
 
     def get_accelerator_warm_up_factor(self):
@@ -440,7 +440,7 @@ class SDNQQuantizer(DiffusersQuantizer):
         """
         return missing_keys
 
-    def update_state_dict_with_metadata(self, state_dict: dict, metadata: dict) -> dict: # pylint: disable=unused-argument
+    def update_state_dict_with_metadata(self, state_dict: dict, metadata: dict) -> dict:
         """
         needed for transformers compatibilty, no-op function
         """
diff --git a/modules/sdnq/common.py b/modules/sdnq/common.py
index e6ed21d4d..ecd06a653 100644
--- a/modules/sdnq/common.py
+++ b/modules/sdnq/common.py
@@ -34,7 +34,7 @@ if hasattr(torch, "float8_e5m2fnuz"):
     dtype_dict["float8_e5m2fnuz"] = {"min": -57344, "max": 57344, "num_bits": 8, "target_dtype": "fp8", "torch_dtype": torch.float8_e5m2fnuz, "storage_dtype": torch.float8_e5m2fnuz, "is_unsigned": False, "is_integer": False}
 
 use_torch_compile = shared.opts.sdnq_dequantize_compile # this setting requires a full restart of the webui to apply
-use_tensorwise_fp8_matmul = os.environ.get('SDNQ_USE_TENSORWISE_FP8_MATMUL', "1").lower() not in {"0", "false", "no"} # row-wise FP8 only exist on H100 hardware, sdnq will use software row-wise with tensorwise hardware with this setting
+use_tensorwise_fp8_matmul = os.environ.get("SDNQ_USE_TENSORWISE_FP8_MATMUL", "1").lower() not in {"0", "false", "no"} # row-wise FP8 only exist on H100 hardware, sdnq will use software row-wise with tensorwise hardware with this setting
 
 linear_types = ("Linear",)
 conv_types = ("Conv1d", "Conv2d", "Conv3d")