mirror of https://github.com/vladmandic/automatic
ROCm and Zluda don't fall back to CPU and cleanup strings
parent
579b1f3175
commit
71fde8a897
47
installer.py
47
installer.py
|
|
@ -677,18 +677,9 @@ def install_rocm_zluda():
|
|||
if args.skip_all or args.skip_requirements:
|
||||
return torch_command
|
||||
from modules import rocm
|
||||
if rocm.err is not None:
|
||||
log.warning(f'ROCm: error checking ROCm toolkit: {rocm.err}')
|
||||
log.info('Using CPU-only torch')
|
||||
return os.environ.get('TORCH_COMMAND', 'torch torchvision')
|
||||
if not rocm.is_installed:
|
||||
log.warning('ROCm: could not find ROCm toolkit installed')
|
||||
log.info('Using CPU-only torch')
|
||||
return os.environ.get('TORCH_COMMAND', 'torch torchvision')
|
||||
|
||||
log.info('ROCm: AMD toolkit detected')
|
||||
# if not is_windows:
|
||||
# os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow-rocm')
|
||||
#os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow')
|
||||
|
||||
device = None
|
||||
try:
|
||||
|
|
@ -701,8 +692,6 @@ def install_rocm_zluda():
|
|||
index = 0
|
||||
for idx, gpu in enumerate(amd_gpus):
|
||||
index = idx
|
||||
# if gpu.name.startswith('gfx11') and os.environ.get('TENSORFLOW_PACKAGE') == 'tensorflow-rocm': # do not use tensorflow-rocm for navi 3x
|
||||
# os.environ['TENSORFLOW_PACKAGE'] = 'tensorflow==2.13.0'
|
||||
if not gpu.is_apu:
|
||||
# although apu was found, there can be a dedicated card. do not break loop.
|
||||
# if no dedicated card was found, apu will be used.
|
||||
|
|
@ -722,22 +711,23 @@ def install_rocm_zluda():
|
|||
log.info(msg)
|
||||
|
||||
if sys.platform == "win32":
|
||||
#check_python(supported_minors=[10, 11, 12, 13], reason='ZLUDA backend requires a Python version between 3.10 and 3.13')
|
||||
|
||||
if args.use_rocm: # TODO install: switch to pytorch source when it becomes available
|
||||
if isinstance(rocm.environment, rocm.PythonPackageEnvironment): # TheRock
|
||||
if device is not None and isinstance(rocm.environment, rocm.PythonPackageEnvironment): # TheRock
|
||||
check_python(supported_minors=[11, 12, 13], reason='ROCm backend requires a Python version between 3.11 and 3.13')
|
||||
torch_command = os.environ.get('TORCH_COMMAND', f'torch torchvision --index-url https://rocm.nightlies.amd.com/v2-staging/{rocm.get_distribution(device)}')
|
||||
else:
|
||||
check_python(supported_minors=[12], reason='AMD Windows preview requires a Python version 3.12')
|
||||
check_python(supported_minors=[12], reason='ROCm Windows preview requires Python version 3.12')
|
||||
torch_command = os.environ.get('TORCH_COMMAND', '--no-cache-dir https://repo.radeon.com/rocm/windows/rocm-rel-6.4.4/torch-2.8.0a0%2Bgitfc14c65-cp312-cp312-win_amd64.whl https://repo.radeon.com/rocm/windows/rocm-rel-6.4.4/torchvision-0.24.0a0%2Bc85f008-cp312-cp312-win_amd64.whl')
|
||||
else:
|
||||
#check_python(supported_minors=[10, 11, 12, 13], reason='ZLUDA backend requires a Python version between 3.10 and 3.13')
|
||||
torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.7.1+cu118 torchvision==0.22.1+cu118 --index-url https://download.pytorch.org/whl/cu118')
|
||||
|
||||
if args.device_id is not None:
|
||||
if os.environ.get('HIP_VISIBLE_DEVICES', None) is not None:
|
||||
log.warning('Setting HIP_VISIBLE_DEVICES and --device-id at the same time may be mistake.')
|
||||
os.environ['HIP_VISIBLE_DEVICES'] = args.device_id
|
||||
del args.device_id
|
||||
|
||||
error = None
|
||||
from modules import zluda_installer
|
||||
try:
|
||||
if args.reinstall or zluda_installer.is_reinstall_needed():
|
||||
|
|
@ -745,19 +735,12 @@ def install_rocm_zluda():
|
|||
zluda_installer.install()
|
||||
zluda_installer.set_default_agent(device)
|
||||
except Exception as e:
|
||||
error = e
|
||||
log.warning(f'Failed to install ZLUDA: {e}')
|
||||
|
||||
if error is None:
|
||||
try:
|
||||
zluda_installer.load()
|
||||
torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.7.1+cu118 torchvision==0.22.1+cu118 --index-url https://download.pytorch.org/whl/cu118')
|
||||
except Exception as e:
|
||||
error = e
|
||||
log.warning(f'Failed to load ZLUDA: {e}')
|
||||
if error is not None:
|
||||
log.info('Using CPU-only torch')
|
||||
torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision')
|
||||
try:
|
||||
zluda_installer.load()
|
||||
except Exception as e:
|
||||
log.warning(f'Failed to load ZLUDA: {e}')
|
||||
else:
|
||||
#check_python(supported_minors=[10, 11, 12, 13], reason='ROCm backend requires a Python version between 3.10 and 3.13')
|
||||
|
||||
|
|
@ -793,7 +776,7 @@ def install_rocm_zluda():
|
|||
log.info(f'ROCm: HSA_OVERRIDE_GFX_VERSION auto config skipped: device={device.name if device is not None else None} version={os.environ.get("HSA_OVERRIDE_GFX_VERSION", None)}')
|
||||
else:
|
||||
gfx_ver = device.get_gfx_version()
|
||||
if gfx_ver is not None:
|
||||
if gfx_ver is not None and device.name.removeprefix("gfx") != gfx_ver.replace(".", ""):
|
||||
os.environ.setdefault('HSA_OVERRIDE_GFX_VERSION', gfx_ver)
|
||||
log.info(f'ROCm: HSA_OVERRIDE_GFX_VERSION config overridden: device={device.name} version={os.environ.get("HSA_OVERRIDE_GFX_VERSION", None)}')
|
||||
|
||||
|
|
@ -936,8 +919,8 @@ def check_torch():
|
|||
if torch_command != '':
|
||||
pass
|
||||
else:
|
||||
is_cuda_available = allow_cuda and (shutil.which('nvidia-smi') is not None or args.use_xformers or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe')))
|
||||
is_rocm_available = allow_rocm and rocm.is_installed
|
||||
is_cuda_available = allow_cuda and (args.use_cuda or shutil.which('nvidia-smi') is not None or args.use_xformers or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe')))
|
||||
is_rocm_available = allow_rocm and (args.use_rocm or args.use_zluda or rocm.is_installed)
|
||||
is_ipex_available = allow_ipex and (args.use_ipex or shutil.which('sycl-ls') is not None or shutil.which('sycl-ls.exe') is not None or os.environ.get('ONEAPI_ROOT') is not None or os.path.exists('/opt/intel/oneapi') or os.path.exists("C:/Program Files (x86)/Intel/oneAPI") or os.path.exists("C:/oneAPI"))
|
||||
|
||||
if is_cuda_available and args.use_cuda: # prioritize cuda
|
||||
|
|
@ -965,8 +948,6 @@ def check_torch():
|
|||
install(torch_command, 'torch torchvision')
|
||||
install('onnxruntime-directml', 'onnxruntime-directml', ignore=True)
|
||||
else:
|
||||
if args.use_zluda:
|
||||
log.warning("ZLUDA failed to initialize: no HIP SDK found")
|
||||
log.warning('Torch: CPU-only version installed')
|
||||
torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision')
|
||||
if 'torch' in torch_command and not args.version:
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ def ipex_init(): # pylint: disable=too-many-statements
|
|||
pass
|
||||
|
||||
# Memory:
|
||||
if 'linux' in sys.platform and "WSL2" in os.popen("uname -a").read():
|
||||
if "linux" in sys.platform and "WSL2" in os.popen("uname -a").read():
|
||||
torch.xpu.empty_cache = lambda: None
|
||||
torch.cuda.empty_cache = torch.xpu.empty_cache
|
||||
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ from functools import cache, wraps
|
|||
|
||||
# ARC GPUs can't allocate more than 4GB to a single block so we slice the attention layers
|
||||
|
||||
dynamic_attention_slice_rate = float(os.environ.get('IPEX_SDPA_SLICE_TRIGGER_RATE', 1))
|
||||
dynamic_attention_trigger_rate = float(os.environ.get('IPEX_ATTENTION_SLICE_RATE', 0.5))
|
||||
dynamic_attention_slice_rate = float(os.environ.get("IPEX_SDPA_SLICE_TRIGGER_RATE", "1"))
|
||||
dynamic_attention_trigger_rate = float(os.environ.get("IPEX_ATTENTION_SLICE_RATE", "0.5"))
|
||||
|
||||
# Find something divisible with the input_tokens
|
||||
@cache
|
||||
|
|
|
|||
|
|
@ -80,8 +80,8 @@ def torch_get_autocast_dtype(device_type=None):
|
|||
# IPEX 2.5 and above has partial support but doesn't really work most of the time.
|
||||
original_interpolate = torch.nn.functional.interpolate
|
||||
@wraps(torch.nn.functional.interpolate)
|
||||
def interpolate(tensor, size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None, antialias=False): # pylint: disable=too-many-arguments
|
||||
if mode in {'bicubic', 'bilinear'}:
|
||||
def interpolate(tensor, size=None, scale_factor=None, mode="nearest", align_corners=None, recompute_scale_factor=None, antialias=False): # pylint: disable=too-many-arguments
|
||||
if mode in {"bicubic", "bilinear"}:
|
||||
return_device = tensor.device
|
||||
return_dtype = tensor.dtype
|
||||
return original_interpolate(tensor.to("cpu", dtype=torch.float32), size=size, scale_factor=scale_factor, mode=mode,
|
||||
|
|
@ -94,8 +94,8 @@ def interpolate(tensor, size=None, scale_factor=None, mode='nearest', align_corn
|
|||
# SwinIR BF16:
|
||||
original_functional_pad = torch.nn.functional.pad
|
||||
@wraps(torch.nn.functional.pad)
|
||||
def functional_pad(input, pad, mode='constant', value=None):
|
||||
if mode == 'reflect' and input.dtype == torch.bfloat16:
|
||||
def functional_pad(input, pad, mode="constant", value=None):
|
||||
if mode == "reflect" and input.dtype == torch.bfloat16:
|
||||
return original_functional_pad(input.to(torch.float32), pad, mode=mode, value=value).to(dtype=torch.bfloat16)
|
||||
else:
|
||||
return original_functional_pad(input, pad, mode=mode, value=value)
|
||||
|
|
@ -365,13 +365,13 @@ def ipex_hijacks():
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
if os.environ.get('IPEX_FORCE_ATTENTION_SLICE', '0') == '0':
|
||||
if os.environ.get("IPEX_FORCE_ATTENTION_SLICE", "0") == "0":
|
||||
if torch_version[0] > 2 or (torch_version[0] == 2 and torch_version[1] >= 7):
|
||||
use_dynamic_attention = False # torch 2.7 has flash atten support
|
||||
else:
|
||||
use_dynamic_attention = True
|
||||
else:
|
||||
use_dynamic_attention = bool(os.environ.get('IPEX_FORCE_ATTENTION_SLICE', '0') == '1')
|
||||
use_dynamic_attention = bool(os.environ.get("IPEX_FORCE_ATTENTION_SLICE", "0") == "1")
|
||||
|
||||
if use_dynamic_attention:
|
||||
from .attention import dynamic_scaled_dot_product_attention
|
||||
|
|
|
|||
|
|
@ -404,7 +404,7 @@ class SDNQQuantizer(DiffusersQuantizer):
|
|||
def _process_model_after_weight_loading(self, model, **kwargs): # pylint: disable=unused-argument
|
||||
if shared.opts.diffusers_offload_mode != "none":
|
||||
model = model.to(devices.cpu)
|
||||
devices.torch_gc(force=True, reason='sdnq')
|
||||
devices.torch_gc(force=True, reason="sdnq")
|
||||
return model
|
||||
|
||||
def get_accelerator_warm_up_factor(self):
|
||||
|
|
@ -440,7 +440,7 @@ class SDNQQuantizer(DiffusersQuantizer):
|
|||
"""
|
||||
return missing_keys
|
||||
|
||||
def update_state_dict_with_metadata(self, state_dict: dict, metadata: dict) -> dict: # pylint: disable=unused-argument
|
||||
def update_state_dict_with_metadata(self, state_dict: dict, metadata: dict) -> dict:
|
||||
"""
|
||||
needed for transformers compatibilty, no-op function
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ if hasattr(torch, "float8_e5m2fnuz"):
|
|||
dtype_dict["float8_e5m2fnuz"] = {"min": -57344, "max": 57344, "num_bits": 8, "target_dtype": "fp8", "torch_dtype": torch.float8_e5m2fnuz, "storage_dtype": torch.float8_e5m2fnuz, "is_unsigned": False, "is_integer": False}
|
||||
|
||||
use_torch_compile = shared.opts.sdnq_dequantize_compile # this setting requires a full restart of the webui to apply
|
||||
use_tensorwise_fp8_matmul = os.environ.get('SDNQ_USE_TENSORWISE_FP8_MATMUL', "1").lower() not in {"0", "false", "no"} # row-wise FP8 only exist on H100 hardware, sdnq will use software row-wise with tensorwise hardware with this setting
|
||||
use_tensorwise_fp8_matmul = os.environ.get("SDNQ_USE_TENSORWISE_FP8_MATMUL", "1").lower() not in {"0", "false", "no"} # row-wise FP8 only exist on H100 hardware, sdnq will use software row-wise with tensorwise hardware with this setting
|
||||
|
||||
linear_types = ("Linear",)
|
||||
conv_types = ("Conv1d", "Conv2d", "Conv3d")
|
||||
|
|
|
|||
Loading…
Reference in New Issue