diff --git a/CHANGELOG.md b/CHANGELOG.md
index a175bfdb8..7deaed7a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
# Change Log for SD.Next
+## Update for 2025-10-18
+
+- **Models**
+ [Kandinsky 5 Lite](https://huggingface.co/ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers) in *SFT, CFG-distilled and Steps-distilled* variants
+ first model in the Kandinsky5 series is a T2V model optimized for 5sec videos and uses the Qwen2.5 text encoder
+- **Fixes**
+ - ROCm-on-Windows additional checks
+ - SDNQ-SVD fallback on incompatible layers
+ - Huggingface model download
+ - Video: implement dynamic and manual sampler shift
+ - Fix interrupt batch processing
+ - Delay import of control processors until used
+ - Fix tiny VAE with batched results
+ - Fix CFG scale not added to metadata and set valid range to >=1.0
+- **Other**
+ - Optimized Video tab layout
+ - Video enable VAE slicing and framewise decoding when possible
+ - Detect and log `flash-attn` and `sageattention` if installed
+ - Remove unused UI settings
+
## Update for 2025-10-17
### Highlights for 2025-10-17
@@ -15,7 +35,11 @@ Highlight are:
- **Quantization**:
new **SVD**-style quantization using SDNQ offers almost zero-loss even with **4bit** quantization
and now you can also test your favorite quantization on-the-fly and then save/load model for future use
-- Other: support for **Huggingface** mirrors, changes to installer to prevent unwanted `torch-cpu` operations, improved previews, etc.
+- Other: support for **Huggingface** mirrors, changes to installer to prevent unwanted `torch-cpu` operations, improved VAE previews, etc.
+
+
+
+[ReadMe](https://github.com/vladmandic/automatic/blob/master/README.md) | [ChangeLog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [Docs](https://vladmandic.github.io/sdnext-docs/) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867) | [Sponsor](https://github.com/sponsors/vladmandic)
### Details for 2025-10-17
diff --git a/TODO.md b/TODO.md
index 5bdc45124..56ece93cd 100644
--- a/TODO.md
+++ b/TODO.md
@@ -4,6 +4,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
## Future Candidates
+- Transformers unified cache handler
- Remote TE
- [Canvas](https://konvajs.org/)
- Refactor: [Modular pipelines and guiders](https://github.com/huggingface/diffusers/issues/11915)
@@ -23,10 +24,8 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- [SmoothCache](https://github.com/huggingface/diffusers/issues/11135)
- [MagCache](https://github.com/lllyasviel/FramePack/pull/673/files)
- [Dream0 guidance](https://huggingface.co/ByteDance/DreamO)
-- [SUPIR upscaler](https://github.com/Fanghua-Yu/SUPIR)
- [ByteDance OneReward](https://github.com/bytedance/OneReward)
- [ByteDance USO](https://github.com/bytedance/USO)
-- Remove: `Agent Scheduler`
- Remove: `CodeFormer`
- Remove: `GFPGAN`
- ModernUI: Lite vs Expert mode
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index db79b18f6..8f6427aa0 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit db79b18f6f3f5a247e710b507d10cb39b01cc371
+Subproject commit 8f6427aa037b654ae664a0197c794e48fdbbc648
diff --git a/installer.py b/installer.py
index 835478690..71ec4cbe0 100644
--- a/installer.py
+++ b/installer.py
@@ -608,7 +608,7 @@ def check_diffusers():
if args.skip_git:
install('diffusers')
return
- sha = 'af769881d37fe916afef2c47279f66c79f5f2714' # diffusers commit hash
+ sha = '23ebbb4bc81a17ebea17cb7cb94f301199e49a7f' # diffusers commit hash
# if args.use_rocm or args.use_zluda or args.use_directml:
# sha = '043ab2520f6a19fce78e6e060a68dbc947edb9f9' # lock diffusers versions for now
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
@@ -682,11 +682,8 @@ def install_rocm_zluda():
amd_gpus = []
try:
- if sys.platform == "win32" and not rocm.is_installed:
- amd_gpus = rocm.driver_get_agents()
- else:
- amd_gpus = rocm.get_agents()
- log.info('ROCm: AMD toolkit detected')
+ amd_gpus = rocm.get_agents()
+ log.info('ROCm: AMD toolkit detected')
except Exception as e:
log.warning(f'ROCm agent enumerator failed: {e}')
@@ -712,10 +709,13 @@ def install_rocm_zluda():
if device_id < len(amd_gpus):
device = amd_gpus[device_id]
- if sys.platform == "win32" and args.use_rocm and not rocm.is_installed:
+ if sys.platform == "win32" and args.use_rocm and not rocm.is_installed and device is not None:
check_python(supported_minors=[11, 12, 13], reason='ROCm backend requires a Python version between 3.11 and 3.13')
- install(f"rocm rocm-sdk-core --index-url https://rocm.nightlies.amd.com/v2-staging/{device.therock}")
- rocm.refresh()
+ if device.therock is None:
+ log.warning('No supported ROCm agent was found. Skipping ROCm package installation.')
+ else:
+ install(f"rocm rocm-sdk-core --index-url https://rocm.nightlies.amd.com/v2-staging/{device.therock}")
+ rocm.refresh()
msg = f'ROCm: version={rocm.version}'
if device is not None:
@@ -724,7 +724,9 @@ def install_rocm_zluda():
if sys.platform == "win32":
if args.use_rocm: # TODO install: switch to pytorch source when it becomes available
- if device is not None and isinstance(rocm.environment, rocm.PythonPackageEnvironment): # TheRock
+ if device is None:
+ log.warning('No ROCm agent was found. Please make sure that graphics driver is installed and up to date.')
+ if isinstance(rocm.environment, rocm.PythonPackageEnvironment):
check_python(supported_minors=[11, 12, 13], reason='ROCm backend requires a Python version between 3.11 and 3.13')
torch_command = os.environ.get('TORCH_COMMAND', f'torch torchvision --index-url https://rocm.nightlies.amd.com/v2-staging/{device.therock}')
else:
@@ -885,7 +887,6 @@ def check_torch():
if args.profile:
pr = cProfile.Profile()
pr.enable()
- from modules import rocm
allow_cuda = not (args.use_rocm or args.use_directml or args.use_ipex or args.use_openvino)
allow_rocm = not (args.use_cuda or args.use_directml or args.use_ipex or args.use_openvino)
allow_ipex = not (args.use_cuda or args.use_rocm or args.use_directml or args.use_openvino)
@@ -902,11 +903,17 @@ def check_torch():
log.error('DirectML is only supported on Windows')
if torch_command != '':
- pass
+ is_cuda_available = False
+ is_ipex_available = False
+ is_rocm_available = False
else:
- is_cuda_available = allow_cuda and (args.use_cuda or shutil.which('nvidia-smi') is not None or args.use_xformers or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe')))
- is_rocm_available = allow_rocm and (args.use_rocm or args.use_zluda or rocm.is_installed)
+ is_cuda_available = allow_cuda and (args.use_cuda or shutil.which('nvidia-smi') is not None or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe')))
is_ipex_available = allow_ipex and (args.use_ipex or shutil.which('sycl-ls') is not None or shutil.which('sycl-ls.exe') is not None or os.environ.get('ONEAPI_ROOT') is not None or os.path.exists('/opt/intel/oneapi') or os.path.exists("C:/Program Files (x86)/Intel/oneAPI") or os.path.exists("C:/oneAPI") or os.path.exists("C:/Program Files/Intel/Intel Graphics Software"))
+ is_rocm_available = False
+
+ if not is_cuda_available and not is_ipex_available and allow_rocm:
+ from modules import rocm
+ is_rocm_available = allow_rocm and (args.use_rocm or args.use_zluda or rocm.is_installed) # late eval to avoid unnecessary import
if is_cuda_available and args.use_cuda: # prioritize cuda
torch_command = install_cuda()
@@ -935,6 +942,7 @@ def check_torch():
else:
log.warning('Torch: CPU-only version installed')
torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision')
+
if args.version:
return
@@ -994,7 +1002,7 @@ def check_torch():
if not args.ignore:
sys.exit(1)
- if rocm.is_installed:
+ if is_rocm_available:
rocm.postinstall()
if not args.skip_all:
install_torch_addons()
diff --git a/modules/control/processors.py b/modules/control/processors.py
index 7cb8f8833..24002a4fb 100644
--- a/modules/control/processors.py
+++ b/modules/control/processors.py
@@ -6,27 +6,6 @@ from installer import log
from modules.errors import display
from modules import devices, images
-from modules.control.proc.hed import HEDdetector
-from modules.control.proc.canny import CannyDetector
-from modules.control.proc.edge import EdgeDetector
-from modules.control.proc.lineart import LineartDetector
-from modules.control.proc.lineart_anime import LineartAnimeDetector
-from modules.control.proc.pidi import PidiNetDetector
-from modules.control.proc.mediapipe_face import MediapipeFaceDetector
-from modules.control.proc.shuffle import ContentShuffleDetector
-from modules.control.proc.leres import LeresDetector
-from modules.control.proc.midas import MidasDetector
-from modules.control.proc.mlsd import MLSDdetector
-from modules.control.proc.normalbae import NormalBaeDetector
-from modules.control.proc.openpose import OpenposeDetector
-from modules.control.proc.dwpose import DWposeDetector
-from modules.control.proc.segment_anything import SamDetector
-from modules.control.proc.zoe import ZoeDetector
-from modules.control.proc.marigold import MarigoldDetector
-from modules.control.proc.dpt import DPTDetector
-from modules.control.proc.glpn import GLPNDetector
-from modules.control.proc.depth_anything import DepthAnythingDetector
-
models = {}
cache_dir = 'models/control/processors'
@@ -36,36 +15,92 @@ config = {
# placeholder
'None': {},
# pose models
- 'OpenPose': {'class': OpenposeDetector, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}},
- 'DWPose': {'class': DWposeDetector, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}},
- 'MediaPipe Face': {'class': MediapipeFaceDetector, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}},
+ 'OpenPose': {'class': None, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}},
+ 'DWPose': {'class': None, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}},
+ 'MediaPipe Face': {'class': None, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}},
# outline models
- 'Canny': {'class': CannyDetector, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}},
- 'Edge': {'class': EdgeDetector, 'checkpoint': False, 'params': {'pf': True, 'mode': 'edge'}},
- 'LineArt Realistic': {'class': LineartDetector, 'checkpoint': True, 'params': {'coarse': False}},
- 'LineArt Anime': {'class': LineartAnimeDetector, 'checkpoint': True, 'params': {}},
- 'HED': {'class': HEDdetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False}},
- 'PidiNet': {'class': PidiNetDetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False, 'apply_filter': False}},
+ 'Canny': {'class': None, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}},
+ 'Edge': {'class': None, 'checkpoint': False, 'params': {'pf': True, 'mode': 'edge'}},
+ 'LineArt Realistic': {'class': None, 'checkpoint': True, 'params': {'coarse': False}},
+ 'LineArt Anime': {'class': None, 'checkpoint': True, 'params': {}},
+ 'HED': {'class': None, 'checkpoint': True, 'params': {'scribble': False, 'safe': False}},
+ 'PidiNet': {'class': None, 'checkpoint': True, 'params': {'scribble': False, 'safe': False, 'apply_filter': False}},
# depth models
- 'Midas Depth Hybrid': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}},
- 'Leres Depth': {'class': LeresDetector, 'checkpoint': True, 'params': {'boost': False, 'thr_a':0, 'thr_b':0}},
- 'Zoe Depth': {'class': ZoeDetector, 'checkpoint': True, 'params': {'gamma_corrected': False}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_N.safetensors', 'model_type': "zoedepth"}},
- 'Marigold Depth': {'class': MarigoldDetector, 'checkpoint': True, 'params': {'denoising_steps': 10, 'ensemble_size': 10, 'processing_res': 512, 'match_input_res': True, 'color_map': 'None'}, 'load_config': {'pretrained_model_or_path': 'Bingxin/Marigold'}},
- 'Normal Bae': {'class': NormalBaeDetector, 'checkpoint': True, 'params': {}},
+ 'Midas Depth Hybrid': {'class': None, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}},
+ 'Leres Depth': {'class': None, 'checkpoint': True, 'params': {'boost': False, 'thr_a':0, 'thr_b':0}},
+ 'Zoe Depth': {'class': None, 'checkpoint': True, 'params': {'gamma_corrected': False}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_N.safetensors', 'model_type': "zoedepth"}},
+ 'Marigold Depth': {'class': None, 'checkpoint': True, 'params': {'denoising_steps': 10, 'ensemble_size': 10, 'processing_res': 512, 'match_input_res': True, 'color_map': 'None'}, 'load_config': {'pretrained_model_or_path': 'Bingxin/Marigold'}},
+ 'Normal Bae': {'class': None, 'checkpoint': True, 'params': {}},
# segmentation models
- 'SegmentAnything': {'class': SamDetector, 'checkpoint': True, 'model': 'Base', 'params': {}},
+ 'SegmentAnything': {'class': None, 'checkpoint': True, 'model': 'Base', 'params': {}},
# other models
- 'MLSD': {'class': MLSDdetector, 'checkpoint': True, 'params': {'thr_v': 0.1, 'thr_d': 0.1}},
- 'Shuffle': {'class': ContentShuffleDetector, 'checkpoint': False, 'params': {}},
- 'DPT Depth Hybrid': {'class': DPTDetector, 'checkpoint': False, 'params': {}},
- 'GLPN Depth': {'class': GLPNDetector, 'checkpoint': False, 'params': {}},
- 'Depth Anything': {'class': DepthAnythingDetector, 'checkpoint': True, 'load_config': {'pretrained_model_or_path': 'LiheYoung/depth_anything_vitl14' }, 'params': { 'color_map': 'inferno' }},
+ 'MLSD': {'class': None, 'checkpoint': True, 'params': {'thr_v': 0.1, 'thr_d': 0.1}},
+ 'Shuffle': {'class': None, 'checkpoint': False, 'params': {}},
+ 'DPT Depth Hybrid': {'class': None, 'checkpoint': False, 'params': {}},
+ 'GLPN Depth': {'class': None, 'checkpoint': False, 'params': {}},
+ 'Depth Anything': {'class': None, 'checkpoint': True, 'load_config': {'pretrained_model_or_path': 'LiheYoung/depth_anything_vitl14' }, 'params': { 'color_map': 'inferno' }},
# 'Midas Depth Large': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}, 'load_config': {'pretrained_model_or_path': 'Intel/dpt-large', 'model_type': "dpt_large", 'filename': ''}},
# 'Zoe Depth Zoe': {'class': ZoeDetector, 'checkpoint': True, 'params': {}},
# 'Zoe Depth NK': {'class': ZoeDetector, 'checkpoint': True, 'params': {}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_NK.safetensors', 'model_type': "zoedepth_nk"}},
}
+def delay_load_config():
+ global config # pylint: disable=global-statement
+ from modules.control.proc.hed import HEDdetector
+ from modules.control.proc.canny import CannyDetector
+ from modules.control.proc.edge import EdgeDetector
+ from modules.control.proc.lineart import LineartDetector
+ from modules.control.proc.lineart_anime import LineartAnimeDetector
+ from modules.control.proc.pidi import PidiNetDetector
+ from modules.control.proc.mediapipe_face import MediapipeFaceDetector
+ from modules.control.proc.shuffle import ContentShuffleDetector
+ from modules.control.proc.leres import LeresDetector
+ from modules.control.proc.midas import MidasDetector
+ from modules.control.proc.mlsd import MLSDdetector
+ from modules.control.proc.normalbae import NormalBaeDetector
+ from modules.control.proc.openpose import OpenposeDetector
+ from modules.control.proc.dwpose import DWposeDetector
+ from modules.control.proc.segment_anything import SamDetector
+ from modules.control.proc.zoe import ZoeDetector
+ from modules.control.proc.marigold import MarigoldDetector
+ from modules.control.proc.dpt import DPTDetector
+ from modules.control.proc.glpn import GLPNDetector
+ from modules.control.proc.depth_anything import DepthAnythingDetector
+ config = {
+ # placeholder
+ 'None': {},
+ # pose models
+ 'OpenPose': {'class': OpenposeDetector, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}},
+ 'DWPose': {'class': DWposeDetector, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}},
+ 'MediaPipe Face': {'class': MediapipeFaceDetector, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}},
+ # outline models
+ 'Canny': {'class': CannyDetector, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}},
+ 'Edge': {'class': EdgeDetector, 'checkpoint': False, 'params': {'pf': True, 'mode': 'edge'}},
+ 'LineArt Realistic': {'class': LineartDetector, 'checkpoint': True, 'params': {'coarse': False}},
+ 'LineArt Anime': {'class': LineartAnimeDetector, 'checkpoint': True, 'params': {}},
+ 'HED': {'class': HEDdetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False}},
+ 'PidiNet': {'class': PidiNetDetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False, 'apply_filter': False}},
+ # depth models
+ 'Midas Depth Hybrid': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}},
+ 'Leres Depth': {'class': LeresDetector, 'checkpoint': True, 'params': {'boost': False, 'thr_a':0, 'thr_b':0}},
+ 'Zoe Depth': {'class': ZoeDetector, 'checkpoint': True, 'params': {'gamma_corrected': False}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_N.safetensors', 'model_type': "zoedepth"}},
+ 'Marigold Depth': {'class': MarigoldDetector, 'checkpoint': True, 'params': {'denoising_steps': 10, 'ensemble_size': 10, 'processing_res': 512, 'match_input_res': True, 'color_map': 'None'}, 'load_config': {'pretrained_model_or_path': 'Bingxin/Marigold'}},
+ 'Normal Bae': {'class': NormalBaeDetector, 'checkpoint': True, 'params': {}},
+ # segmentation models
+ 'SegmentAnything': {'class': SamDetector, 'checkpoint': True, 'model': 'Base', 'params': {}},
+ # other models
+ 'MLSD': {'class': MLSDdetector, 'checkpoint': True, 'params': {'thr_v': 0.1, 'thr_d': 0.1}},
+ 'Shuffle': {'class': ContentShuffleDetector, 'checkpoint': False, 'params': {}},
+ 'DPT Depth Hybrid': {'class': DPTDetector, 'checkpoint': False, 'params': {}},
+ 'GLPN Depth': {'class': GLPNDetector, 'checkpoint': False, 'params': {}},
+ 'Depth Anything': {'class': DepthAnythingDetector, 'checkpoint': True, 'load_config': {'pretrained_model_or_path': 'LiheYoung/depth_anything_vitl14' }, 'params': { 'color_map': 'inferno' }},
+ # 'Midas Depth Large': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}, 'load_config': {'pretrained_model_or_path': 'Intel/dpt-large', 'model_type': "dpt_large", 'filename': ''}},
+ # 'Zoe Depth Zoe': {'class': ZoeDetector, 'checkpoint': True, 'params': {}},
+ # 'Zoe Depth NK': {'class': ZoeDetector, 'checkpoint': True, 'params': {}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_NK.safetensors', 'model_type': "zoedepth_nk"}},
+ }
+
+
def list_models(refresh=False):
global models # pylint: disable=global-statement
if not refresh and len(models) > 0:
@@ -178,6 +213,9 @@ class Processor():
log.error(f'Control Processor unknown: id="{processor_id}" available={list(config)}')
return f'Processor failed to load: {processor_id}'
cls = config[processor_id]['class']
+ if cls is None:
+ delay_load_config()
+ cls = config[processor_id]['class']
# log.debug(f'Control Processor loading: id="{processor_id}" class={cls.__name__}')
debug(f'Control Processor config={self.load_config}')
jobid = state.begin('Load processor')
diff --git a/modules/control/run.py b/modules/control/run.py
index b62cad89f..0bdfdc994 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -589,7 +589,7 @@ def control_run(state: str = '', # pylint: disable=keyword-arg-before-vararg
if p.scripts is not None:
processed = p.scripts.after(p, processed, *p.script_args)
output = None
- if processed is not None:
+ if processed is not None and processed.images is not None:
output = processed.images
info_txt = [processed.infotext(p, i) for i in range(len(output))]
diff --git a/modules/devices.py b/modules/devices.py
index a98b53d02..961ffb384 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -434,7 +434,7 @@ def set_sdpa_params():
torch.backends.cuda.enable_math_sdp('Math attention' in opts.sdp_options)
if hasattr(torch.backends.cuda, "allow_fp16_bf16_reduction_math_sdp"): # only valid for torch >= 2.5
torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
- log.debug(f'Torch attention: type="sdpa" flash={"Flash attention" in opts.sdp_options} memory={"Memory attention" in opts.sdp_options} math={"Math attention" in opts.sdp_options}')
+ log.debug(f'Torch attention: type="sdpa" opts={opts.sdp_options}')
except Exception as err:
log.warning(f'Torch attention: type="sdpa" {err}')
@@ -447,7 +447,6 @@ def set_sdpa_params():
sdpa_pre_dyanmic_atten = torch.nn.functional.scaled_dot_product_attention
from modules.sd_hijack_dynamic_atten import dynamic_scaled_dot_product_attention
torch.nn.functional.scaled_dot_product_attention = dynamic_scaled_dot_product_attention
- log.debug('Torch attention: type="dynamic attention"')
except Exception as err:
log.error(f'Torch attention: type="dynamic attention" {err}')
@@ -542,6 +541,17 @@ def set_sdpa_params():
log.debug('Torch attention: type="sage attention"')
except Exception as err:
log.error(f'Torch attention: type="sage attention" {err}')
+
+ from importlib.metadata import version
+ try:
+ flash = version('flash-attn')
+ except Exception:
+ flash = False
+ try:
+ sage = version('sageattention')
+ except Exception:
+ sage = False
+ log.info(f'Torch attention: flashattn={flash} sageattention={sage}')
except Exception as e:
log.warning(f'Torch SDPA: {e}')
diff --git a/modules/face/instantid.py b/modules/face/instantid.py
index 3cc613b3d..158c2f577 100644
--- a/modules/face/instantid.py
+++ b/modules/face/instantid.py
@@ -22,7 +22,7 @@ def instant_id(p: processing.StableDiffusionProcessing, app, source_images, stre
return None
c = shared.sd_model.__class__.__name__ if shared.sd_loaded else ''
- if c != 'StableDiffusionXLPipeline' and c != 'StableDiffusionXLInstantIDPipeline':
+ if c not in ['StableDiffusionXLPipeline', 'StableDiffusionXLInstantIDPipeline']:
shared.log.warning(f'InstantID invalid base model: current={c} required=StableDiffusionXLPipeline')
return None
diff --git a/modules/framepack/framepack_ui.py b/modules/framepack/framepack_ui.py
index 305fddc38..de1ab5347 100644
--- a/modules/framepack/framepack_ui.py
+++ b/modules/framepack/framepack_ui.py
@@ -1,6 +1,5 @@
import gradio as gr
-from modules import ui_sections, ui_common, ui_video_vlm
-from modules.video_models.video_utils import get_codecs
+from modules import ui_sections, ui_video_vlm
from modules.framepack import framepack_load
from modules.framepack.framepack_worker import get_latent_paddings
from modules.framepack.framepack_wrappers import load_model, unload_model
@@ -13,7 +12,7 @@ def change_sections(duration, mp4_fps, mp4_interpolate, latent_ws, variant):
return gr.update(value=f'Target video: {num_frames} frames in {num_sections} sections'), gr.update(lines=max(2, 2*num_sections//3))
-def create_ui(prompt, negative, styles, _overrides):
+def create_ui(prompt, negative, styles, _overrides, init_image, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf):
with gr.Row():
with gr.Column(variant='compact', elem_id="framepack_settings", elem_classes=['settings-column'], scale=1):
with gr.Row():
@@ -28,25 +27,12 @@ def create_ui(prompt, negative, styles, _overrides):
with gr.Row():
section_html = gr.HTML(show_label=False, elem_id="framepack_section_html")
with gr.Accordion(label="Inputs", open=False):
- with gr.Row():
- input_image = gr.Image(sources='upload', type="numpy", label="FP init image", width=256, height=256, interactive=True, tool="editor", image_mode='RGB', elem_id="framepack_input_image")
- end_image = gr.Image(sources='upload', type="numpy", label="FP end image", width=256, height=256, interactive=True, tool="editor", image_mode='RGB', elem_id="framepack_end_image")
with gr.Row():
start_weight = gr.Slider(label="FP init strength", value=1.0, minimum=0.0, maximum=2.0, step=0.05, elem_id="framepack_start_weight")
end_weight = gr.Slider(label="FP end strength", value=1.0, minimum=0.0, maximum=2.0, step=0.05, elem_id="framepack_end_weight")
vision_weight = gr.Slider(label="FP vision strength", value=1.0, minimum=0.0, maximum=2.0, step=0.05, elem_id="framepack_vision_weight")
with gr.Accordion(label="Sections", open=False):
section_prompt = gr.Textbox(label="FP section prompts", elem_id="framepack_section_prompt", lines=2, placeholder="Optional one-line prompt suffix per each video section", interactive=True)
- with gr.Accordion(label="Video", open=False):
- with gr.Row():
- mp4_codec = gr.Dropdown(label="FP codec", choices=['none', 'libx264'], value='libx264', type='value')
- ui_common.create_refresh_button(mp4_codec, get_codecs, elem_id="framepack_mp4_codec_refresh")
- mp4_ext = gr.Textbox(label="FP format", value='mp4', elem_id="framepack_mp4_ext")
- mp4_opt = gr.Textbox(label="FP options", value='crf:16', elem_id="framepack_mp4_ext")
- with gr.Row():
- mp4_video = gr.Checkbox(label='FP save video', value=True, elem_id="framepack_mp4_video")
- mp4_frames = gr.Checkbox(label='FP save frames', value=False, elem_id="framepack_mp4_frames")
- mp4_sf = gr.Checkbox(label='FP save safetensors', value=False, elem_id="framepack_mp4_sf")
with gr.Accordion(label="Advanced", open=False):
seed = ui_sections.create_seed_inputs('control', reuse_visible=False, subseed_visible=False, accordion=False)[0]
latent_ws = gr.Slider(label="FP latent window size", minimum=1, maximum=33, value=9, step=1)
@@ -58,7 +44,7 @@ def create_ui(prompt, negative, styles, _overrides):
cfg_distilled = gr.Slider(label="FP distilled CFG scale", minimum=1.0, maximum=32.0, value=10.0, step=0.01)
cfg_rescale = gr.Slider(label="FP CFG re-scale", minimum=0.0, maximum=1.0, value=0.0, step=0.01)
- vlm_enhance, vlm_model, vlm_system_prompt = ui_video_vlm.create_ui(prompt_element=prompt, image_element=input_image)
+ vlm_enhance, vlm_model, vlm_system_prompt = ui_video_vlm.create_ui(prompt_element=prompt, image_element=init_image)
with gr.Accordion(label="Model", open=False):
with gr.Row():
@@ -108,7 +94,7 @@ def create_ui(prompt, negative, styles, _overrides):
receipe_reset.click(fn=framepack_load.reset_model, inputs=[], outputs=[receipe])
framepack_inputs=[
- input_image, end_image,
+ init_image, last_image,
start_weight, end_weight, vision_weight,
prompt, system_prompt, optimized_prompt, section_prompt, negative, styles,
seed,
diff --git a/modules/ltx/ltx_process.py b/modules/ltx/ltx_process.py
index 7d9e6ae84..b45298854 100644
--- a/modules/ltx/ltx_process.py
+++ b/modules/ltx/ltx_process.py
@@ -37,6 +37,7 @@ def run_ltx(task_id,
refine_strength:float,
condition_strength: float,
condition_image,
+ condition_last,
condition_files,
condition_video,
condition_video_frames:int,
@@ -100,11 +101,16 @@ def run_ltx(task_id,
)
p.ops.append('video')
+ condition_images = []
+ if condition_image is not None:
+ condition_images.append(condition_image)
+ if condition_last is not None:
+ condition_images.append(condition_last)
conditions = get_conditions(
width,
height,
condition_strength,
- condition_image,
+ condition_images,
condition_files,
condition_video,
condition_video_frames,
diff --git a/modules/ltx/ltx_ui.py b/modules/ltx/ltx_ui.py
index 04cea1d1e..6ac23bf86 100644
--- a/modules/ltx/ltx_ui.py
+++ b/modules/ltx/ltx_ui.py
@@ -1,8 +1,6 @@
import os
import gradio as gr
-from modules import shared, ui_sections, ui_symbols, ui_common
-from modules.ui_components import ToolButton
-from modules.video_models.video_utils import get_codecs
+from modules import shared, ui_sections
from modules.video_models.models_def import models
from modules.ltx import ltx_process
@@ -10,7 +8,7 @@ from modules.ltx import ltx_process
debug = shared.log.trace if os.environ.get('SD_VIDEO_DEBUG', None) is not None else lambda *args, **kwargs: None
-def create_ui(prompt, negative, styles, overrides):
+def create_ui(prompt, negative, styles, overrides, init_image, init_strength, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf, width, height, frames, seed):
with gr.Row():
with gr.Column(variant='compact', elem_id="ltx_settings", elem_classes=['settings-column'], scale=1):
with gr.Row():
@@ -18,18 +16,8 @@ def create_ui(prompt, negative, styles, overrides):
with gr.Row():
ltx_models = [m.name for m in models['LTX Video']]
model = gr.Dropdown(label='LTX model', choices=ltx_models, value=ltx_models[0])
- with gr.Accordion(open=True, label="LTX size", elem_id='ltx_generate_accordion'):
- with gr.Row():
- width, height = ui_sections.create_resolution_inputs('ltx', default_width=832, default_height=480)
- with gr.Row():
- frames = gr.Slider(label='LTX frames', minimum=1, maximum=513, step=1, value=17, elem_id="ltx_frames")
- seed = gr.Number(label='LTX seed', value=-1, elem_id="ltx_seed", container=True)
- random_seed = ToolButton(ui_symbols.random, elem_id="ltx_seed_random")
with gr.Accordion(open=False, label="Condition", elem_id='ltx_condition_accordion'):
- condition_strength = gr.Slider(label='LTX condition strength', minimum=0.1, maximum=1.0, step=0.05, value=0.8, elem_id="ltx_condition_image_strength")
with gr.Tabs():
- with gr.Tab('Image', id='ltx_condition_image_tab'):
- condition_image = gr.Image(sources='upload', type="pil", label="Image", width=256, height=256, interactive=True, tool="editor", image_mode='RGB', elem_id="ltx_condition_image")
with gr.Tab('Video', id='ltx_condition_video_tab'):
condition_video = gr.Video(label='Video', type='filepath', elem_id="ltx_condition_video", width=256, height=256, source='upload')
with gr.Row():
@@ -45,19 +33,6 @@ def create_ui(prompt, negative, styles, overrides):
with gr.Row():
refine_enable = gr.Checkbox(label='LTX enable refine', value=False, elem_id="ltx_refine_enable")
refine_strength = gr.Slider(label='LTX refine strength', minimum=0.1, maximum=1.0, step=0.05, value=0.4, elem_id="ltx_refine_strength")
- with gr.Accordion(label="Video", open=False):
- with gr.Row():
- mp4_fps = gr.Slider(label="FPS", minimum=1, maximum=60, value=24, step=1)
- mp4_interpolate = gr.Slider(label="LTX interpolation", minimum=0, maximum=10, value=0, step=1)
- with gr.Row():
- mp4_codec = gr.Dropdown(label="LTX codec", choices=['none', 'libx264'], value='libx264', type='value')
- ui_common.create_refresh_button(mp4_codec, get_codecs, elem_id="framepack_mp4_codec_refresh")
- mp4_ext = gr.Textbox(label="LTX format", value='mp4', elem_id="framepack_mp4_ext")
- mp4_opt = gr.Textbox(label="LTX options", value='crf:16', elem_id="framepack_mp4_ext")
- with gr.Row():
- mp4_video = gr.Checkbox(label='LTX save video', value=True, elem_id="framepack_mp4_video")
- mp4_frames = gr.Checkbox(label='LTX save frames', value=False, elem_id="framepack_mp4_frames")
- mp4_sf = gr.Checkbox(label='LTX save safetensors', value=False, elem_id="framepack_mp4_sf")
with gr.Accordion(open=False, label="Advanced", elem_id='ltx_parameters_accordion'):
steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "ltx", default_steps=50)
with gr.Row():
@@ -71,7 +46,6 @@ def create_ui(prompt, negative, styles, overrides):
with gr.Row():
text = gr.HTML('', elem_id='ltx_generation_info', show_label=False)
- random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed])
task_id = gr.Textbox(visible=False, value='')
ui_state = gr.Textbox(visible=False, value='')
state_inputs = [task_id, ui_state]
@@ -83,7 +57,7 @@ def create_ui(prompt, negative, styles, overrides):
steps, sampler_index, seed,
upsample_enable, upsample_ratio,
refine_enable, refine_strength,
- condition_strength, condition_image, condition_files, condition_video, condition_video_frames, condition_video_skip,
+ init_strength, init_image, last_image, condition_files, condition_video, condition_video_frames, condition_video_skip,
decode_timestep, image_cond_noise_scale,
mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf,
overrides,
diff --git a/modules/ltx/ltx_util.py b/modules/ltx/ltx_util.py
index ddc53a323..a329373fd 100644
--- a/modules/ltx/ltx_util.py
+++ b/modules/ltx/ltx_util.py
@@ -54,19 +54,20 @@ def load_upsample(upsample_pipe, upsample_repo_id):
return upsample_pipe
-def get_conditions(width, height, condition_strength, condition_image, condition_files, condition_video, condition_video_frames, condition_video_skip):
+def get_conditions(width, height, condition_strength, condition_images, condition_files, condition_video, condition_video_frames, condition_video_skip):
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
conditions = []
- if condition_image is not None:
- try:
- if isinstance(condition_image, str):
- from modules.api.api import decode_base64_to_image
- condition_image = decode_base64_to_image(condition_image)
- condition_image = condition_image.convert('RGB').resize((width, height), resample=Image.Resampling.LANCZOS)
- conditions.append(LTXVideoCondition(image=condition_image, frame_index=0, strength=condition_strength))
- shared.log.debug(f'Video condition: image={condition_image.size} strength={condition_strength}')
- except Exception as e:
- shared.log.error(f'LTX condition image: {e}')
+ if condition_images is not None:
+        for condition_image in (condition_images if isinstance(condition_images, (list, tuple)) else [condition_images]):
+ try:
+ if isinstance(condition_image, str):
+ from modules.api.api import decode_base64_to_image
+ condition_image = decode_base64_to_image(condition_image)
+ condition_image = condition_image.convert('RGB').resize((width, height), resample=Image.Resampling.LANCZOS)
+ conditions.append(LTXVideoCondition(image=condition_image, frame_index=0, strength=condition_strength))
+ shared.log.debug(f'Video condition: image={condition_image.size} strength={condition_strength}')
+ except Exception as e:
+ shared.log.error(f'LTX condition image: {e}')
if condition_files is not None:
condition_images = []
for fn in condition_files:
diff --git a/modules/models_hf.py b/modules/models_hf.py
index 73a82de6e..800b838e2 100644
--- a/modules/models_hf.py
+++ b/modules/models_hf.py
@@ -1,4 +1,6 @@
import os
+import time
+import gradio as gr
from installer import log, install
from modules.shared import opts
@@ -37,7 +39,7 @@ def hf_init():
obfuscated_token = None
if len(opts.huggingface_token) > 0 and opts.huggingface_token.startswith('hf_'):
obfuscated_token = 'hf_...' + opts.huggingface_token[-4:]
- log.info(f'Huggingface init: transfer={opts.hf_transfer_mode} parallel={opts.sd_parallel_load} direct={opts.diffusers_to_gpu} token="{obfuscated_token}" cache="{opts.hfcache_dir}"')
+ log.info(f'Huggingface: transfer={opts.hf_transfer_mode} parallel={opts.sd_parallel_load} direct={opts.diffusers_to_gpu} token="{obfuscated_token}" cache="{opts.hfcache_dir}" init')
def hf_check_cache():
@@ -48,22 +50,26 @@ def hf_check_cache():
if size//1024//1024 > 0:
log.warning(f'Cache location changed: previous="{prev_default}" size={size//1024//1024} MB')
size, _mtime = stat(opts.hfcache_dir)
- log.debug(f'Huggingface cache: path="{opts.hfcache_dir}" size={size//1024//1024} MB')
+ log.debug(f'Huggingface: cache="{opts.hfcache_dir}" size={size//1024//1024} MB')
def hf_search(keyword):
import huggingface_hub as hf
+ t0 = time.time()
hf_api = hf.HfApi()
models = hf_api.list_models(model_name=keyword, full=True, library="diffusers", limit=50, sort="downloads", direction=-1)
data = []
for model in models:
tags = [t for t in model.tags if not t.startswith('diffusers') and not t.startswith('license') and not t.startswith('arxiv') and len(t) > 2]
data.append([model.id, model.pipeline_tag, tags, model.downloads, model.lastModified, f'https://huggingface.co/{model.id}'])
+ log.debug(f'Huggingface: search="{keyword}" results={len(data)} time={time.time()-t0:.2f}')
return data
-def hf_select(evt, data):
- return data[evt.index[0]][0]
+def hf_select(evt: gr.SelectData, df):
+ row = list(df.iloc[evt.index[0]])
+ log.debug(f'Huggingface: selected={row} index={evt.index}')
+ return row[0] # repo_id only
def hf_download_model(hub_id: str, token, variant, revision, mirror, custom_pipeline):
@@ -71,11 +77,11 @@ def hf_download_model(hub_id: str, token, variant, revision, mirror, custom_pipe
download_diffusers_model(hub_id, cache_dir=opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror, custom_pipeline=custom_pipeline)
from modules.sd_models import list_models # pylint: disable=W0621
list_models()
- log.info(f'Diffuser model downloaded: model="{hub_id}"')
+ log.info(f'Huggingface: model="{hub_id}" downloaded')
return f'Diffuser model downloaded: model="{hub_id}"'
def hf_update_token(token):
- log.debug('Huggingface update token')
+ log.debug('Huggingface: update token')
opts.huggingface_token = token
opts.save()
diff --git a/modules/processing.py b/modules/processing.py
index aaf681840..5d92fe246 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -397,13 +397,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
shared.state.batch_no = n + 1
debug(f'Processing inner: iteration={n+1}/{p.n_iter}')
p.iteration = n
+ if shared.state.interrupted:
+ shared.log.debug(f'Process interrupted: {n+1}/{p.n_iter}')
+ break
if shared.state.skipped:
shared.log.debug(f'Process skipped: {n+1}/{p.n_iter}')
shared.state.skipped = False
continue
- if shared.state.interrupted:
- shared.log.debug(f'Process interrupted: {n+1}/{p.n_iter}')
- break
if not hasattr(p, 'keep_prompts'):
p.prompts = p.all_prompts[n * p.batch_size:(n+1) * p.batch_size]
@@ -441,6 +441,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
infotexts = [create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, index=0)]
else:
samples = []
+ if not shared.opts.keep_incomplete:
+ break
if p.scripts is not None and isinstance(p.scripts, scripts_manager.ScriptRunner):
p.scripts.postprocess_batch(p, samples, batch_number=n)
@@ -460,6 +462,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if shared.cmd_opts.lowvram:
devices.torch_gc(force=True, reason='lowvram')
timer.process.record('post')
+ if shared.state.interrupted:
+ break
if not p.xyz:
if hasattr(shared.sd_model, 'restore_pipeline') and (shared.sd_model.restore_pipeline is not None):
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 62f9b624b..3aec83fca 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -53,9 +53,9 @@ def diffusers_callback_legacy(step: int, timestep: int, latents: typing.Union[to
def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}):
t0 = time.time()
- if devices.backend == "ipex": # xe driver on linux needs this
+ if devices.backend == "ipex":
torch.xpu.synchronize(devices.device)
- elif (devices.backend == "zluda") or (devices.backend == "rocm") or (devices.backend == "cuda"):
+ elif devices.backend in {"cuda", "zluda", "rocm"}:
torch.cuda.synchronize(devices.device)
latents = kwargs.get('latents', None)
if debug:
diff --git a/modules/processing_info.py b/modules/processing_info.py
index cb1050349..54b4d7b7d 100644
--- a/modules/processing_info.py
+++ b/modules/processing_info.py
@@ -47,7 +47,7 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
"Sampler": p.sampler_name if p.sampler_name != 'Default' else None,
"Seed": all_seeds[index],
"Seed resize from": None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}",
- "CFG scale": p.cfg_scale if p.cfg_scale > 1.0 else None,
+ "CFG scale": p.cfg_scale if p.cfg_scale > 1.0 else 1.0,
"CFG rescale": p.diffusers_guidance_rescale if p.diffusers_guidance_rescale > 0 else None,
"CFG end": p.cfg_end if p.cfg_end < 1.0 else None,
"CFG true": p.pag_scale if p.pag_scale > 0 else None,
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 00ffbb450..270c194d7 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -216,7 +216,7 @@ def taesd_vae_decode(latents):
t0 = time.time()
if len(latents) == 0:
return []
- if shared.opts.diffusers_vae_slicing and len(latents) > 1:
+ if len(latents) > 1:
decoded = torch.zeros((len(latents), 3, latents.shape[2] * 8, latents.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
for i in range(latents.shape[0]):
decoded[i] = sd_vae_taesd.decode(latents[i])
diff --git a/modules/rocm.py b/modules/rocm.py
index 1b03bfd32..f14a711ca 100644
--- a/modules/rocm.py
+++ b/modules/rocm.py
@@ -96,7 +96,7 @@ class Agent:
self.blaslt_supported = os.path.exists(os.path.join(blaslt_tensile_libpath, f"Kernels.so-000-{name}.hsaco" if sys.platform == "win32" else f"extop_{name}.co"))
@property
- def therock(self) -> str:
+ def therock(self) -> Union[str, None]:
if (self.gfx_version & 0xFFF0) == 0x1100:
return "gfx110X-dgpu"
if self.gfx_version == 0x1151:
@@ -107,7 +107,7 @@ class Agent:
return "gfx94X-dcgpu"
if self.gfx_version == 0x950:
return "gfx950-dcgpu"
- raise RuntimeError(f"Unsupported GPU architecture: {self.name}")
+ return None
def get_gfx_version(self) -> Union[str, None]:
if self.gfx_version >= 0x1100 and self.gfx_version < 0x1200:
@@ -207,28 +207,47 @@ def get_flash_attention_command(agent: Agent) -> str:
return "--no-build-isolation " + os.environ.get("FLASH_ATTENTION_PACKAGE", default)
+def refresh():
+ global environment, blaslt_tensile_libpath, is_installed, version # pylint: disable=global-statement
+ if sys.platform == "win32":
+ global agents # pylint: disable=global-statement
+ try:
+ agents = driver_get_agents()
+ except Exception:
+ agents = []
+ environment = find()
+ if environment is not None:
+ if isinstance(environment, ROCmEnvironment):
+ blaslt_tensile_libpath = os.environ.get("HIPBLASLT_TENSILE_LIBPATH", os.path.join(environment.path, "bin" if sys.platform == "win32" else "lib", "hipblaslt", "library"))
+ is_installed = True
+ version = get_version()
+
+
if sys.platform == "win32":
def get_agents() -> List[Agent]:
- if isinstance(environment, ROCmEnvironment):
- out = spawn("amdgpu-arch", cwd=os.path.join(environment.path, 'bin'))
- else:
- # Assume that amdgpu-arch is in PATH (venv/Scripts/amdgpu-arch.exe)
- out = spawn("amdgpu-arch")
- out = out.strip()
- return [Agent(x.split(' ')[-1].strip()) for x in out.split("\n")]
+ return agents
+ #if isinstance(environment, ROCmEnvironment):
+ # out = spawn("amdgpu-arch", cwd=os.path.join(environment.path, 'bin'))
+ #else:
+ # # Assume that amdgpu-arch is in PATH (venv/Scripts/amdgpu-arch.exe)
+ # out = spawn("amdgpu-arch")
+ #out = out.strip()
+ #if out == "":
+ # return []
+ #return [Agent(x.split(' ')[-1].strip()) for x in out.split("\n")]
def driver_get_agents() -> List[Agent]:
# unsafe and experimental feature
from modules import windows_hip_ffi
hip = windows_hip_ffi.HIP()
count = hip.get_device_count()
- agents = [None] * count
+ _agents = [None] * count
for i in range(count):
prop = hip.get_device_properties(i)
name = prop.gcnArchName.decode('utf-8').strip('\x00')
- agents[i] = Agent(name)
+ _agents[i] = Agent(name)
del hip
- return agents
+ return _agents
def postinstall():
import torch
@@ -243,6 +262,14 @@ if sys.platform == "win32":
os.environ["PATH"] = ";".join(paths_no_rocm)
return
+ build_targets = torch.cuda.get_arch_list()
+ for available in agents:
+ if available.name in build_targets:
+ return
+
+ # use cpu instead of crashing
+ torch.cuda.is_available = lambda: False
+
def rocm_init():
try:
import torch
@@ -275,15 +302,16 @@ if sys.platform == "win32":
return True, None
is_wsl: bool = False
-else:
+ agents: List[Agent] = [] # temp
+else: # sys.platform != "win32"
def get_agents() -> List[Agent]:
try:
- agents = spawn("rocm_agent_enumerator").split("\n")
- agents = [x for x in agents if x and x != 'gfx000']
+ _agents = spawn("rocm_agent_enumerator").split("\n")
+ _agents = [x for x in _agents if x and x != 'gfx000']
except Exception: # old version of ROCm WSL doesn't have rocm_agent_enumerator
- agents = spawn("rocminfo").split("\n")
- agents = [x.strip().split(" ")[-1] for x in agents if x.startswith(' Name:') and "CPU" not in x]
- return [Agent(x) for x in agents]
+ _agents = spawn("rocminfo").split("\n")
+ _agents = [x.strip().split(" ")[-1] for x in _agents if x.startswith(' Name:') and "CPU" not in x]
+ return [Agent(x) for x in _agents]
def postinstall():
if is_wsl:
@@ -300,17 +328,9 @@ else:
return True, None
is_wsl: bool = os.environ.get('WSL_DISTRO_NAME', 'unknown' if spawn('wslpath -w /') else None) is not None
+
environment = None
blaslt_tensile_libpath = ""
is_installed = False
version = None
-
-def refresh():
- global environment, blaslt_tensile_libpath, is_installed, version # pylint: disable=global-statement
- environment = find()
- if environment is not None:
- if isinstance(environment, ROCmEnvironment):
- blaslt_tensile_libpath = os.environ.get("HIPBLASLT_TENSILE_LIBPATH", os.path.join(environment.path, "bin" if sys.platform == "win32" else "lib", "hipblaslt", "library"))
- is_installed = True
- version = get_version()
refresh()
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index c41eb91fb..63b5dba0d 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -51,7 +51,7 @@ def find_sampler_config(name):
def restore_default(model):
if model is None:
return None
- if getattr(model, "default_scheduler", None) is not None:
+ if getattr(model, "default_scheduler", None) is not None and getattr(model, "scheduler", None) is not None:
model.scheduler = copy.deepcopy(model.default_scheduler)
if hasattr(model, "prior_pipe") and hasattr(model.prior_pipe, "scheduler"):
model.prior_pipe.scheduler = copy.deepcopy(model.default_scheduler)
diff --git a/modules/sdnq/layers/conv/conv_fp8_tensorwise.py b/modules/sdnq/layers/conv/conv_fp8_tensorwise.py
index 2dc9fbda3..9010445a6 100644
--- a/modules/sdnq/layers/conv/conv_fp8_tensorwise.py
+++ b/modules/sdnq/layers/conv/conv_fp8_tensorwise.py
@@ -29,7 +29,7 @@ def conv_fp8_matmul_tensorwise(
if svd_up is not None:
input = input.flatten(0,-2)
if bias is not None:
- bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
+ bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
else:
bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
diff --git a/modules/sdnq/layers/conv/conv_int8.py b/modules/sdnq/layers/conv/conv_int8.py
index 3332537d1..a1e297cc1 100644
--- a/modules/sdnq/layers/conv/conv_int8.py
+++ b/modules/sdnq/layers/conv/conv_int8.py
@@ -32,7 +32,7 @@ def conv_int8_matmul(
if svd_up is not None:
input = input.flatten(0,-2)
if bias is not None:
- bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
+ bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
else:
bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
diff --git a/modules/sdnq/layers/linear/linear_fp8_tensorwise.py b/modules/sdnq/layers/linear/linear_fp8_tensorwise.py
index c7978ef0a..d58b6fbb1 100644
--- a/modules/sdnq/layers/linear/linear_fp8_tensorwise.py
+++ b/modules/sdnq/layers/linear/linear_fp8_tensorwise.py
@@ -31,7 +31,7 @@ def fp8_matmul_tensorwise(
if svd_up is not None:
input.flatten(0,-2)
if bias is not None:
- bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
+ bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
else:
bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
dummy_input_scale = torch.ones(1, device=input.device, dtype=torch.float32)
diff --git a/modules/sdnq/layers/linear/linear_int8.py b/modules/sdnq/layers/linear/linear_int8.py
index 6d7f6f2b8..7fa60a87f 100644
--- a/modules/sdnq/layers/linear/linear_int8.py
+++ b/modules/sdnq/layers/linear/linear_int8.py
@@ -36,7 +36,7 @@ def int8_matmul(
if svd_up is not None:
input = input.flatten(0,-2)
if bias is not None:
- bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
+ bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
else:
bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up)
input, scale = quantize_int8_matmul_input(input, scale)
diff --git a/modules/sdnq/quantizer.py b/modules/sdnq/quantizer.py
index 29f3b61d8..3a39122db 100644
--- a/modules/sdnq/quantizer.py
+++ b/modules/sdnq/quantizer.py
@@ -199,10 +199,13 @@ def sdnq_quantize_layer(layer, weights_dtype="int8", torch_dtype=None, group_siz
layer.weight.data = layer.weight.to(dtype=torch.float32)
if use_svd:
- layer.weight.data, svd_up, svd_down = apply_svdquant(layer.weight, rank=svd_rank, niter=svd_steps)
- if use_quantized_matmul:
- svd_up = svd_up.t_()
- svd_down = svd_down.t_()
+ try:
+ layer.weight.data, svd_up, svd_down = apply_svdquant(layer.weight, rank=svd_rank, niter=svd_steps)
+ if use_quantized_matmul:
+ svd_up = svd_up.t_()
+ svd_down = svd_down.t_()
+ except Exception:
+ svd_up, svd_down = None, None
else:
svd_up, svd_down = None, None
@@ -210,9 +213,9 @@ def sdnq_quantize_layer(layer, weights_dtype="int8", torch_dtype=None, group_siz
if use_quantized_matmul and dtype_dict[weights_dtype]["num_bits"] >= 6:
group_size = -1
elif is_linear_type:
- group_size = 2 ** ((2 if not use_svd else 3) + dtype_dict[weights_dtype]["num_bits"])
+ group_size = 2 ** ((2 if svd_up is None else 3) + dtype_dict[weights_dtype]["num_bits"])
else:
- group_size = 2 ** ((1 if not use_svd else 2) + dtype_dict[weights_dtype]["num_bits"])
+ group_size = 2 ** ((1 if svd_up is None else 2) + dtype_dict[weights_dtype]["num_bits"])
elif use_quantized_matmul and dtype_dict[weights_dtype]["num_bits"] == 8:
group_size = -1 # override user value, re-quantizing 8bit into 8bit is pointless
elif group_size != -1 and not is_linear_type:
diff --git a/modules/shared_state.py b/modules/shared_state.py
index d79e90b86..f575f4482 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -194,7 +194,7 @@ class State:
def begin(self, title="", task_id=0, api=None):
import modules.devices
self.clear()
- self.interrupted = False
+ self.interrupted = self.interrupted if title.startswith('Save') else False
self.skipped = False
self.job_history += 1
self.total_jobs += 1
diff --git a/modules/ui_guidance.py b/modules/ui_guidance.py
index a6ea46391..72ca8500d 100644
--- a/modules/ui_guidance.py
+++ b/modules/ui_guidance.py
@@ -15,7 +15,7 @@ def create_guidance_inputs(tab):
guidance_btn = ui_components.ToolButton(value=ui_symbols.book, elem_id=f"{tab}_guider_docs")
guidance_btn.click(fn=None, _js='getGuidanceDocs', inputs=[guidance_name], outputs=[])
with gr.Row(visible=shared.opts.model_modular_enable):
- guidance_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='_Guidance scale', value=6.0, elem_id=f"{tab}_guidance_scale")
+ guidance_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.1, label='_Guidance scale', value=6.0, elem_id=f"{tab}_guidance_scale")
guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='_Guidance rescale', value=0.0, elem_id=f"{tab}_guidance_rescale")
with gr.Row(visible=shared.opts.model_modular_enable):
guidance_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='_Guidance start', value=0.0, elem_id=f"{tab}_guidance_start")
@@ -114,7 +114,7 @@ def create_guidance_inputs(tab):
gr.HTML(value='