diff --git a/CHANGELOG.md b/CHANGELOG.md index a175bfdb8..7deaed7a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Change Log for SD.Next +## Update for 2025-10-18 + +- **Models** + [Kandinsky 5 Lite](https://huggingface.co/ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers) in *SFT, CFG-distilled and Steps-distilled* variants + first model in Kandinsky5 series is T2V model optimized for 5sec videos and uses Qwen2.5 text encoder +- **Fixes** + - ROCm-on-Windows additional checks + - SDNQ-SVD fallback on incompatible layers + - Huggingface model download + - Video implement dynamic and manual sampler shift + - Fix interrupt batch processing + - Delay import of control processors until used + - Fix tiny VAE with batched results + - Fix CFG scale not added to metadata and set valid range to >=1.0 +- **Other** + - Optimized Video tab layout + - Video enable VAE slicing and framewise decoding when possible + - Detect and log `flash-attn` and `sageattention` if installed + - Remove unused UI settings + ## Update for 2025-10-17 ### Highlights for 2025-10-17 @@ -15,7 +35,11 @@ Highlight are: - **Quantization**: new **SVD**-style quantization using SDNQ offers almost zero-loss even with **4bit** quantization and now you can also test your favorite quantization on-the-fly and then save/load model for future use -- Other: support for **Huggingface** mirrors, changes to installer to prevent unwanted `torch-cpu` operations, improved previews, etc. +- Other: support for **Huggingface** mirrors, changes to installer to prevent unwanted `torch-cpu` operations, improved VAE previews, etc. 
+ +![Screenshot](https://github.com/user-attachments/assets/d6119a63-6ee5-4597-95f6-29ed0701d3b5) + +[ReadMe](https://github.com/vladmandic/automatic/blob/master/README.md) | [ChangeLog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [Docs](https://vladmandic.github.io/sdnext-docs/) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867) | [Sponsor](https://github.com/sponsors/vladmandic) ### Details for 2025-10-17 diff --git a/TODO.md b/TODO.md index 5bdc45124..56ece93cd 100644 --- a/TODO.md +++ b/TODO.md @@ -4,6 +4,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma ## Future Candidates +- Transformers unified cache handler - Remote TE - [Canvas](https://konvajs.org/) - Refactor: [Modular pipelines and guiders](https://github.com/huggingface/diffusers/issues/11915) @@ -23,10 +24,8 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - [SmoothCache](https://github.com/huggingface/diffusers/issues/11135) - [MagCache](https://github.com/lllyasviel/FramePack/pull/673/files) - [Dream0 guidance](https://huggingface.co/ByteDance/DreamO) -- [SUPIR upscaler](https://github.com/Fanghua-Yu/SUPIR) - [ByteDance OneReward](https://github.com/bytedance/OneReward) - [ByteDance USO](https://github.com/bytedance/USO) -- Remove: `Agent Scheduler` - Remove: `CodeFormer` - Remove: `GFPGAN` - ModernUI: Lite vs Expert mode diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index db79b18f6..8f6427aa0 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit db79b18f6f3f5a247e710b507d10cb39b01cc371 +Subproject commit 8f6427aa037b654ae664a0197c794e48fdbbc648 diff --git a/installer.py b/installer.py index 835478690..71ec4cbe0 100644 --- a/installer.py +++ b/installer.py @@ -608,7 +608,7 @@ def check_diffusers(): if 
args.skip_git: install('diffusers') return - sha = 'af769881d37fe916afef2c47279f66c79f5f2714' # diffusers commit hash + sha = '23ebbb4bc81a17ebea17cb7cb94f301199e49a7f' # diffusers commit hash # if args.use_rocm or args.use_zluda or args.use_directml: # sha = '043ab2520f6a19fce78e6e060a68dbc947edb9f9' # lock diffusers versions for now pkg = pkg_resources.working_set.by_key.get('diffusers', None) @@ -682,11 +682,8 @@ def install_rocm_zluda(): amd_gpus = [] try: - if sys.platform == "win32" and not rocm.is_installed: - amd_gpus = rocm.driver_get_agents() - else: - amd_gpus = rocm.get_agents() - log.info('ROCm: AMD toolkit detected') + amd_gpus = rocm.get_agents() + log.info('ROCm: AMD toolkit detected') except Exception as e: log.warning(f'ROCm agent enumerator failed: {e}') @@ -712,10 +709,13 @@ def install_rocm_zluda(): if device_id < len(amd_gpus): device = amd_gpus[device_id] - if sys.platform == "win32" and args.use_rocm and not rocm.is_installed: + if sys.platform == "win32" and args.use_rocm and not rocm.is_installed and device is not None: check_python(supported_minors=[11, 12, 13], reason='ROCm backend requires a Python version between 3.11 and 3.13') - install(f"rocm rocm-sdk-core --index-url https://rocm.nightlies.amd.com/v2-staging/{device.therock}") - rocm.refresh() + if device.therock is None: + log.warning('No supported ROCm agent was found. Skipping ROCm package installation.') + else: + install(f"rocm rocm-sdk-core --index-url https://rocm.nightlies.amd.com/v2-staging/{device.therock}") + rocm.refresh() msg = f'ROCm: version={rocm.version}' if device is not None: @@ -724,7 +724,9 @@ def install_rocm_zluda(): if sys.platform == "win32": if args.use_rocm: # TODO install: switch to pytorch source when it becomes available - if device is not None and isinstance(rocm.environment, rocm.PythonPackageEnvironment): # TheRock + if device is None: + log.warning('No ROCm agent was found. 
Please make sure that graphics driver is installed and up to date.') + if isinstance(rocm.environment, rocm.PythonPackageEnvironment): check_python(supported_minors=[11, 12, 13], reason='ROCm backend requires a Python version between 3.11 and 3.13') torch_command = os.environ.get('TORCH_COMMAND', f'torch torchvision --index-url https://rocm.nightlies.amd.com/v2-staging/{device.therock}') else: @@ -885,7 +887,6 @@ def check_torch(): if args.profile: pr = cProfile.Profile() pr.enable() - from modules import rocm allow_cuda = not (args.use_rocm or args.use_directml or args.use_ipex or args.use_openvino) allow_rocm = not (args.use_cuda or args.use_directml or args.use_ipex or args.use_openvino) allow_ipex = not (args.use_cuda or args.use_rocm or args.use_directml or args.use_openvino) @@ -902,11 +903,17 @@ def check_torch(): log.error('DirectML is only supported on Windows') if torch_command != '': - pass + is_cuda_available = False + is_ipex_available = False + is_rocm_available = False else: - is_cuda_available = allow_cuda and (args.use_cuda or shutil.which('nvidia-smi') is not None or args.use_xformers or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe'))) - is_rocm_available = allow_rocm and (args.use_rocm or args.use_zluda or rocm.is_installed) + is_cuda_available = allow_cuda and (args.use_cuda or shutil.which('nvidia-smi') is not None or os.path.exists(os.path.join(os.environ.get('SystemRoot') or r'C:\Windows', 'System32', 'nvidia-smi.exe'))) is_ipex_available = allow_ipex and (args.use_ipex or shutil.which('sycl-ls') is not None or shutil.which('sycl-ls.exe') is not None or os.environ.get('ONEAPI_ROOT') is not None or os.path.exists('/opt/intel/oneapi') or os.path.exists("C:/Program Files (x86)/Intel/oneAPI") or os.path.exists("C:/oneAPI") or os.path.exists("C:/Program Files/Intel/Intel Graphics Software")) + is_rocm_available = False + + if not is_cuda_available and not is_ipex_available and allow_rocm: + 
from modules import rocm + is_rocm_available = allow_rocm and (args.use_rocm or args.use_zluda or rocm.is_installed) # late eval to avoid unnecessary import if is_cuda_available and args.use_cuda: # prioritize cuda torch_command = install_cuda() @@ -935,6 +942,7 @@ def check_torch(): else: log.warning('Torch: CPU-only version installed') torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') + if args.version: return @@ -994,7 +1002,7 @@ def check_torch(): if not args.ignore: sys.exit(1) - if rocm.is_installed: + if is_rocm_available: rocm.postinstall() if not args.skip_all: install_torch_addons() diff --git a/modules/control/processors.py b/modules/control/processors.py index 7cb8f8833..24002a4fb 100644 --- a/modules/control/processors.py +++ b/modules/control/processors.py @@ -6,27 +6,6 @@ from installer import log from modules.errors import display from modules import devices, images -from modules.control.proc.hed import HEDdetector -from modules.control.proc.canny import CannyDetector -from modules.control.proc.edge import EdgeDetector -from modules.control.proc.lineart import LineartDetector -from modules.control.proc.lineart_anime import LineartAnimeDetector -from modules.control.proc.pidi import PidiNetDetector -from modules.control.proc.mediapipe_face import MediapipeFaceDetector -from modules.control.proc.shuffle import ContentShuffleDetector -from modules.control.proc.leres import LeresDetector -from modules.control.proc.midas import MidasDetector -from modules.control.proc.mlsd import MLSDdetector -from modules.control.proc.normalbae import NormalBaeDetector -from modules.control.proc.openpose import OpenposeDetector -from modules.control.proc.dwpose import DWposeDetector -from modules.control.proc.segment_anything import SamDetector -from modules.control.proc.zoe import ZoeDetector -from modules.control.proc.marigold import MarigoldDetector -from modules.control.proc.dpt import DPTDetector -from modules.control.proc.glpn import 
GLPNDetector -from modules.control.proc.depth_anything import DepthAnythingDetector - models = {} cache_dir = 'models/control/processors' @@ -36,36 +15,92 @@ config = { # placeholder 'None': {}, # pose models - 'OpenPose': {'class': OpenposeDetector, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}}, - 'DWPose': {'class': DWposeDetector, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}}, - 'MediaPipe Face': {'class': MediapipeFaceDetector, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}}, + 'OpenPose': {'class': None, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}}, + 'DWPose': {'class': None, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}}, + 'MediaPipe Face': {'class': None, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}}, # outline models - 'Canny': {'class': CannyDetector, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}}, - 'Edge': {'class': EdgeDetector, 'checkpoint': False, 'params': {'pf': True, 'mode': 'edge'}}, - 'LineArt Realistic': {'class': LineartDetector, 'checkpoint': True, 'params': {'coarse': False}}, - 'LineArt Anime': {'class': LineartAnimeDetector, 'checkpoint': True, 'params': {}}, - 'HED': {'class': HEDdetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False}}, - 'PidiNet': {'class': PidiNetDetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False, 'apply_filter': False}}, + 'Canny': {'class': None, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}}, + 'Edge': {'class': None, 'checkpoint': False, 'params': {'pf': True, 'mode': 'edge'}}, + 'LineArt Realistic': {'class': None, 'checkpoint': True, 'params': {'coarse': False}}, + 'LineArt Anime': {'class': None, 'checkpoint': True, 'params': {}}, + 'HED': {'class': None, 'checkpoint': True, 'params': {'scribble': 
False, 'safe': False}}, + 'PidiNet': {'class': None, 'checkpoint': True, 'params': {'scribble': False, 'safe': False, 'apply_filter': False}}, # depth models - 'Midas Depth Hybrid': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}}, - 'Leres Depth': {'class': LeresDetector, 'checkpoint': True, 'params': {'boost': False, 'thr_a':0, 'thr_b':0}}, - 'Zoe Depth': {'class': ZoeDetector, 'checkpoint': True, 'params': {'gamma_corrected': False}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_N.safetensors', 'model_type': "zoedepth"}}, - 'Marigold Depth': {'class': MarigoldDetector, 'checkpoint': True, 'params': {'denoising_steps': 10, 'ensemble_size': 10, 'processing_res': 512, 'match_input_res': True, 'color_map': 'None'}, 'load_config': {'pretrained_model_or_path': 'Bingxin/Marigold'}}, - 'Normal Bae': {'class': NormalBaeDetector, 'checkpoint': True, 'params': {}}, + 'Midas Depth Hybrid': {'class': None, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}}, + 'Leres Depth': {'class': None, 'checkpoint': True, 'params': {'boost': False, 'thr_a':0, 'thr_b':0}}, + 'Zoe Depth': {'class': None, 'checkpoint': True, 'params': {'gamma_corrected': False}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_N.safetensors', 'model_type': "zoedepth"}}, + 'Marigold Depth': {'class': None, 'checkpoint': True, 'params': {'denoising_steps': 10, 'ensemble_size': 10, 'processing_res': 512, 'match_input_res': True, 'color_map': 'None'}, 'load_config': {'pretrained_model_or_path': 'Bingxin/Marigold'}}, + 'Normal Bae': {'class': None, 'checkpoint': True, 'params': {}}, # segmentation models - 'SegmentAnything': {'class': SamDetector, 'checkpoint': True, 'model': 'Base', 'params': {}}, + 'SegmentAnything': {'class': None, 'checkpoint': True, 'model': 'Base', 'params': {}}, # other models - 'MLSD': {'class': MLSDdetector, 'checkpoint': True, 
'params': {'thr_v': 0.1, 'thr_d': 0.1}}, - 'Shuffle': {'class': ContentShuffleDetector, 'checkpoint': False, 'params': {}}, - 'DPT Depth Hybrid': {'class': DPTDetector, 'checkpoint': False, 'params': {}}, - 'GLPN Depth': {'class': GLPNDetector, 'checkpoint': False, 'params': {}}, - 'Depth Anything': {'class': DepthAnythingDetector, 'checkpoint': True, 'load_config': {'pretrained_model_or_path': 'LiheYoung/depth_anything_vitl14' }, 'params': { 'color_map': 'inferno' }}, + 'MLSD': {'class': None, 'checkpoint': True, 'params': {'thr_v': 0.1, 'thr_d': 0.1}}, + 'Shuffle': {'class': None, 'checkpoint': False, 'params': {}}, + 'DPT Depth Hybrid': {'class': None, 'checkpoint': False, 'params': {}}, + 'GLPN Depth': {'class': None, 'checkpoint': False, 'params': {}}, + 'Depth Anything': {'class': None, 'checkpoint': True, 'load_config': {'pretrained_model_or_path': 'LiheYoung/depth_anything_vitl14' }, 'params': { 'color_map': 'inferno' }}, # 'Midas Depth Large': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}, 'load_config': {'pretrained_model_or_path': 'Intel/dpt-large', 'model_type': "dpt_large", 'filename': ''}}, # 'Zoe Depth Zoe': {'class': ZoeDetector, 'checkpoint': True, 'params': {}}, # 'Zoe Depth NK': {'class': ZoeDetector, 'checkpoint': True, 'params': {}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_NK.safetensors', 'model_type': "zoedepth_nk"}}, } +def delay_load_config(): + global config # pylint: disable=global-statement + from modules.control.proc.hed import HEDdetector + from modules.control.proc.canny import CannyDetector + from modules.control.proc.edge import EdgeDetector + from modules.control.proc.lineart import LineartDetector + from modules.control.proc.lineart_anime import LineartAnimeDetector + from modules.control.proc.pidi import PidiNetDetector + from modules.control.proc.mediapipe_face import MediapipeFaceDetector + from modules.control.proc.shuffle 
import ContentShuffleDetector + from modules.control.proc.leres import LeresDetector + from modules.control.proc.midas import MidasDetector + from modules.control.proc.mlsd import MLSDdetector + from modules.control.proc.normalbae import NormalBaeDetector + from modules.control.proc.openpose import OpenposeDetector + from modules.control.proc.dwpose import DWposeDetector + from modules.control.proc.segment_anything import SamDetector + from modules.control.proc.zoe import ZoeDetector + from modules.control.proc.marigold import MarigoldDetector + from modules.control.proc.dpt import DPTDetector + from modules.control.proc.glpn import GLPNDetector + from modules.control.proc.depth_anything import DepthAnythingDetector + config = { + # placeholder + 'None': {}, + # pose models + 'OpenPose': {'class': OpenposeDetector, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}}, + 'DWPose': {'class': DWposeDetector, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}}, + 'MediaPipe Face': {'class': MediapipeFaceDetector, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}}, + # outline models + 'Canny': {'class': CannyDetector, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}}, + 'Edge': {'class': EdgeDetector, 'checkpoint': False, 'params': {'pf': True, 'mode': 'edge'}}, + 'LineArt Realistic': {'class': LineartDetector, 'checkpoint': True, 'params': {'coarse': False}}, + 'LineArt Anime': {'class': LineartAnimeDetector, 'checkpoint': True, 'params': {}}, + 'HED': {'class': HEDdetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False}}, + 'PidiNet': {'class': PidiNetDetector, 'checkpoint': True, 'params': {'scribble': False, 'safe': False, 'apply_filter': False}}, + # depth models + 'Midas Depth Hybrid': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}}, + 'Leres Depth': {'class': LeresDetector, 
'checkpoint': True, 'params': {'boost': False, 'thr_a':0, 'thr_b':0}}, + 'Zoe Depth': {'class': ZoeDetector, 'checkpoint': True, 'params': {'gamma_corrected': False}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_N.safetensors', 'model_type': "zoedepth"}}, + 'Marigold Depth': {'class': MarigoldDetector, 'checkpoint': True, 'params': {'denoising_steps': 10, 'ensemble_size': 10, 'processing_res': 512, 'match_input_res': True, 'color_map': 'None'}, 'load_config': {'pretrained_model_or_path': 'Bingxin/Marigold'}}, + 'Normal Bae': {'class': NormalBaeDetector, 'checkpoint': True, 'params': {}}, + # segmentation models + 'SegmentAnything': {'class': SamDetector, 'checkpoint': True, 'model': 'Base', 'params': {}}, + # other models + 'MLSD': {'class': MLSDdetector, 'checkpoint': True, 'params': {'thr_v': 0.1, 'thr_d': 0.1}}, + 'Shuffle': {'class': ContentShuffleDetector, 'checkpoint': False, 'params': {}}, + 'DPT Depth Hybrid': {'class': DPTDetector, 'checkpoint': False, 'params': {}}, + 'GLPN Depth': {'class': GLPNDetector, 'checkpoint': False, 'params': {}}, + 'Depth Anything': {'class': DepthAnythingDetector, 'checkpoint': True, 'load_config': {'pretrained_model_or_path': 'LiheYoung/depth_anything_vitl14' }, 'params': { 'color_map': 'inferno' }}, + # 'Midas Depth Large': {'class': MidasDetector, 'checkpoint': True, 'params': {'bg_th': 0.1, 'depth_and_normal': False}, 'load_config': {'pretrained_model_or_path': 'Intel/dpt-large', 'model_type': "dpt_large", 'filename': ''}}, + # 'Zoe Depth Zoe': {'class': ZoeDetector, 'checkpoint': True, 'params': {}}, + # 'Zoe Depth NK': {'class': ZoeDetector, 'checkpoint': True, 'params': {}, 'load_config': {'pretrained_model_or_path': 'halffried/gyre_zoedepth', 'filename': 'ZoeD_M12_NK.safetensors', 'model_type': "zoedepth_nk"}}, + } + + def list_models(refresh=False): global models # pylint: disable=global-statement if not refresh and len(models) > 0: @@ -178,6 +213,9 @@ class Processor(): 
log.error(f'Control Processor unknown: id="{processor_id}" available={list(config)}') return f'Processor failed to load: {processor_id}' cls = config[processor_id]['class'] + if cls is None: + delay_load_config() + cls = config[processor_id]['class'] # log.debug(f'Control Processor loading: id="{processor_id}" class={cls.__name__}') debug(f'Control Processor config={self.load_config}') jobid = state.begin('Load processor') diff --git a/modules/control/run.py b/modules/control/run.py index b62cad89f..0bdfdc994 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -589,7 +589,7 @@ def control_run(state: str = '', # pylint: disable=keyword-arg-before-vararg if p.scripts is not None: processed = p.scripts.after(p, processed, *p.script_args) output = None - if processed is not None: + if processed is not None and processed.images is not None: output = processed.images info_txt = [processed.infotext(p, i) for i in range(len(output))] diff --git a/modules/devices.py b/modules/devices.py index a98b53d02..961ffb384 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -434,7 +434,7 @@ def set_sdpa_params(): torch.backends.cuda.enable_math_sdp('Math attention' in opts.sdp_options) if hasattr(torch.backends.cuda, "allow_fp16_bf16_reduction_math_sdp"): # only valid for torch >= 2.5 torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True) - log.debug(f'Torch attention: type="sdpa" flash={"Flash attention" in opts.sdp_options} memory={"Memory attention" in opts.sdp_options} math={"Math attention" in opts.sdp_options}') + log.debug(f'Torch attention: type="sdpa" opts={opts.sdp_options}') except Exception as err: log.warning(f'Torch attention: type="sdpa" {err}') @@ -447,7 +447,6 @@ def set_sdpa_params(): sdpa_pre_dyanmic_atten = torch.nn.functional.scaled_dot_product_attention from modules.sd_hijack_dynamic_atten import dynamic_scaled_dot_product_attention torch.nn.functional.scaled_dot_product_attention = dynamic_scaled_dot_product_attention - 
log.debug('Torch attention: type="dynamic attention"') except Exception as err: log.error(f'Torch attention: type="dynamic attention" {err}') @@ -542,6 +541,17 @@ def set_sdpa_params(): log.debug('Torch attention: type="sage attention"') except Exception as err: log.error(f'Torch attention: type="sage attention" {err}') + + from importlib.metadata import version + try: + flash = version('flash-attn') + except Exception: + flash = False + try: + sage = version('sageattention') + except Exception: + sage = False + log.info(f'Torch attention: flashattn={flash} sageattention={sage}') except Exception as e: log.warning(f'Torch SDPA: {e}') diff --git a/modules/face/instantid.py b/modules/face/instantid.py index 3cc613b3d..158c2f577 100644 --- a/modules/face/instantid.py +++ b/modules/face/instantid.py @@ -22,7 +22,7 @@ def instant_id(p: processing.StableDiffusionProcessing, app, source_images, stre return None c = shared.sd_model.__class__.__name__ if shared.sd_loaded else '' - if c != 'StableDiffusionXLPipeline' and c != 'StableDiffusionXLInstantIDPipeline': + if c not in ['StableDiffusionXLPipeline', 'StableDiffusionXLInstantIDPipeline']: shared.log.warning(f'InstantID invalid base model: current={c} required=StableDiffusionXLPipeline') return None diff --git a/modules/framepack/framepack_ui.py b/modules/framepack/framepack_ui.py index 305fddc38..de1ab5347 100644 --- a/modules/framepack/framepack_ui.py +++ b/modules/framepack/framepack_ui.py @@ -1,6 +1,5 @@ import gradio as gr -from modules import ui_sections, ui_common, ui_video_vlm -from modules.video_models.video_utils import get_codecs +from modules import ui_sections, ui_video_vlm from modules.framepack import framepack_load from modules.framepack.framepack_worker import get_latent_paddings from modules.framepack.framepack_wrappers import load_model, unload_model @@ -13,7 +12,7 @@ def change_sections(duration, mp4_fps, mp4_interpolate, latent_ws, variant): return gr.update(value=f'Target video: {num_frames} frames 
in {num_sections} sections'), gr.update(lines=max(2, 2*num_sections//3)) -def create_ui(prompt, negative, styles, _overrides): +def create_ui(prompt, negative, styles, _overrides, init_image, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf): with gr.Row(): with gr.Column(variant='compact', elem_id="framepack_settings", elem_classes=['settings-column'], scale=1): with gr.Row(): @@ -28,25 +27,12 @@ def create_ui(prompt, negative, styles, _overrides): with gr.Row(): section_html = gr.HTML(show_label=False, elem_id="framepack_section_html") with gr.Accordion(label="Inputs", open=False): - with gr.Row(): - input_image = gr.Image(sources='upload', type="numpy", label="FP init image", width=256, height=256, interactive=True, tool="editor", image_mode='RGB', elem_id="framepack_input_image") - end_image = gr.Image(sources='upload', type="numpy", label="FP end image", width=256, height=256, interactive=True, tool="editor", image_mode='RGB', elem_id="framepack_end_image") with gr.Row(): start_weight = gr.Slider(label="FP init strength", value=1.0, minimum=0.0, maximum=2.0, step=0.05, elem_id="framepack_start_weight") end_weight = gr.Slider(label="FP end strength", value=1.0, minimum=0.0, maximum=2.0, step=0.05, elem_id="framepack_end_weight") vision_weight = gr.Slider(label="FP vision strength", value=1.0, minimum=0.0, maximum=2.0, step=0.05, elem_id="framepack_vision_weight") with gr.Accordion(label="Sections", open=False): section_prompt = gr.Textbox(label="FP section prompts", elem_id="framepack_section_prompt", lines=2, placeholder="Optional one-line prompt suffix per each video section", interactive=True) - with gr.Accordion(label="Video", open=False): - with gr.Row(): - mp4_codec = gr.Dropdown(label="FP codec", choices=['none', 'libx264'], value='libx264', type='value') - ui_common.create_refresh_button(mp4_codec, get_codecs, elem_id="framepack_mp4_codec_refresh") - mp4_ext = gr.Textbox(label="FP format", value='mp4', 
elem_id="framepack_mp4_ext") - mp4_opt = gr.Textbox(label="FP options", value='crf:16', elem_id="framepack_mp4_ext") - with gr.Row(): - mp4_video = gr.Checkbox(label='FP save video', value=True, elem_id="framepack_mp4_video") - mp4_frames = gr.Checkbox(label='FP save frames', value=False, elem_id="framepack_mp4_frames") - mp4_sf = gr.Checkbox(label='FP save safetensors', value=False, elem_id="framepack_mp4_sf") with gr.Accordion(label="Advanced", open=False): seed = ui_sections.create_seed_inputs('control', reuse_visible=False, subseed_visible=False, accordion=False)[0] latent_ws = gr.Slider(label="FP latent window size", minimum=1, maximum=33, value=9, step=1) @@ -58,7 +44,7 @@ def create_ui(prompt, negative, styles, _overrides): cfg_distilled = gr.Slider(label="FP distilled CFG scale", minimum=1.0, maximum=32.0, value=10.0, step=0.01) cfg_rescale = gr.Slider(label="FP CFG re-scale", minimum=0.0, maximum=1.0, value=0.0, step=0.01) - vlm_enhance, vlm_model, vlm_system_prompt = ui_video_vlm.create_ui(prompt_element=prompt, image_element=input_image) + vlm_enhance, vlm_model, vlm_system_prompt = ui_video_vlm.create_ui(prompt_element=prompt, image_element=init_image) with gr.Accordion(label="Model", open=False): with gr.Row(): @@ -108,7 +94,7 @@ def create_ui(prompt, negative, styles, _overrides): receipe_reset.click(fn=framepack_load.reset_model, inputs=[], outputs=[receipe]) framepack_inputs=[ - input_image, end_image, + init_image, last_image, start_weight, end_weight, vision_weight, prompt, system_prompt, optimized_prompt, section_prompt, negative, styles, seed, diff --git a/modules/ltx/ltx_process.py b/modules/ltx/ltx_process.py index 7d9e6ae84..b45298854 100644 --- a/modules/ltx/ltx_process.py +++ b/modules/ltx/ltx_process.py @@ -37,6 +37,7 @@ def run_ltx(task_id, refine_strength:float, condition_strength: float, condition_image, + condition_last, condition_files, condition_video, condition_video_frames:int, @@ -100,11 +101,16 @@ def run_ltx(task_id, ) 
p.ops.append('video') + condition_images = [] + if condition_image is not None: + condition_images.append(condition_image) + if condition_last is not None: + condition_images.append(condition_last) conditions = get_conditions( width, height, condition_strength, - condition_image, + condition_images, condition_files, condition_video, condition_video_frames, diff --git a/modules/ltx/ltx_ui.py b/modules/ltx/ltx_ui.py index 04cea1d1e..6ac23bf86 100644 --- a/modules/ltx/ltx_ui.py +++ b/modules/ltx/ltx_ui.py @@ -1,8 +1,6 @@ import os import gradio as gr -from modules import shared, ui_sections, ui_symbols, ui_common -from modules.ui_components import ToolButton -from modules.video_models.video_utils import get_codecs +from modules import shared, ui_sections from modules.video_models.models_def import models from modules.ltx import ltx_process @@ -10,7 +8,7 @@ from modules.ltx import ltx_process debug = shared.log.trace if os.environ.get('SD_VIDEO_DEBUG', None) is not None else lambda *args, **kwargs: None -def create_ui(prompt, negative, styles, overrides): +def create_ui(prompt, negative, styles, overrides, init_image, init_strength, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf, width, height, frames, seed): with gr.Row(): with gr.Column(variant='compact', elem_id="ltx_settings", elem_classes=['settings-column'], scale=1): with gr.Row(): @@ -18,18 +16,8 @@ def create_ui(prompt, negative, styles, overrides): with gr.Row(): ltx_models = [m.name for m in models['LTX Video']] model = gr.Dropdown(label='LTX model', choices=ltx_models, value=ltx_models[0]) - with gr.Accordion(open=True, label="LTX size", elem_id='ltx_generate_accordion'): - with gr.Row(): - width, height = ui_sections.create_resolution_inputs('ltx', default_width=832, default_height=480) - with gr.Row(): - frames = gr.Slider(label='LTX frames', minimum=1, maximum=513, step=1, value=17, elem_id="ltx_frames") - seed = gr.Number(label='LTX seed', value=-1, 
elem_id="ltx_seed", container=True) - random_seed = ToolButton(ui_symbols.random, elem_id="ltx_seed_random") with gr.Accordion(open=False, label="Condition", elem_id='ltx_condition_accordion'): - condition_strength = gr.Slider(label='LTX condition strength', minimum=0.1, maximum=1.0, step=0.05, value=0.8, elem_id="ltx_condition_image_strength") with gr.Tabs(): - with gr.Tab('Image', id='ltx_condition_image_tab'): - condition_image = gr.Image(sources='upload', type="pil", label="Image", width=256, height=256, interactive=True, tool="editor", image_mode='RGB', elem_id="ltx_condition_image") with gr.Tab('Video', id='ltx_condition_video_tab'): condition_video = gr.Video(label='Video', type='filepath', elem_id="ltx_condition_video", width=256, height=256, source='upload') with gr.Row(): @@ -45,19 +33,6 @@ def create_ui(prompt, negative, styles, overrides): with gr.Row(): refine_enable = gr.Checkbox(label='LTX enable refine', value=False, elem_id="ltx_refine_enable") refine_strength = gr.Slider(label='LTX refine strength', minimum=0.1, maximum=1.0, step=0.05, value=0.4, elem_id="ltx_refine_strength") - with gr.Accordion(label="Video", open=False): - with gr.Row(): - mp4_fps = gr.Slider(label="FPS", minimum=1, maximum=60, value=24, step=1) - mp4_interpolate = gr.Slider(label="LTX interpolation", minimum=0, maximum=10, value=0, step=1) - with gr.Row(): - mp4_codec = gr.Dropdown(label="LTX codec", choices=['none', 'libx264'], value='libx264', type='value') - ui_common.create_refresh_button(mp4_codec, get_codecs, elem_id="framepack_mp4_codec_refresh") - mp4_ext = gr.Textbox(label="LTX format", value='mp4', elem_id="framepack_mp4_ext") - mp4_opt = gr.Textbox(label="LTX options", value='crf:16', elem_id="framepack_mp4_ext") - with gr.Row(): - mp4_video = gr.Checkbox(label='LTX save video', value=True, elem_id="framepack_mp4_video") - mp4_frames = gr.Checkbox(label='LTX save frames', value=False, elem_id="framepack_mp4_frames") - mp4_sf = gr.Checkbox(label='LTX save 
safetensors', value=False, elem_id="framepack_mp4_sf") with gr.Accordion(open=False, label="Advanced", elem_id='ltx_parameters_accordion'): steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "ltx", default_steps=50) with gr.Row(): @@ -71,7 +46,6 @@ def create_ui(prompt, negative, styles, overrides): with gr.Row(): text = gr.HTML('', elem_id='ltx_generation_info', show_label=False) - random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed]) task_id = gr.Textbox(visible=False, value='') ui_state = gr.Textbox(visible=False, value='') state_inputs = [task_id, ui_state] @@ -83,7 +57,7 @@ def create_ui(prompt, negative, styles, overrides): steps, sampler_index, seed, upsample_enable, upsample_ratio, refine_enable, refine_strength, - condition_strength, condition_image, condition_files, condition_video, condition_video_frames, condition_video_skip, + init_strength, init_image, last_image, condition_files, condition_video, condition_video_frames, condition_video_skip, decode_timestep, image_cond_noise_scale, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf, overrides, diff --git a/modules/ltx/ltx_util.py b/modules/ltx/ltx_util.py index ddc53a323..a329373fd 100644 --- a/modules/ltx/ltx_util.py +++ b/modules/ltx/ltx_util.py @@ -54,19 +54,20 @@ def load_upsample(upsample_pipe, upsample_repo_id): return upsample_pipe -def get_conditions(width, height, condition_strength, condition_image, condition_files, condition_video, condition_video_frames, condition_video_skip): +def get_conditions(width, height, condition_strength, condition_images, condition_files, condition_video, condition_video_frames, condition_video_skip): from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition conditions = [] - if condition_image is not None: - try: - if isinstance(condition_image, str): - from modules.api.api import decode_base64_to_image - condition_image = 
decode_base64_to_image(condition_image) - condition_image = condition_image.convert('RGB').resize((width, height), resample=Image.Resampling.LANCZOS) - conditions.append(LTXVideoCondition(image=condition_image, frame_index=0, strength=condition_strength)) - shared.log.debug(f'Video condition: image={condition_image.size} strength={condition_strength}') - except Exception as e: - shared.log.error(f'LTX condition image: {e}') + if condition_images is not None: + for condition_image in condition_images: + try: + if isinstance(condition_image, str): + from modules.api.api import decode_base64_to_image + condition_image = decode_base64_to_image(condition_image) + condition_image = condition_image.convert('RGB').resize((width, height), resample=Image.Resampling.LANCZOS) + conditions.append(LTXVideoCondition(image=condition_image, frame_index=0, strength=condition_strength)) + shared.log.debug(f'Video condition: image={condition_image.size} strength={condition_strength}') + except Exception as e: + shared.log.error(f'LTX condition image: {e}') if condition_files is not None: condition_images = [] for fn in condition_files: diff --git a/modules/models_hf.py b/modules/models_hf.py index 73a82de6e..800b838e2 100644 --- a/modules/models_hf.py +++ b/modules/models_hf.py @@ -1,4 +1,6 @@ import os +import time +import gradio as gr from installer import log, install from modules.shared import opts @@ -37,7 +39,7 @@ def hf_init(): obfuscated_token = None if len(opts.huggingface_token) > 0 and opts.huggingface_token.startswith('hf_'): obfuscated_token = 'hf_...' 
+ opts.huggingface_token[-4:] - log.info(f'Huggingface init: transfer={opts.hf_transfer_mode} parallel={opts.sd_parallel_load} direct={opts.diffusers_to_gpu} token="{obfuscated_token}" cache="{opts.hfcache_dir}"') + log.info(f'Huggingface: transfer={opts.hf_transfer_mode} parallel={opts.sd_parallel_load} direct={opts.diffusers_to_gpu} token="{obfuscated_token}" cache="{opts.hfcache_dir}" init') def hf_check_cache(): @@ -48,22 +50,26 @@ def hf_check_cache(): if size//1024//1024 > 0: log.warning(f'Cache location changed: previous="{prev_default}" size={size//1024//1024} MB') size, _mtime = stat(opts.hfcache_dir) - log.debug(f'Huggingface cache: path="{opts.hfcache_dir}" size={size//1024//1024} MB') + log.debug(f'Huggingface: cache="{opts.hfcache_dir}" size={size//1024//1024} MB') def hf_search(keyword): import huggingface_hub as hf + t0 = time.time() hf_api = hf.HfApi() models = hf_api.list_models(model_name=keyword, full=True, library="diffusers", limit=50, sort="downloads", direction=-1) data = [] for model in models: tags = [t for t in model.tags if not t.startswith('diffusers') and not t.startswith('license') and not t.startswith('arxiv') and len(t) > 2] data.append([model.id, model.pipeline_tag, tags, model.downloads, model.lastModified, f'https://huggingface.co/{model.id}']) + log.debug(f'Huggingface: search="{keyword}" results={len(data)} time={time.time()-t0:.2f}') return data -def hf_select(evt, data): - return data[evt.index[0]][0] +def hf_select(evt: gr.SelectData, df): + row = list(df.iloc[evt.index[0]]) + log.debug(f'Huggingface: selected={row} index={evt.index}') + return row[0] # repo_id only def hf_download_model(hub_id: str, token, variant, revision, mirror, custom_pipeline): @@ -71,11 +77,11 @@ def hf_download_model(hub_id: str, token, variant, revision, mirror, custom_pipe download_diffusers_model(hub_id, cache_dir=opts.diffusers_dir, token=token, variant=variant, revision=revision, mirror=mirror, custom_pipeline=custom_pipeline) from 
modules.sd_models import list_models # pylint: disable=W0621 list_models() - log.info(f'Diffuser model downloaded: model="{hub_id}"') + log.info(f'Huggingface: model="{hub_id}" downloaded') return f'Diffuser model downloaded: model="{hub_id}"' def hf_update_token(token): - log.debug('Huggingface update token') + log.debug('Huggingface: update token') opts.huggingface_token = token opts.save() diff --git a/modules/processing.py b/modules/processing.py index aaf681840..5d92fe246 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -397,13 +397,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: shared.state.batch_no = n + 1 debug(f'Processing inner: iteration={n+1}/{p.n_iter}') p.iteration = n + if shared.state.interrupted: + shared.log.debug(f'Process interrupted: {n+1}/{p.n_iter}') + break if shared.state.skipped: shared.log.debug(f'Process skipped: {n+1}/{p.n_iter}') shared.state.skipped = False continue - if shared.state.interrupted: - shared.log.debug(f'Process interrupted: {n+1}/{p.n_iter}') - break if not hasattr(p, 'keep_prompts'): p.prompts = p.all_prompts[n * p.batch_size:(n+1) * p.batch_size] @@ -441,6 +441,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: infotexts = [create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, index=0)] else: samples = [] + if not shared.opts.keep_incomplete: + break if p.scripts is not None and isinstance(p.scripts, scripts_manager.ScriptRunner): p.scripts.postprocess_batch(p, samples, batch_number=n) @@ -460,6 +462,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if shared.cmd_opts.lowvram: devices.torch_gc(force=True, reason='lowvram') timer.process.record('post') + if shared.state.interrupted: + break if not p.xyz: if hasattr(shared.sd_model, 'restore_pipeline') and (shared.sd_model.restore_pipeline is not None): diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py index 62f9b624b..3aec83fca 100644 --- 
a/modules/processing_callbacks.py +++ b/modules/processing_callbacks.py @@ -53,9 +53,9 @@ def diffusers_callback_legacy(step: int, timestep: int, latents: typing.Union[to def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}): t0 = time.time() - if devices.backend == "ipex": # xe driver on linux needs this + if devices.backend == "ipex": torch.xpu.synchronize(devices.device) - elif (devices.backend == "zluda") or (devices.backend == "rocm") or (devices.backend == "cuda"): + elif devices.backend in {"cuda", "zluda", "rocm"}: torch.cuda.synchronize(devices.device) latents = kwargs.get('latents', None) if debug: diff --git a/modules/processing_info.py b/modules/processing_info.py index cb1050349..54b4d7b7d 100644 --- a/modules/processing_info.py +++ b/modules/processing_info.py @@ -47,7 +47,7 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No "Sampler": p.sampler_name if p.sampler_name != 'Default' else None, "Seed": all_seeds[index], "Seed resize from": None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}", - "CFG scale": p.cfg_scale if p.cfg_scale > 1.0 else None, + "CFG scale": p.cfg_scale if p.cfg_scale > 1.0 else 1.0, "CFG rescale": p.diffusers_guidance_rescale if p.diffusers_guidance_rescale > 0 else None, "CFG end": p.cfg_end if p.cfg_end < 1.0 else None, "CFG true": p.pag_scale if p.pag_scale > 0 else None, diff --git a/modules/processing_vae.py b/modules/processing_vae.py index 00ffbb450..270c194d7 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -216,7 +216,7 @@ def taesd_vae_decode(latents): t0 = time.time() if len(latents) == 0: return [] - if shared.opts.diffusers_vae_slicing and len(latents) > 1: + if len(latents) > 1: decoded = torch.zeros((len(latents), 3, latents.shape[2] * 8, latents.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device) for i in range(latents.shape[0]): decoded[i] = 
sd_vae_taesd.decode(latents[i]) diff --git a/modules/rocm.py b/modules/rocm.py index 1b03bfd32..f14a711ca 100644 --- a/modules/rocm.py +++ b/modules/rocm.py @@ -96,7 +96,7 @@ class Agent: self.blaslt_supported = os.path.exists(os.path.join(blaslt_tensile_libpath, f"Kernels.so-000-{name}.hsaco" if sys.platform == "win32" else f"extop_{name}.co")) @property - def therock(self) -> str: + def therock(self) -> Union[str, None]: if (self.gfx_version & 0xFFF0) == 0x1100: return "gfx110X-dgpu" if self.gfx_version == 0x1151: @@ -107,7 +107,7 @@ class Agent: return "gfx94X-dcgpu" if self.gfx_version == 0x950: return "gfx950-dcgpu" - raise RuntimeError(f"Unsupported GPU architecture: {self.name}") + return None def get_gfx_version(self) -> Union[str, None]: if self.gfx_version >= 0x1100 and self.gfx_version < 0x1200: @@ -207,28 +207,47 @@ def get_flash_attention_command(agent: Agent) -> str: return "--no-build-isolation " + os.environ.get("FLASH_ATTENTION_PACKAGE", default) +def refresh(): + global environment, blaslt_tensile_libpath, is_installed, version # pylint: disable=global-statement + if sys.platform == "win32": + global agents # pylint: disable=global-statement + try: + agents = driver_get_agents() + except Exception: + agents = [] + environment = find() + if environment is not None: + if isinstance(environment, ROCmEnvironment): + blaslt_tensile_libpath = os.environ.get("HIPBLASLT_TENSILE_LIBPATH", os.path.join(environment.path, "bin" if sys.platform == "win32" else "lib", "hipblaslt", "library")) + is_installed = True + version = get_version() + + if sys.platform == "win32": def get_agents() -> List[Agent]: - if isinstance(environment, ROCmEnvironment): - out = spawn("amdgpu-arch", cwd=os.path.join(environment.path, 'bin')) - else: - # Assume that amdgpu-arch is in PATH (venv/Scripts/amdgpu-arch.exe) - out = spawn("amdgpu-arch") - out = out.strip() - return [Agent(x.split(' ')[-1].strip()) for x in out.split("\n")] + return agents + #if isinstance(environment, 
ROCmEnvironment): + # out = spawn("amdgpu-arch", cwd=os.path.join(environment.path, 'bin')) + #else: + # # Assume that amdgpu-arch is in PATH (venv/Scripts/amdgpu-arch.exe) + # out = spawn("amdgpu-arch") + #out = out.strip() + #if out == "": + # return [] + #return [Agent(x.split(' ')[-1].strip()) for x in out.split("\n")] def driver_get_agents() -> List[Agent]: # unsafe and experimental feature from modules import windows_hip_ffi hip = windows_hip_ffi.HIP() count = hip.get_device_count() - agents = [None] * count + _agents = [None] * count for i in range(count): prop = hip.get_device_properties(i) name = prop.gcnArchName.decode('utf-8').strip('\x00') - agents[i] = Agent(name) + _agents[i] = Agent(name) del hip - return agents + return _agents def postinstall(): import torch @@ -243,6 +262,14 @@ if sys.platform == "win32": os.environ["PATH"] = ";".join(paths_no_rocm) return + build_targets = torch.cuda.get_arch_list() + for available in agents: + if available.name in build_targets: + return + + # use cpu instead of crashing + torch.cuda.is_available = lambda: False + def rocm_init(): try: import torch @@ -275,15 +302,16 @@ if sys.platform == "win32": return True, None is_wsl: bool = False -else: + agents: List[Agent] = [] # temp +else: # sys.platform != "win32" def get_agents() -> List[Agent]: try: - agents = spawn("rocm_agent_enumerator").split("\n") - agents = [x for x in agents if x and x != 'gfx000'] + _agents = spawn("rocm_agent_enumerator").split("\n") + _agents = [x for x in _agents if x and x != 'gfx000'] except Exception: # old version of ROCm WSL doesn't have rocm_agent_enumerator - agents = spawn("rocminfo").split("\n") - agents = [x.strip().split(" ")[-1] for x in agents if x.startswith(' Name:') and "CPU" not in x] - return [Agent(x) for x in agents] + _agents = spawn("rocminfo").split("\n") + _agents = [x.strip().split(" ")[-1] for x in _agents if x.startswith(' Name:') and "CPU" not in x] + return [Agent(x) for x in _agents] def postinstall(): if 
is_wsl: @@ -300,17 +328,9 @@ else: return True, None is_wsl: bool = os.environ.get('WSL_DISTRO_NAME', 'unknown' if spawn('wslpath -w /') else None) is not None + environment = None blaslt_tensile_libpath = "" is_installed = False version = None - -def refresh(): - global environment, blaslt_tensile_libpath, is_installed, version # pylint: disable=global-statement - environment = find() - if environment is not None: - if isinstance(environment, ROCmEnvironment): - blaslt_tensile_libpath = os.environ.get("HIPBLASLT_TENSILE_LIBPATH", os.path.join(environment.path, "bin" if sys.platform == "win32" else "lib", "hipblaslt", "library")) - is_installed = True - version = get_version() refresh() diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index c41eb91fb..63b5dba0d 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -51,7 +51,7 @@ def find_sampler_config(name): def restore_default(model): if model is None: return None - if getattr(model, "default_scheduler", None) is not None: + if getattr(model, "default_scheduler", None) is not None and getattr(model, "scheduler", None) is not None: model.scheduler = copy.deepcopy(model.default_scheduler) if hasattr(model, "prior_pipe") and hasattr(model.prior_pipe, "scheduler"): model.prior_pipe.scheduler = copy.deepcopy(model.default_scheduler) diff --git a/modules/sdnq/layers/conv/conv_fp8_tensorwise.py b/modules/sdnq/layers/conv/conv_fp8_tensorwise.py index 2dc9fbda3..9010445a6 100644 --- a/modules/sdnq/layers/conv/conv_fp8_tensorwise.py +++ b/modules/sdnq/layers/conv/conv_fp8_tensorwise.py @@ -29,7 +29,7 @@ def conv_fp8_matmul_tensorwise( if svd_up is not None: input = input.flatten(0,-2) if bias is not None: - bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) + bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) else: bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) diff --git 
a/modules/sdnq/layers/conv/conv_int8.py b/modules/sdnq/layers/conv/conv_int8.py index 3332537d1..a1e297cc1 100644 --- a/modules/sdnq/layers/conv/conv_int8.py +++ b/modules/sdnq/layers/conv/conv_int8.py @@ -32,7 +32,7 @@ def conv_int8_matmul( if svd_up is not None: input = input.flatten(0,-2) if bias is not None: - bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) + bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) else: bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) diff --git a/modules/sdnq/layers/linear/linear_fp8_tensorwise.py b/modules/sdnq/layers/linear/linear_fp8_tensorwise.py index c7978ef0a..d58b6fbb1 100644 --- a/modules/sdnq/layers/linear/linear_fp8_tensorwise.py +++ b/modules/sdnq/layers/linear/linear_fp8_tensorwise.py @@ -31,7 +31,7 @@ def fp8_matmul_tensorwise( if svd_up is not None: input.flatten(0,-2) if bias is not None: - bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) + bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) else: bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) dummy_input_scale = torch.ones(1, device=input.device, dtype=torch.float32) diff --git a/modules/sdnq/layers/linear/linear_int8.py b/modules/sdnq/layers/linear/linear_int8.py index 6d7f6f2b8..7fa60a87f 100644 --- a/modules/sdnq/layers/linear/linear_int8.py +++ b/modules/sdnq/layers/linear/linear_int8.py @@ -36,7 +36,7 @@ def int8_matmul( if svd_up is not None: input = input.flatten(0,-2) if bias is not None: - bias = torch.addmm(bias, torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) + bias = torch.addmm(bias.to(dtype=svd_down.dtype), torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) else: bias = torch.mm(torch.mm(input.to(dtype=svd_down.dtype), svd_down), svd_up) input, scale = quantize_int8_matmul_input(input, 
scale) diff --git a/modules/sdnq/quantizer.py b/modules/sdnq/quantizer.py index 29f3b61d8..3a39122db 100644 --- a/modules/sdnq/quantizer.py +++ b/modules/sdnq/quantizer.py @@ -199,10 +199,13 @@ def sdnq_quantize_layer(layer, weights_dtype="int8", torch_dtype=None, group_siz layer.weight.data = layer.weight.to(dtype=torch.float32) if use_svd: - layer.weight.data, svd_up, svd_down = apply_svdquant(layer.weight, rank=svd_rank, niter=svd_steps) - if use_quantized_matmul: - svd_up = svd_up.t_() - svd_down = svd_down.t_() + try: + layer.weight.data, svd_up, svd_down = apply_svdquant(layer.weight, rank=svd_rank, niter=svd_steps) + if use_quantized_matmul: + svd_up = svd_up.t_() + svd_down = svd_down.t_() + except Exception: + svd_up, svd_down = None, None else: svd_up, svd_down = None, None @@ -210,9 +213,9 @@ def sdnq_quantize_layer(layer, weights_dtype="int8", torch_dtype=None, group_siz if use_quantized_matmul and dtype_dict[weights_dtype]["num_bits"] >= 6: group_size = -1 elif is_linear_type: - group_size = 2 ** ((2 if not use_svd else 3) + dtype_dict[weights_dtype]["num_bits"]) + group_size = 2 ** ((2 if svd_up is None else 3) + dtype_dict[weights_dtype]["num_bits"]) else: - group_size = 2 ** ((1 if not use_svd else 2) + dtype_dict[weights_dtype]["num_bits"]) + group_size = 2 ** ((1 if svd_up is None else 2) + dtype_dict[weights_dtype]["num_bits"]) elif use_quantized_matmul and dtype_dict[weights_dtype]["num_bits"] == 8: group_size = -1 # override user value, re-quantizing 8bit into 8bit is pointless elif group_size != -1 and not is_linear_type: diff --git a/modules/shared_state.py b/modules/shared_state.py index d79e90b86..f575f4482 100644 --- a/modules/shared_state.py +++ b/modules/shared_state.py @@ -194,7 +194,7 @@ class State: def begin(self, title="", task_id=0, api=None): import modules.devices self.clear() - self.interrupted = False + self.interrupted = self.interrupted if title.startswith('Save') else False self.skipped = False self.job_history += 1 
self.total_jobs += 1 diff --git a/modules/ui_guidance.py b/modules/ui_guidance.py index a6ea46391..72ca8500d 100644 --- a/modules/ui_guidance.py +++ b/modules/ui_guidance.py @@ -15,7 +15,7 @@ def create_guidance_inputs(tab): guidance_btn = ui_components.ToolButton(value=ui_symbols.book, elem_id=f"{tab}_guider_docs") guidance_btn.click(fn=None, _js='getGuidanceDocs', inputs=[guidance_name], outputs=[]) with gr.Row(visible=shared.opts.model_modular_enable): - guidance_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='_Guidance scale', value=6.0, elem_id=f"{tab}_guidance_scale") + guidance_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.1, label='_Guidance scale', value=6.0, elem_id=f"{tab}_guidance_scale") guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='_Guidance rescale', value=0.0, elem_id=f"{tab}_guidance_rescale") with gr.Row(visible=shared.opts.model_modular_enable): guidance_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='_Guidance start', value=0.0, elem_id=f"{tab}_guidance_start") @@ -114,7 +114,7 @@ def create_guidance_inputs(tab): gr.HTML(value='

Fallback guidance

', visible=shared.opts.model_modular_enable, elem_id=f"{tab}_guidance_note") with gr.Row(elem_id=f"{tab}_cfg_row", elem_classes=['flexbox']): - cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Guidance scale', value=6.0, elem_id=f"{tab}_cfg_scale") + cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.1, label='Guidance scale', value=6.0, elem_id=f"{tab}_cfg_scale") cfg_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='Guidance end', value=1.0, elem_id=f"{tab}_cfg_end") with gr.Row(): diffusers_guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Guidance rescale', value=0.0, elem_id=f"{tab}_image_cfg_rescale") diff --git a/modules/ui_models.py b/modules/ui_models.py index 89e5697ab..7810bd7e0 100644 --- a/modules/ui_models.py +++ b/modules/ui_models.py @@ -561,7 +561,7 @@ def create_ui(): with gr.Row(): hf_headers = ['Name', 'Pipeline', 'Tags', 'Downloads', 'Updated', 'URL'] hf_types = ['str', 'str', 'str', 'number', 'date', 'markdown'] - hf_results = gr.DataFrame(None, label='Search results', show_label=True, interactive=False, wrap=True, headers=hf_headers, datatype=hf_types, type='array') + hf_results = gr.DataFrame(None, label='Search results', show_label=True, interactive=False, wrap=True, headers=hf_headers, datatype=hf_types) hf_search_text.submit(fn=hf_search, inputs=[hf_search_text], outputs=[hf_results]) hf_search_btn.click(fn=hf_search, inputs=[hf_search_text], outputs=[hf_results]) diff --git a/modules/ui_video.py b/modules/ui_video.py index 02c787c6e..dfe6c2018 100644 --- a/modules/ui_video.py +++ b/modules/ui_video.py @@ -27,15 +27,24 @@ def create_ui(): with gr.Row(elem_id="video_interface", equal_height=False): with gr.Tabs(elem_classes=['video-tabs'], elem_id='video-tabs'): overrides = ui_common.create_override_inputs('video') - with gr.Tab('Core', id='video-tab') as video_tab: + with gr.Tab('Size', id='video-size-tab') as _video_size_tab: from modules.video_models import video_ui - 
video_ui.create_ui(prompt, negative, styles, overrides) + width, height, frames, seed, reuse_seed = video_ui.create_ui_size() + with gr.Tab('Inputs', id='video-inputs-tab') as _video_inputs_tab: + from modules.video_models import video_ui + init_image, init_strength, last_image = video_ui.create_ui_inputs() + with gr.Tab('Video Output', id='video-outputs-tab') as _video_outputs_tab: + from modules.video_models import video_ui + mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf = video_ui.create_ui_outputs() + with gr.Tab('Models', id='video-core-tab') as video_core_tab: + from modules.video_models import video_ui + video_ui.create_ui(prompt, negative, styles, overrides, init_image, init_strength, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf, width, height, frames, seed, reuse_seed) with gr.Tab('FramePack', id='framepack-tab') as framepack_tab: from modules.framepack import framepack_ui - framepack_ui.create_ui(prompt, negative, styles, overrides) + framepack_ui.create_ui(prompt, negative, styles, overrides, init_image, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf) with gr.Tab('LTX', id='ltx-tab') as ltx_tab: from modules.ltx import ltx_ui - ltx_ui.create_ui(prompt, negative, styles, overrides) + ltx_ui.create_ui(prompt, negative, styles, overrides, init_image, init_strength, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf, width, height, frames, seed) paste_fields = [ (prompt, "Prompt"), # cannot add more fields as they are not defined yet @@ -45,7 +54,7 @@ def create_ui(): generation_parameters_copypaste.register_paste_params_button(bindings) current_tab = gr.Textbox(visible=False, value='video') - video_tab.select(fn=lambda: 'video', inputs=[], outputs=[current_tab]) + video_core_tab.select(fn=lambda: 'video', inputs=[], outputs=[current_tab]) framepack_tab.select(fn=lambda: 
'framepack', inputs=[], outputs=[current_tab]) ltx_tab.select(fn=lambda: 'ltx', inputs=[], outputs=[current_tab]) diff --git a/modules/upscaler.py b/modules/upscaler.py index ec907ad8a..9ce10d9a4 100644 --- a/modules/upscaler.py +++ b/modules/upscaler.py @@ -64,7 +64,7 @@ class Upscaler: scaler.custom = True scalers.append(scaler) loaded.append(file_name) - shared.log.debug(f'Upscaler type={self.name} folder="{folder}" model="{model_name}" path="{file_name}"') + # shared.log.debug(f'Upscaler type={self.name} folder="{folder}" model="{model_name}" path="{file_name}"') def find_scalers(self): scalers = [] diff --git a/modules/video_models/models_def.py b/modules/video_models/models_def.py index 14f0eb8e0..f3b64b40c 100644 --- a/modules/video_models/models_def.py +++ b/modules/video_models/models_def.py @@ -349,4 +349,24 @@ models = { te_cls=transformers.T5EncoderModel, dit_cls=diffusers.CosmosTransformer3DModel), ], + 'Kandinsky': [ + Model(name='Kandinsky 5.0 Lite SFT T2V', + url='https://huggingface.co/ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers', + repo='ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers', + repo_cls=diffusers.Kandinsky5T2VPipeline, + te_cls=transformers.Qwen2_5_VLForConditionalGeneration, + dit_cls=diffusers.Kandinsky5Transformer3DModel), + Model(name='Kandinsky 5.0 Lite CFG-distilled T2V', + url='https://huggingface.co/ai-forever/Kandinsky-5.0-T2V-Lite-nocfg-5s-Diffusers', + repo='ai-forever/Kandinsky-5.0-T2V-Lite-nocfg-5s-Diffusers', + repo_cls=diffusers.Kandinsky5T2VPipeline, + te_cls=transformers.Qwen2_5_VLForConditionalGeneration, + dit_cls=diffusers.Kandinsky5Transformer3DModel), + Model(name='Kandinsky 5.0 Lite Steps-distilled T2V', + url='https://huggingface.co/ai-forever/Kandinsky-5.0-T2V-Lite-distilled16steps-5s-Diffusers', + repo='ai-forever/Kandinsky-5.0-T2V-Lite-distilled16steps-5s-Diffusers', + repo_cls=diffusers.Kandinsky5T2VPipeline, + te_cls=transformers.Qwen2_5_VLForConditionalGeneration, + 
dit_cls=diffusers.Kandinsky5Transformer3DModel), + ], } diff --git a/modules/video_models/video_load.py b/modules/video_models/video_load.py index d41e3d463..43da69265 100644 --- a/modules/video_models/video_load.py +++ b/modules/video_models/video_load.py @@ -39,6 +39,10 @@ def load_model(selected: models_def.Model): selected.te = 'hunyuanvideo-community/HunyuanVideo' selected.te_folder = 'text_encoder' selected.te_revision = None + if selected.te_cls.__name__ == 'Qwen2_5_VLForConditionalGeneration' and shared.opts.te_shared_t5: + selected.te = 'ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers' + selected.te_folder = 'text_encoder' + selected.te_revision = None shared.log.debug(f'Video load: module=te repo="{selected.te or selected.repo}" folder="{selected.te_folder}" cls={selected.te_cls.__name__} quant={model_quant.get_quant_type(quant_args)}') kwargs["text_encoder"] = selected.te_cls.from_pretrained( @@ -104,7 +108,7 @@ def load_model(selected: models_def.Model): shared.sd_model.sd_model_hash = None sd_models.set_diffuser_options(shared.sd_model, offload=False) - decode, text, image, slicing, tiling = False, False, False, False, False + decode, text, image, slicing, tiling, framewise = False, False, False, False, False, False if selected.vae_hijack and hasattr(shared.sd_model.vae, 'decode'): sd_hijack_vae.init_hijack(shared.sd_model) decode = True @@ -115,6 +119,9 @@ def load_model(selected: models_def.Model): shared.sd_model.orig_encode_image = shared.sd_model.encode_image shared.sd_model.encode_image = video_utils.hijack_encode_image image = True + if hasattr(shared.sd_model, 'vae') and hasattr(shared.sd_model.vae, 'use_framewise_decoding'): + shared.sd_model.vae.use_framewise_decoding = True + framewise = True if hasattr(shared.sd_model, 'vae') and hasattr(shared.sd_model.vae, 'enable_slicing'): shared.sd_model.vae.enable_slicing() slicing = True @@ -130,6 +137,6 @@ def load_model(selected: models_def.Model): loaded_model = selected.name msg = f'Video 
load: cls={shared.sd_model.__class__.__name__} model="{selected.name}" time={t1-t0:.2f}' shared.log.info(msg) - shared.log.debug(f'Video hijacks: decode={decode} text={text} image={image} slicing={slicing} tiling={tiling}') + shared.log.debug(f'Video hijacks: decode={decode} text={text} image={image} slicing={slicing} tiling={tiling} framewise={framewise}') shared.state.end(jobid) return msg diff --git a/modules/video_models/video_run.py b/modules/video_models/video_run.py index 40f7e154f..57d3fbc23 100644 --- a/modules/video_models/video_run.py +++ b/modules/video_models/video_run.py @@ -1,4 +1,5 @@ import os +import copy import time from modules import shared, errors, sd_models, processing, devices, images, ui_common from modules.video_models import models_def, video_utils, video_load, video_vae, video_overrides, video_save, video_prompt @@ -92,10 +93,21 @@ def generate(*args, **kwargs): p.task_args['height'] = p.height p.task_args['output_type'] = 'latent' if (p.vae_type == 'Remote') else 'pil' p.ops.append('video') + + # set scheduler params orig_dynamic_shift = shared.opts.schedulers_dynamic_shift orig_sampler_shift = shared.opts.schedulers_shift shared.opts.data['schedulers_dynamic_shift'] = dynamic_shift shared.opts.data['schedulers_shift'] = sampler_shift + if hasattr(shared.sd_model.scheduler, 'config') and hasattr(shared.sd_model.scheduler, 'register_to_config'): + if hasattr(shared.sd_model.scheduler.config, 'use_dynamic_shifting'): + shared.sd_model.scheduler.config.use_dynamic_shifting = dynamic_shift + shared.sd_model.scheduler.register_to_config(use_dynamic_shifting = dynamic_shift) + if hasattr(shared.sd_model.scheduler.config, 'flow_shift'): + shared.sd_model.scheduler.config.flow_shift = sampler_shift + shared.sd_model.scheduler.register_to_config(flow_shift = sampler_shift) + shared.sd_model.default_scheduler = copy.deepcopy(shared.sd_model.scheduler) + video_overrides.set_overrides(p, selected) debug(f'Video: task_args={p.task_args}') diff --git 
a/modules/video_models/video_ui.py b/modules/video_models/video_ui.py index cd78f45a2..5215055d7 100644 --- a/modules/video_models/video_ui.py +++ b/modules/video_models/video_ui.py @@ -72,10 +72,55 @@ def run_video(*args): return video_run.generate(*args) elif selected and 'anisora' in selected.name.lower(): return video_run.generate(*args) + elif selected and 'Kandinsky' in selected.name: + return video_run.generate(*args) return video_utils.queue_err(f'model not found: engine="{engine}" model="{model}"') -def create_ui(prompt, negative, styles, overrides): +def create_ui_inputs(): + with gr.Row(): + with gr.Column(variant='compact', elem_id="video_inputs", elem_classes=['settings-column'], scale=1): + init_strength = gr.Slider(label='Init strength', minimum=0.0, maximum=1.0, step=0.01, value=0.8, elem_id="video_denoising_strength") + gr.HTML("
  Init image") + init_image = gr.Image(elem_id="video_image", show_label=False, type="pil", image_mode="RGB", width=256, height=256) + gr.HTML("
  Last image") + last_image = gr.Image(elem_id="video_last", show_label=False, type="pil", image_mode="RGB", width=256, height=256) + return init_image, init_strength, last_image + + +def create_ui_outputs(): + with gr.Row(): + with gr.Column(variant='compact', elem_id="video_outputs", elem_classes=['settings-column'], scale=1): + with gr.Row(): + mp4_fps = gr.Slider(label="FPS", minimum=1, maximum=60, value=24, step=1) + mp4_interpolate = gr.Slider(label="Video interpolation", minimum=0, maximum=10, value=0, step=1) + with gr.Row(): + mp4_codec = gr.Dropdown(label="Video codec", choices=['none', 'libx264'], value='libx264', type='value') + ui_common.create_refresh_button(mp4_codec, video_utils.get_codecs, elem_id="framepack_mp4_codec_refresh") + mp4_ext = gr.Textbox(label="Video format", value='mp4', elem_id="framepack_mp4_ext") + mp4_opt = gr.Textbox(label="Video options", value='crf:16', elem_id="framepack_mp4_ext") + with gr.Row(): + mp4_video = gr.Checkbox(label='Video save video', value=True, elem_id="framepack_mp4_video") + mp4_frames = gr.Checkbox(label='Video save frames', value=False, elem_id="framepack_mp4_frames") + mp4_sf = gr.Checkbox(label='Video save safetensors', value=False, elem_id="framepack_mp4_sf") + return mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf + + +def create_ui_size(): + with gr.Row(): + with gr.Column(variant='compact', elem_id="video_size", elem_classes=['settings-column'], scale=1): + with gr.Row(): + width, height = ui_sections.create_resolution_inputs('video', default_width=832, default_height=480) + with gr.Row(): + frames = gr.Slider(label='Frames', minimum=1, maximum=1024, step=1, value=17, elem_id="video_frames") + seed = gr.Number(label='Initial seed', value=-1, elem_id="video_seed", container=True) + random_seed = ToolButton(ui_symbols.random, elem_id="video_seed_random") + reuse_seed = ToolButton(ui_symbols.reuse, elem_id="video_seed_reuse") + random_seed.click(fn=lambda: -1, 
show_progress=False, inputs=[], outputs=[seed]) + return width, height, frames, seed, reuse_seed + + +def create_ui(prompt, negative, styles, overrides, init_image, init_strength, last_image, mp4_fps, mp4_interpolate, mp4_codec, mp4_ext, mp4_opt, mp4_video, mp4_frames, mp4_sf, width, height, frames, seed, reuse_seed): with gr.Row(): with gr.Column(variant='compact', elem_id="video_settings", elem_classes=['settings-column'], scale=1): with gr.Row(): @@ -86,14 +131,6 @@ def create_ui(prompt, negative, styles, overrides): btn_load = ToolButton(ui_symbols.loading, elem_id="video_model_load") with gr.Row(): url = gr.HTML(label='Model URL', elem_id='video_model_url', value='

') - with gr.Accordion(open=True, label="Size", elem_id='video_size_accordion'): - with gr.Row(): - width, height = ui_sections.create_resolution_inputs('video', default_width=832, default_height=480) - with gr.Row(): - frames = gr.Slider(label='Frames', minimum=1, maximum=1024, step=1, value=17, elem_id="video_frames") - seed = gr.Number(label='Initial seed', value=-1, elem_id="video_seed", container=True) - random_seed = ToolButton(ui_symbols.random, elem_id="video_seed_random") - reuse_seed = ToolButton(ui_symbols.reuse, elem_id="video_seed_reuse") with gr.Accordion(open=False, label="Parameters", elem_id='video_parameters_accordion'): steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "video", default_steps=50) with gr.Row(): @@ -106,30 +143,9 @@ def create_ui(prompt, negative, styles, overrides): with gr.Row(): vae_type = gr.Dropdown(label='VAE decode', choices=['Default', 'Tiny', 'Remote'], value='Default', elem_id="video_vae_type") vae_tile_frames = gr.Slider(label='Tile frames', minimum=1, maximum=64, step=1, value=16, elem_id="video_vae_tile_frames") - with gr.Accordion(open=False, label="Init image", elem_id='video_init_accordion'): - init_strength = gr.Slider(label='Init strength', minimum=0.0, maximum=1.0, step=0.01, value=0.5, elem_id="video_denoising_strength") - gr.HTML("
  Init image") - init_image = gr.Image(elem_id="video_image", show_label=False, type="pil", image_mode="RGB", width=256, height=256) - gr.HTML("
  Last image") - last_image = gr.Image(elem_id="video_last", show_label=False, type="pil", image_mode="RGB", width=256, height=256) vlm_enhance, vlm_model, vlm_system_prompt = ui_video_vlm.create_ui(prompt_element=prompt, image_element=init_image) - with gr.Accordion(label="Video", open=False, elem_id='video_output_accordion'): - with gr.Row(): - mp4_fps = gr.Slider(label="FPS", minimum=1, maximum=60, value=24, step=1) - mp4_interpolate = gr.Slider(label="Video interpolation", minimum=0, maximum=10, value=0, step=1) - with gr.Row(): - mp4_codec = gr.Dropdown(label="Video codec", choices=['none', 'libx264'], value='libx264', type='value') - ui_common.create_refresh_button(mp4_codec, video_utils.get_codecs, elem_id="framepack_mp4_codec_refresh") - mp4_ext = gr.Textbox(label="Video format", value='mp4', elem_id="framepack_mp4_ext") - mp4_opt = gr.Textbox(label="Video options", value='crf:16', elem_id="framepack_mp4_ext") - with gr.Row(): - mp4_video = gr.Checkbox(label='Video save video', value=True, elem_id="framepack_mp4_video") - mp4_frames = gr.Checkbox(label='Video save frames', value=False, elem_id="framepack_mp4_frames") - mp4_sf = gr.Checkbox(label='Video save safetensors', value=False, elem_id="framepack_mp4_sf") - - # output panel with gallery and video tabs with gr.Column(elem_id='video-output-column', scale=2) as _column_output: with gr.Tabs(elem_classes=['video-output-tabs'], elem_id='video-output-tabs'): @@ -140,7 +156,6 @@ def create_ui(prompt, negative, styles, overrides): # connect reuse seed button ui_common.connect_reuse_seed(seed, reuse_seed, gen_info, is_subseed=False) - random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed]) # handle engine and model change engine.change(fn=engine_change, inputs=[engine], outputs=[model]) model.change(fn=model_change, inputs=[engine, model], outputs=[url]) diff --git a/modules/video_models/video_vae.py b/modules/video_models/video_vae.py index 3b7a52b70..aff67c3f6 100644 --- 
a/modules/video_models/video_vae.py +++ b/modules/video_models/video_vae.py @@ -9,7 +9,9 @@ vae_type = None def set_vae_params(p): global vae_type # pylint: disable=global-statement vae_type = p.vae_type - if p.vae_tile_frames > p.frames: + if hasattr(shared.sd_model.vae, 'enable_slicing'): + shared.sd_model.vae.enable_slicing() + if p.frames > p.vae_tile_frames: if hasattr(shared.sd_model.vae, 'tile_sample_min_num_frames'): shared.sd_model.vae.tile_sample_min_num_frames = p.vae_tile_frames if hasattr(shared.sd_model.vae, 'use_framewise_decoding'): @@ -30,6 +32,8 @@ def vae_decode_tiny(latents): variant = 'TAE MochiVideo' elif 'WAN' in shared.sd_model.__class__.__name__: variant = 'TAE WanVideo' + elif 'Kandinsky' in shared.sd_model.__class__.__name__: + variant = 'TAE HunyuanVideo' else: shared.log.warning(f'Decode: type=Tiny cls={shared.sd_model.__class__.__name__} not supported') return None diff --git a/modules/windows_hip_ffi.py b/modules/windows_hip_ffi.py index c612aa43d..3135bc89f 100644 --- a/modules/windows_hip_ffi.py +++ b/modules/windows_hip_ffi.py @@ -16,6 +16,7 @@ if sys.platform == "win32": def __init__(self): ctypes.windll.kernel32.LoadLibraryA.restype = ctypes.wintypes.HMODULE ctypes.windll.kernel32.LoadLibraryA.argtypes = [ctypes.c_char_p] + self.handle = None path = os.environ.get("windir", "C:\\Windows") + "\\System32\\amdhip64_6.dll" if not os.path.isfile(path): path = os.environ.get("windir", "C:\\Windows") + "\\System32\\amdhip64_7.dll" @@ -32,6 +33,8 @@ if sys.platform == "win32": ctypes.windll.kernel32.GetProcAddress(self.handle, b"hipGetDeviceProperties")) def __del__(self): + if self.handle is None: + return # Hopefully this will prevent conflicts with amdhip64_7.dll from ROCm Python packages or HIP SDK ctypes.windll.kernel32.FreeLibrary.argtypes = [ctypes.wintypes.HMODULE] ctypes.windll.kernel32.FreeLibrary(self.handle) diff --git a/scripts/daam/experiment.py b/scripts/daam/experiment.py index 4465a3054..301efa429 100644 --- 
a/scripts/daam/experiment.py +++ b/scripts/daam/experiment.py @@ -251,7 +251,7 @@ class GenerationExperiment: try: path = self.save_heat_map(word, tokenizer, crop=crop) path_map[word] = path - except: + except Exception: pass return path_map @@ -328,7 +328,7 @@ class GenerationExperiment: vocab=vocab, subtype=directory.name )) - except: + except Exception: pass return experiments diff --git a/wiki b/wiki index 78e4e6b94..847eae26c 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 78e4e6b94adac848c118b85d7a4c552babcb1acd +Subproject commit 847eae26c796ae32b8cd74b0cc79b705bfdc8f54