From eb025eaf3139d0a296496816f65603db8f391e2f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 28 Sep 2025 18:01:31 -0400 Subject: [PATCH] remove split-attention and add attention slicing option Signed-off-by: Vladimir Mandic --- CHANGELOG.md | 4 +- javascript/guidance.js | 20 +++++ modules/control/run.py | 10 ++- modules/img2img.py | 11 ++- modules/modular_guiders.py | 87 ++++++++++++++++++++++ modules/processing_class.py | 13 +++- modules/processing_diffusers.py | 2 + modules/sd_models.py | 12 +-- modules/shared.py | 2 + modules/shared_items.py | 1 - modules/txt2img.py | 6 ++ modules/ui_control.py | 17 +++-- modules/ui_guidance.py | 125 ++++++++++++++++++++++++++++++++ modules/ui_img2img.py | 27 ++++--- modules/ui_sections.py | 15 ---- modules/ui_txt2img.py | 21 ++++-- scripts/xyz/xyz_grid_classes.py | 2 + scripts/xyz/xyz_grid_shared.py | 7 ++ scripts/xyz_grid_on.py | 1 + 19 files changed, 331 insertions(+), 52 deletions(-) create mode 100644 javascript/guidance.js create mode 100644 modules/modular_guiders.py create mode 100644 modules/ui_guidance.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b10970e6d..474ab69f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,7 +54,9 @@ - **video** new LTX model selection - replace `pynvml` with `nvidia-ml-py` for gpu monitoring - update **loopback** script with radon seed option, thanks @rabanti - - **vae** slicing enable for lowvram/medvram, tiling for lowvram, both disabled otherwise + - **vae** slicing enable for lowvram/medvram, tiling for lowvram, both disabled otherwise + - **attention** remove split-attention and add explicitly attention slicing enable/disable option + can be combined with sdp, enabling may improve stability when used on iGPU or shared memory systems - **Fixes** - ui: fix image metadata display when switching selected image in control tab - framepack: add explicit hf-login before framepack load diff --git a/javascript/guidance.js b/javascript/guidance.js new file mode 100644 index 000000000..11c2430d3 --- /dev/null +++ b/javascript/guidance.js @@ -0,0 +1,20 @@ +const guiders = { + None: '', + 'LSC: LayerSkipConfig': 'https://github.com/huggingface/diffusers/blob/041501aea92919c9c7f36e189fc9cf7d865ebb96/src/diffusers/hooks/layer_skip.py#L41', + 'CFG: ClassifierFreeGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.ClassifierFreeGuidance', + 'Auto: AutoGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.AutoGuidance', + 'Zero: ClassifierFreeZeroStar': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.ClassifierFreeZeroStarGuidance', + 'PAG: PerturbedAttentionGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.PerturbedAttentionGuidance', + 'APG: AdaptiveProjectedGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.AdaptiveProjectedGuidance', + 'SLG: SkipLayerGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.SkipLayerGuidance', + 'SEG: SmoothedEnergyGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.SmoothedEnergyGuidance', + 'TCFG: TangentialClassifierFreeGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.TangentialClassifierFreeGuidance', + 'FDG: FrequencyDecoupledGuidance': 'https://huggingface.co/docs/diffusers/v0.35.1/en/api/modular_diffusers/guiders#diffusers.FrequencyDecoupledGuidance', +}; + +function getGuidanceDocs(guider) { + if (guider.label) guider = guider.label; + const url = guiders[guider]; + log('getGuidanceDocs', guider, url); + if (url) window.open(url, '_blank'); +} diff --git a/modules/control/run.py b/modules/control/run.py index 586958a49..3f6b0f7be 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -255,6 +255,7 @@ def control_run(state: str = '', # pylint: disable=keyword-arg-before-vararg prompt: str = '', negative_prompt: str = '', styles: List[str] = [], steps: int = 20, sampler_index: int = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, + guidance_name: str = 'Default', guidance_scale: float = 6.0, guidance_rescale: float = 0.0, guidance_start: float = 0.0, guidance_stop: float = 1.0, cfg_scale: float = 6.0, clip_skip: float = 1.0, image_cfg_scale: float = 6.0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1.0, vae_type: str = 'Full', tiling: bool = False, hidiffusion: bool = False, detailer_enabled: bool = True, detailer_prompt: str = '', detailer_negative: str = '', detailer_steps: int = 10, detailer_strength: float = 0.3, detailer_resolution: int = 1024, @@ -306,7 +307,13 @@ def control_run(state: str = '', # pylint: disable=keyword-arg-before-vararg seed_resize_from_h = seed_resize_from_h, seed_resize_from_w = seed_resize_from_w, denoising_strength = denoising_strength, - # advanced + # modular guidance + guidance_name = guidance_name, + guidance_scale = guidance_scale, + guidance_rescale = guidance_rescale, + guidance_start = guidance_start, + guidance_stop = guidance_stop, + # legacy guidance cfg_scale = cfg_scale, cfg_end = cfg_end, clip_skip = clip_skip, @@ -314,6 +321,7 @@ def control_run(state: str = '', # pylint: disable=keyword-arg-before-vararg diffusers_guidance_rescale = diffusers_guidance_rescale, pag_scale = pag_scale, pag_adaptive = pag_adaptive, + # advanced vae_type = vae_type, tiling = tiling, hidiffusion = hidiffusion, diff --git a/modules/img2img.py b/modules/img2img.py index 19da4b92f..4809d9fa0 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -161,10 +161,8 @@ def img2img(id_task: str, state: str, mode: int, vae_type, tiling, hidiffusion, detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength, detailer_resolution, n_iter, batch_size, - cfg_scale, image_cfg_scale, - diffusers_guidance_rescale, - pag_scale, pag_adaptive, - cfg_end, + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, + cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, refiner_start, clip_skip, denoising_strength, @@ -255,6 +253,11 @@ def img2img(id_task: str, state: str, mode: int, batch_size=batch_size, n_iter=n_iter, steps=steps, + guidance_name=guidance_name, + guidance_scale=guidance_scale, + guidance_rescale=guidance_rescale, + guidance_start=guidance_start, + guidance_stop=guidance_stop, cfg_scale=cfg_scale, cfg_end=cfg_end, clip_skip=clip_skip, diff --git a/modules/modular_guiders.py b/modules/modular_guiders.py new file mode 100644 index 000000000..ee9c3655c --- /dev/null +++ b/modules/modular_guiders.py @@ -0,0 +1,87 @@ +import diffusers +from modules import shared, errors, processing + + +# ['Default', 'CFG', 'Zero', 'PAG', 'APG', 'SLG', 'SEG', 'TCFG', 'FDG'] +guiders = { + # 'None': { 'cls': None, 'args': {}, }, + 'Default': { 'cls': None, 'args': {}, }, + 'CFG: ClassifierFreeGuidance': { 'cls': diffusers.ClassifierFreeGuidance, 'args': {} }, + 'Auto: AutoGuidance': { 'cls': diffusers.AutoGuidance, 'args': { 'dropout': 1.0, 'auto_guidance_layers': [7, 8, 9], 'auto_guidance_config': None } }, + 'Zero: ClassifierFreeZeroStar': { 'cls': diffusers.ClassifierFreeZeroStarGuidance, 'args': { 'zero_init_steps': 1 } }, + 'PAG: PerturbedAttentionGuidance': { 'cls': diffusers.PerturbedAttentionGuidance, 'args': { 'perturbed_guidance_scale': 2.8, 'perturbed_guidance_start': 0.01, 'perturbed_guidance_stop': 0.2, 'perturbed_guidance_layers': [7, 8, 9], 'perturbed_guidance_config': None } }, + 'APG: AdaptiveProjectedGuidance': { 'cls': diffusers.AdaptiveProjectedGuidance, 'args': { 'adaptive_projected_guidance_momentum': -1, 'adaptive_projected_guidance_rescale': 15.0 } }, + 'SLG: SkipLayerGuidance': { 'cls': diffusers.SkipLayerGuidance, 'args': { 'skip_layer_guidance_scale': 2.8, 'skip_layer_guidance_start': 0.01, 'skip_layer_guidance_stop': 0.2, 'skip_layer_guidance_layers': [7, 8, 9], 'skip_layer_config': None } }, + 'SEG: SmoothedEnergyGuidance': { 'cls': diffusers.SmoothedEnergyGuidance, 'args': { 'seg_guidance_scale': 3.0, 'seg_blur_sigma': 9999999.0, 'seg_blur_threshold_inf': 9999.0, 'seg_guidance_start': 0.0, 'seg_guidance_stop': 1.0, 'seg_guidance_layers': [7, 8, 9], 'seg_guidance_config': None } }, + 'TCFG: TangentialClassifierFreeGuidance': { 'cls': diffusers.TangentialClassifierFreeGuidance, 'args': {} }, + 'FDG: FrequencyDecoupledGuidance': { 'cls': diffusers.FrequencyDecoupledGuidance, 'args': { 'guidance_scales': [10.0, 5.0], 'parallel_weights': 1.0, 'guidance_rescale_space': "data" } }, +} +base_args = { + 'guidance_scale': 6.0, + 'guidance_rescale': 0.0, + 'start': 0.0, + 'stop': 1.0, +} + + +def set_guider(p: processing.StableDiffusionProcessing): + guidance_name = p.guidance_name or 'Default' + if guidance_name not in guiders: + return None + + if guidance_name == 'Default': + if hasattr(shared.sd_model, 'default_guider'): + guider_info = shared.sd_model.default_guider + shared.sd_model.update_components(guider=guider_info) + else: + guider_info = shared.sd_model.get_component_spec("guider") + shared.sd_model.default_guider = guider_info + guider_cls = guider_info.type_hint + if guider_info is not None and guider_cls is not None and guider_info.config is not None: + guider_args = {k: v for k, v in guider_info.config.items() if not k.startswith('_') and v is not None} + else: + guider_args = {} + shared.log.info(f'Guider: name={guidance_name} cls={guider_cls.__name__} args={guider_args}') + return + if guidance_name == 'None': + shared.sd_model.update_components(guider=None) # breaks the pipeline + shared.log.info(f'Guider: name={guidance_name}') + return + + guider_info = guiders[guidance_name] + guider_cls = guider_info['cls'] + guider_args = {} + for k, v in base_args.items(): + if v is not None and v >= 0.0: + guider_args[k] = v + shared.log.warning('Guiders: partially implemented') # TODO: guiders + for k, v in guider_info['args'].items(): + try: + if k is None: + pass + elif k.endswith('_layers') and isinstance(v, str): + guider_args[k] = [int(x.strip()) for x in v.split(',') if x.strip().isdigit()] + elif k.endswith('_config'): + # if lsc_enabled + # guider_args[k] = diffusers.LayerSkipConfig(...) + pass + elif isinstance(v, list) and len(v) > 0: + guider_args[k] = v + elif isinstance(v, int) and (v >= 0): + guider_args[k] = int(v) + elif isinstance(v, float) and (v >= 0.0): + guider_args[k] = float(v) + elif isinstance(v, str) and (len(v) > 0): + guider_args[k] = v + except Exception as e: + shared.log.error(f'Guiders: arg={k} value={v} error={e}') + errors.display(e, 'Guiders') + # guider_args.update(guider_info['args']) + if guider_cls is not None: + try: + guider_instance = guider_cls(**guider_args) + shared.log.info(f'Guider: name={guidance_name} cls={guider_cls.__name__} args={guider_args}') + shared.sd_model.update_components(guider=guider_instance) + except Exception as e: + shared.log.error(f'Guider: name={guidance_name} cls={guider_cls.__name__} args={guider_args} {e}') + return diff --git a/modules/processing_class.py b/modules/processing_class.py index d0d634cfa..ddad4686e 100644 --- a/modules/processing_class.py +++ b/modules/processing_class.py @@ -36,7 +36,13 @@ class StableDiffusionProcessing: sampler_name: str = None, hr_sampler_name: str = None, eta: float = None, - # guidance + # modular guidance + guidance_name: str = 'Default', + guidance_scale: float = 6.0, + guidance_rescale: float = 0.0, + guidance_start: float = 0.0, + guidance_stop: float = 1.0, + # legacy guidance cfg_scale: float = 6.0, cfg_end: float = 1, diffusers_guidance_rescale: float = 0.0, @@ -247,6 +253,11 @@ class StableDiffusionProcessing: self.do_not_save_grid = do_not_save_grid self.override_settings_restore_afterwards = override_settings_restore_afterwards self.eta = eta + self.guidance_name = guidance_name + self.guidance_scale = guidance_scale + self.guidance_rescale = guidance_rescale + self.guidance_start = guidance_start + self.guidance_stop = guidance_stop self.cfg_scale = cfg_scale self.cfg_end = cfg_end self.diffusers_guidance_rescale = diffusers_guidance_rescale diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index 6453516c0..277b5e1c5 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -102,6 +102,8 @@ def process_pre(p: processing.StableDiffusionProcessing): modular_pipe = modular.convert_to_modular(shared.sd_model) if modular_pipe is not None: shared.sd_model = modular_pipe + from modules import modular_guiders + modular_guiders.set_guider(p) timer.process.record('pre') diff --git a/modules/sd_models.py b/modules/sd_models.py index 9b9ec7d68..e759ac56f 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -1070,17 +1070,19 @@ def set_diffusers_attention(pipe, quiet:bool=False): pipe.enable_xformers_memory_efficient_attention() else: shared.log.warning(f"Attention: xFormers is not compatible with {pipe.__class__.__name__}") - elif shared.opts.cross_attention_optimization == "Split attention": - if hasattr(pipe, "enable_attention_slicing"): - pipe.enable_attention_slicing() - else: - shared.log.warning(f"Attention: Split attention is not compatible with {pipe.__class__.__name__}") elif shared.opts.cross_attention_optimization == "Batch matrix-matrix": set_attn(pipe, p.AttnProcessor(), name="Batch matrix-matrix") elif shared.opts.cross_attention_optimization == "Dynamic Attention BMM": from modules.sd_hijack_dynamic_atten import DynamicAttnProcessorBMM set_attn(pipe, DynamicAttnProcessorBMM(), name="Dynamic Attention BMM") + if shared.opts.attention_slicing != "Default" and hasattr(pipe, "enable_attention_slicing") and hasattr(pipe, "disable_attention_slicing"): + if shared.opts.attention_slicing: + pipe.enable_attention_slicing() + else: + pipe.disable_attention_slicing() + shared.log.debug(f"Attention: slicing={shared.opts.attention_slicing}") + pipe.current_attn_name = shared.opts.cross_attention_optimization diff --git a/modules/shared.py b/modules/shared.py index a9600ce71..8dcf8f26d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -290,6 +290,8 @@ options_templates.update(options_section(('cuda', "Compute Settings"), { "cross_attention_sep": OptionInfo("

Cross Attention

", "", gr.HTML), "cross_attention_optimization": OptionInfo(startup_cross_attention, "Attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention()}), + "attention_": OptionInfo("

Cross Attention

", "", gr.HTML), + "attention_slicing": OptionInfo('Default', "Attention slicing", gr.CheckboxGroup, {"choices": ['Default', 'Enabled', 'Disabled']}), "sdp_options": OptionInfo(startup_sdp_options, "SDP options", gr.CheckboxGroup, {"choices": startup_sdp_choices}), "xformers_options": OptionInfo(['Flash attention'], "xFormers options", gr.CheckboxGroup, {"choices": ['Flash attention'] }), "dynamic_attention_slice_rate": OptionInfo(0.5, "Dynamic Attention slicing rate in GB", gr.Slider, {"minimum": 0.01, "maximum": max(gpu_memory,4), "step": 0.01}), diff --git a/modules/shared_items.py b/modules/shared_items.py index 8eac5e070..f4d98e955 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -120,7 +120,6 @@ def list_crossattention(): "Scaled-Dot-Product", "xFormers", "Batch matrix-matrix", - "Split attention", "Dynamic Attention BMM" ] diff --git a/modules/txt2img.py b/modules/txt2img.py index 3397d4fe9..08d4f1bf2 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -14,6 +14,7 @@ def txt2img(id_task, state, vae_type, tiling, hidiffusion, detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength, detailer_resolution, n_iter, batch_size, + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, clip_skip, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, @@ -55,6 +56,11 @@ def txt2img(id_task, state, batch_size=batch_size, n_iter=n_iter, steps=steps, + guidance_name=guidance_name, + guidance_scale=guidance_scale, + guidance_rescale=guidance_rescale, + guidance_start=guidance_start, + guidance_stop=guidance_stop, cfg_scale=cfg_scale, image_cfg_scale=image_cfg_scale, diffusers_guidance_rescale=diffusers_guidance_rescale, diff --git a/modules/ui_control.py b/modules/ui_control.py index a1c77848d..3f43701af 100644 --- a/modules/ui_control.py +++ b/modules/ui_control.py @@ -2,7 +2,8 @@ import os import time import gradio as gr from modules.control import unit -from modules import errors, shared, progress, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts_manager, masking, images, processing_vae, timer # pylint: disable=ungrouped-imports +from modules import errors, shared, progress, generation_parameters_copypaste, call_queue, scripts_manager, masking, images, processing_vae, timer # pylint: disable=ungrouped-imports +from modules import ui_common, ui_sections, ui_guidance from modules import ui_control_helpers as helpers @@ -156,7 +157,7 @@ def create_ui(_blocks: gr.Blocks=None): mask_controls = masking.create_segment_ui() - cfg_scale, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_guidance_inputs('control') + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_guidance.create_guidance_inputs('control') vae_type, tiling, hidiffusion, clip_skip = ui_sections.create_advanced_inputs('control') hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundary, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('control') @@ -274,7 +275,8 @@ def create_ui(_blocks: gr.Blocks=None): prompt, negative, styles, steps, sampler_index, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, - cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, vae_type, tiling, hidiffusion, + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, + cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, vae_type, tiling, hidiffusion, detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength, detailer_resolution, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundary, hdr_color_picker, hdr_tint_ratio, resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before, @@ -354,14 +356,19 @@ def create_ui(_blocks: gr.Blocks=None): (mask_controls[4], "Mask erode"), (mask_controls[5], "Mask dilate"), (mask_controls[6], "Mask auto"), + # guidance + (guidance_name, "Guidance"), + (guidance_scale, "Guidance scale"), + (guidance_rescale, "Guidance rescale"), + (guidance_start, "Guidance start"), + (guidance_stop, "Guidance stop"), # advanced - (cfg_scale, "Guidance scale"), (cfg_scale, "CFG scale"), (cfg_end, "CFG end"), (clip_skip, "Clip skip"), (image_cfg_scale, "Image CFG scale"), (image_cfg_scale, "Hires CFG scale"), - (guidance_rescale, "CFG rescale"), + (diffusers_guidance_rescale, "CFG rescale"), (vae_type, "VAE type"), (tiling, "Tiling"), (hidiffusion, "HiDiffusion"), diff --git a/modules/ui_guidance.py b/modules/ui_guidance.py new file mode 100644 index 000000000..3f3b3f8f3 --- /dev/null +++ b/modules/ui_guidance.py @@ -0,0 +1,125 @@ +import gradio as gr +from modules import shared, modular_guiders +from modules import ui_symbols, ui_components + + +def create_guidance_inputs(tab): + with gr.Accordion(open=False, label='Guidance', elem_id=f"{tab}_guidance", elem_classes=["small-accordion"]): + with gr.Group(): + + with gr.Row(elem_id=f"{tab}_guider_row", elem_classes=['flexbox'], visible=shared.opts.model_modular_enable): + guidance_name = gr.Dropdown(choices=list(modular_guiders.guiders.keys()), value='Default', label='Guider', elem_id=f"{tab}_guider") + guidance_btn = ui_components.ToolButton(value=ui_symbols.book, elem_id=f"{tab}_guider_docs") + guidance_btn.click(fn=None, _js='getGuidanceDocs', inputs=[guidance_name], outputs=[]) + with gr.Row(visible=shared.opts.model_modular_enable): + guidance_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Guidance scale', value=6.0, elem_id=f"{tab}_guidance_scale") + guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Guidance rescale', value=0.0, elem_id=f"{tab}_guidance_rescale") + with gr.Row(visible=shared.opts.model_modular_enable): + guidance_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Guidance start', value=0.0, elem_id=f"{tab}_guidance_start") + guidance_stop = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='Guidance stop', value=1.0, elem_id=f"{tab}_guidance_stop") + guidance_args = [guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop] + + lsc_group = gr.Accordion(open=False, label='Layer skip guidance', elem_classes=["small-accordion"], visible=shared.opts.model_modular_enable) + with lsc_group: + with gr.Row(): + guidance_lsc_enabled = gr.Checkbox(label='Enable LayerSkipConfig', value=False) + guidance_lsc_label = gr.Label(value='LSC: LayerSkipConfig', elem_id=f"{tab}_lsc_label", visible=False) + guidance_lsc_btn = ui_components.ToolButton(value=ui_symbols.book, elem_id=f"{tab}_lsc_docs", elem_classes=["guidance-docs"]) + guidance_lsc_btn.click(fn=None, _js='getGuidanceDocs', inputs=[guidance_lsc_label], outputs=[]) + with gr.Row(): + guidance_lsc_indices = gr.Textbox(label='LSC layer indices', value='1, 2, 3', placeholder='Comma-separated layer indices to skip') + with gr.Row(): + guidance_lsc_fqn = gr.Textbox(label='LSC fully qualified name', value='transformer_blocks', placeholder='Fully qualified name of the layer stack') + with gr.Row(): + guidance_lsc_skip_attention = gr.Checkbox(label='LSC skip attention blocks', value=True) + guidance_lsc_skip_ff = gr.Checkbox(label='LSC skip feed-forward blocks', value=True) + guidance_lsc_skip_attention_scores = gr.Checkbox(label='LSC skip attention scores', value=False) + with gr.Row(): + guidance_lsc_dropout = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='LSC dropout rate', value=1.0) + lsc_args = [guidance_lsc_enabled, guidance_lsc_indices, guidance_lsc_fqn, guidance_lsc_skip_attention, guidance_lsc_skip_ff, guidance_lsc_skip_attention_scores, guidance_lsc_dropout] + + auto_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with auto_group: + guidance_auto_dropout = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='AutoGuidance dropout', value=0.1) + guidance_auto_layers = gr.Textbox(label='AutoGuidance layers', value='7, 8, 9', placeholder='Comma-separated layer indices, e.g. 7,8,9') + guidance_auto_config = gr.Dropdown(choices=[None, 'config1', 'config2'], value=None, label='AutoGuidance config') + guidance_auto_args = [guidance_auto_dropout, guidance_auto_layers, guidance_auto_config] + + zero_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with zero_group: + guidance_zero_init_steps = gr.Slider(minimum=0, maximum=10, step=1, label='ZeroStar init steps', value=1) + guidance_zero_args = [guidance_zero_init_steps] + + pag_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with pag_group: + guidance_pag_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.05, label='PAG scale', value=2.8) + guidance_pag_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='PAG start', value=0.01) + guidance_pag_stop = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='PAG stop', value=0.2) + guidance_pag_layers = gr.Textbox(label='PAG layers', value='7, 8, 9', placeholder='Comma-separated layer indices, e.g. 7,8,9') + guidance_pag_config = gr.Dropdown(choices=[None, 'config1', 'config2'], value=None, label='PAG config') + guidance_pag_args = [guidance_pag_scale, guidance_pag_start, guidance_pag_stop, guidance_pag_layers, guidance_pag_config] + + apg_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with apg_group: + guidance_apg_momentum = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='APG momentum', value=-1.0) + guidance_apg_rescale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='APG rescale', value=15.0) + guidance_apg_args = [guidance_apg_momentum, guidance_apg_rescale] + + slg_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with slg_group: + guidance_slg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='SLG scale', value=2.8) + guidance_slg_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='SLG start', value=0.01) + guidance_slg_stop = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='SLG stop', value=0.2) + guidance_slg_layers = gr.Textbox(label='SLG layers', value='7, 8, 9', placeholder='Comma-separated layer indices, e.g. 7,8,9') + guidance_slg_config = gr.Dropdown(choices=[None, 'config1', 'config2'], value=None, label='SLG config') + guidance_slg_args = [guidance_slg_scale, guidance_slg_start, guidance_slg_stop, guidance_slg_layers, guidance_slg_config] + + seg_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with seg_group: + guidance_seg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='SEG scale', value=3.0) + guidance_seg_blur_sigma = gr.Number(label='SEG blur sigma', value=9999999.0) + guidance_seg_blur_threshold_inf = gr.Number(label='SEG blur threshold inf', value=9999.0) + guidance_seg_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='SEG start', value=0.0) + guidance_seg_stop = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='SEG stop', value=1.0) + guidance_seg_layers = gr.Textbox(label='SEG layers', value='7, 8, 9', placeholder='Comma-separated layer indices, e.g. 7,8,9') + guidance_seg_config = gr.Dropdown(choices=[None, 'config1', 'config2'], value=None, label='SEG config') + guidance_seg_args = [guidance_seg_scale, guidance_seg_blur_sigma, guidance_seg_blur_threshold_inf, guidance_seg_start, guidance_seg_stop, guidance_seg_layers, guidance_seg_config] + + tcfg_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with tcfg_group: + pass + + fdg_group = gr.Accordion(open=True, label='Advanced guidance params', elem_classes=["small-accordion"], visible=False) + with fdg_group: + guidance_fdg_scales = gr.Textbox(label='FDG scales', value='10.0, 5.0', placeholder='Comma-separated scales, e.g. 10.0,5.0') + guidance_fdg_weights = gr.Textbox(label='FDG weights', value='1.0', placeholder='Single float or comma-separated weights, e.g. 1.0 or 1.0,0.5') + guidance_fdg_rescale_space = gr.Dropdown(choices=['data', 'freq'], value='data', label='FDG rescale space') + guidance_fdg_args = [guidance_fdg_scales, guidance_fdg_weights, guidance_fdg_rescale_space] + + def adv_visibility(guidance_name): + return [ + gr.update(visible=guidance_name.startswith('Auto')), + gr.update(visible=guidance_name.startswith('Zero')), + gr.update(visible=guidance_name.startswith('PAG')), + gr.update(visible=guidance_name.startswith('APG')), + gr.update(visible=guidance_name.startswith('SLG')), + gr.update(visible=guidance_name.startswith('SEG')), + gr.update(visible=guidance_name.startswith('TCFG')), + gr.update(visible=guidance_name.startswith('FDG')), + ] + guidance_name.change(fn=adv_visibility, inputs=[guidance_name], outputs=[auto_group, zero_group, pag_group, apg_group, slg_group, seg_group, tcfg_group, fdg_group]) + + gr.HTML(value='

Fallback guidance

', visible=shared.opts.model_modular_enable, elem_id=f"{tab}_guidance_note") + with gr.Row(elem_id=f"{tab}_cfg_row", elem_classes=['flexbox']): + cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Guidance scale', value=6.0, elem_id=f"{tab}_cfg_scale") + cfg_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='Guidance end', value=1.0, elem_id=f"{tab}_cfg_end") + with gr.Row(): + image_cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Refine guidance', value=6.0, elem_id=f"{tab}_image_cfg_scale") + diffusers_guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Rescale guidance', value=0.0, elem_id=f"{tab}_image_cfg_rescale") + with gr.Row(): + diffusers_pag_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.05, label='Attention guidance', value=0.0, elem_id=f"{tab}_pag_scale") + diffusers_pag_adaptive = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Adaptive scaling', value=0.5, elem_id=f"{tab}_pag_adaptive") + + _modular_args = guidance_args + lsc_args + guidance_auto_args + guidance_zero_args + guidance_pag_args + guidance_apg_args + guidance_slg_args + guidance_seg_args + guidance_fdg_args + standard_args = [cfg_scale, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end] + return guidance_args + standard_args diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py index 7e7db44cb..cf1324484 100644 --- a/modules/ui_img2img.py +++ b/modules/ui_img2img.py @@ -1,11 +1,11 @@ -import os -from PIL import Image import gradio as gr -from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call -from modules import timer, shared, ui_common, ui_sections, generation_parameters_copypaste, processing_vae +from modules import timer, shared, call_queue, generation_parameters_copypaste, processing_vae +from modules import ui_common, ui_sections, ui_guidance def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles): + import os + from PIL import Image from modules.interrogate.interrogate import interrogate mode = int(mode) if mode in {0, 1, 3, 4}: @@ -132,7 +132,7 @@ def create_ui(): denoising_strength = gr.Slider(minimum=0.00, maximum=0.99, step=0.01, label='Denoising strength', value=0.30, elem_id="img2img_denoising_strength") refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="img2img_refiner_start") - cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_guidance_inputs('img2img') + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_guidance.create_guidance_inputs('img2img') vae_type, tiling, hidiffusion, clip_skip = ui_sections.create_advanced_inputs('img2img') hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundary, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('img2img') enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, hr_refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('img2img') @@ -177,8 +177,8 @@ def create_ui(): vae_type, tiling, hidiffusion, detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength, detailer_resolution, batch_count, batch_size, - cfg_scale, image_cfg_scale, - diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, + cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, refiner_start, clip_skip, denoising_strength, @@ -194,7 +194,7 @@ def create_ui(): override_settings, ] img2img_dict = dict( - fn=wrap_gradio_gpu_call(modules.img2img.img2img, extra_outputs=[None, '', ''], name='Image'), + fn=call_queue.wrap_gradio_gpu_call(modules.img2img.img2img, extra_outputs=[None, '', ''], name='Image'), _js="submit_img2img", inputs= img2img_args + img2img_script_inputs, outputs=[ @@ -229,8 +229,8 @@ def create_ui(): ) interrogate_btn.click(fn=lambda *args: process_interrogate(*args), **interrogate_args) - img2img_token_button.click(fn=wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_prompt], outputs=[img2img_token_counter], show_progress = False) - img2img_negative_token_button.click(fn=wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_negative_prompt], outputs=[img2img_negative_token_counter], show_progress = False) + img2img_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_prompt], outputs=[img2img_token_counter], show_progress = False) + img2img_negative_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_negative_prompt], outputs=[img2img_negative_token_counter], show_progress = False) ui_extra_networks.setup_ui(extra_networks_ui_img2img, img2img_gallery) img2img_paste_fields = [ @@ -254,8 +254,13 @@ def create_ui(): (seed, "Seed"), (subseed, "Variation seed"), (subseed_strength, "Variation strength"), + # guidance + (guidance_name, "Guidance"), + (guidance_scale, "Guidance scale"), + (guidance_rescale, "Guidance rescale"), + (guidance_start, "Guidance start"), + (guidance_stop, "Guidance stop"), # advanced - (cfg_scale, "Guidance scale"), (cfg_scale, "CFG scale"), (cfg_end, "CFG end"), (image_cfg_scale, "Image CFG scale"), diff --git a/modules/ui_sections.py b/modules/ui_sections.py index 829c502bd..683681cd5 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -146,21 +146,6 @@ def create_video_inputs(tab:str, show_always:bool=False): return video_type, video_duration, video_loop, video_pad, video_interpolate -def create_guidance_inputs(tab): - with gr.Accordion(open=False, label="Guidance", elem_id=f"{tab}_guidance", elem_classes=["small-accordion"]): - with gr.Group(): - with gr.Row(elem_id=f"{tab}_cfg_row", elem_classes=['flexbox']): - cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Guidance scale', value=6.0, elem_id=f"{tab}_cfg_scale") - cfg_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='Guidance end', value=1.0, elem_id=f"{tab}_cfg_end") - with gr.Row(): - image_cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Refine guidance', value=6.0, elem_id=f"{tab}_image_cfg_scale") - diffusers_guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Rescale guidance', value=0.0, elem_id=f"{tab}_image_cfg_rescale") - with gr.Row(): - diffusers_pag_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.05, label='Attention guidance', value=0.0, elem_id=f"{tab}_pag_scale") - diffusers_pag_adaptive = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Adaptive scaling', value=0.5, elem_id=f"{tab}_pag_adaptive") - return cfg_scale, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end - - def create_advanced_inputs(tab): with gr.Accordion(open=False, label="Advanced", elem_id=f"{tab}_advanced", elem_classes=["small-accordion"]): with gr.Group(): diff --git a/modules/ui_txt2img.py b/modules/ui_txt2img.py index 0fae0c606..6eccbdd82 100644 --- a/modules/ui_txt2img.py +++ b/modules/ui_txt2img.py @@ -1,7 +1,6 @@ import gradio as gr -from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call -from modules import timer, shared, ui_common, ui_sections, generation_parameters_copypaste, processing_vae, images -from modules.ui_components import ToolButton # pylint: disable=unused-import +from modules import timer, shared, call_queue, generation_parameters_copypaste, processing_vae, images +from modules import ui_common, ui_sections, ui_guidance def create_ui(): @@ -33,7 +32,7 @@ def create_ui(): with gr.Accordion(open=False, label="Samplers", elem_classes=["small-accordion"], elem_id="txt2img_sampler_group"): ui_sections.create_sampler_options('txt2img') seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('txt2img') - cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_guidance_inputs('txt2img') + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_guidance.create_guidance_inputs('txt2img') vae_type, tiling, hidiffusion, clip_skip = ui_sections.create_advanced_inputs('txt2img') hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundary, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('txt2img') enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') @@ -57,6 +56,7 @@ def create_ui(): vae_type, tiling, hidiffusion, detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength, detailer_resolution, batch_count, batch_size, + guidance_name, guidance_scale, guidance_rescale, guidance_start, guidance_stop, cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, clip_skip, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, @@ -68,7 +68,7 @@ def create_ui(): override_settings, ] txt2img_dict = dict( - fn=wrap_gradio_gpu_call(modules.txt2img.txt2img, extra_outputs=[None, '', ''], name='Text'), + fn=call_queue.wrap_gradio_gpu_call(modules.txt2img.txt2img, extra_outputs=[None, '', ''], name='Text'), _js="submit_txt2img", inputs=txt2img_args + txt2img_script_inputs, outputs=[ @@ -106,8 +106,13 @@ def create_ui(): (seed, "Seed"), (subseed, "Variation seed"), (subseed_strength, "Variation strength"), + # guidance + (guidance_name, "Guidance"), + (guidance_scale, "Guidance scale"), + (guidance_rescale, "Guidance rescale"), + (guidance_start, "Guidance start"), + (guidance_stop, "Guidance stop"), # advanced - (cfg_scale, "Guidance scale"), (cfg_scale, "CFG scale"), (cfg_end, "CFG end"), (clip_skip, "Clip skip"), @@ -156,7 +161,7 @@ def create_ui(): txt2img_bindings = generation_parameters_copypaste.ParamBinding(paste_button=txt2img_paste, tabname="txt2img", source_text_component=txt2img_prompt, source_image_component=None) generation_parameters_copypaste.register_paste_params_button(txt2img_bindings) - txt2img_token_button.click(fn=wrap_queued_call(ui_common.update_token_counter), inputs=[txt2img_prompt], outputs=[txt2img_token_counter], show_progress = False) - txt2img_negative_token_button.click(fn=wrap_queued_call(ui_common.update_token_counter), inputs=[txt2img_negative_prompt], outputs=[txt2img_negative_token_counter], show_progress = False) + txt2img_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[txt2img_prompt], outputs=[txt2img_token_counter], show_progress = False) + txt2img_negative_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[txt2img_negative_prompt], outputs=[txt2img_negative_token_counter], show_progress = False) ui_extra_networks.setup_ui(extra_networks_ui, txt2img_gallery) diff --git a/scripts/xyz/xyz_grid_classes.py b/scripts/xyz/xyz_grid_classes.py index 306b591f3..0836adbbf 100644 --- a/scripts/xyz/xyz_grid_classes.py +++ b/scripts/xyz/xyz_grid_classes.py @@ -20,6 +20,7 @@ from scripts.xyz.xyz_grid_shared import ( # pylint: disable=no-name-in-module, u apply_lora, apply_lora_strength, apply_te, + apply_guidance, apply_styles, apply_upscaler, apply_context, @@ -223,6 +224,7 @@ axis_options = [ AxisOption("[Sampler] Shift", float, apply_setting("schedulers_shift")), AxisOption("[Sampler] eta delta", float, apply_setting("eta_noise_seed_delta")), AxisOption("[Sampler] eta multiplier", float, apply_setting("scheduler_eta")), + AxisOption("[Guidance] Guidance name", str, apply_guidance, choices=lambda: ['Default', 'CFG', 'Auto', 'Zero', 'PAG', 'APG', 'SLG', 'SEG', 'TCFG', 'FDG']), AxisOption("[Refine] Upscaler", str, apply_field("hr_upscaler"), cost=0.3, choices=lambda: [x.name for x in shared.sd_upscalers]), AxisOption("[Refine] Sampler", str, apply_hr_sampler_name, fmt=format_value_add_label, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]), AxisOption("[Refine] Denoising strength", float, apply_field("denoising_strength")), diff --git a/scripts/xyz/xyz_grid_shared.py b/scripts/xyz/xyz_grid_shared.py index 0c95d14de..76d4df769 100644 --- a/scripts/xyz/xyz_grid_shared.py +++ b/scripts/xyz/xyz_grid_shared.py @@ -254,6 +254,13 @@ def apply_te(p, x, xs): shared.log.debug(f'XYZ grid apply text-encoder: "{x}"') +def apply_guidance(p, x, xs): + from modules.modular_guiders import guiders + guiders = list(guiders.keys()) + p.guidance_name = [g for g in guiders if g.lower().startswith(x.lower())][0] + shared.log.debug(f'XYZ grid apply guidance: "{p.guidance_name}"') + + def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _): p.styles.extend(x.split(',')) shared.log.debug(f'XYZ grid apply style: "{x}"') diff --git a/scripts/xyz_grid_on.py b/scripts/xyz_grid_on.py index f79be6636..595536067 100644 --- a/scripts/xyz_grid_on.py +++ b/scripts/xyz_grid_on.py @@ -324,6 +324,7 @@ class Script(scripts_manager.Script): def cell(x, y, z, ix, iy, iz): if shared.state.interrupted: + shared.log.warning('XYZ grid: Interrupted') return processing.Processed(p, [], p.seed, ""), 0 p.xyz = True pc = copy(p)